ReactOS 0.4.16-dev-92-g0c2cdca
parser.c
Go to the documentation of this file.
1/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
38#define IN_LIBXML
39#include "libxml.h"
40
41#if defined(_WIN32)
42#define XML_DIR_SEP '\\'
43#else
44#define XML_DIR_SEP '/'
45#endif
46
47#include <stdlib.h>
48#include <limits.h>
49#include <string.h>
50#include <stdarg.h>
51#include <stddef.h>
52#include <ctype.h>
53#include <stdlib.h>
54#include <libxml/xmlmemory.h>
55#include <libxml/threads.h>
56#include <libxml/globals.h>
57#include <libxml/tree.h>
58#include <libxml/parser.h>
60#include <libxml/valid.h>
61#include <libxml/entities.h>
62#include <libxml/xmlerror.h>
63#include <libxml/encoding.h>
64#include <libxml/xmlIO.h>
65#include <libxml/uri.h>
66#ifdef LIBXML_CATALOG_ENABLED
67#include <libxml/catalog.h>
68#endif
69#ifdef LIBXML_SCHEMAS_ENABLED
71#include <libxml/relaxng.h>
72#endif
73
74#include "buf.h"
75#include "enc.h"
76
79 const xmlChar *URI;
80 int line;
81 int nsNr;
82};
83
84static void
86
89 const xmlChar *base, xmlParserCtxtPtr pctx);
90
91static void xmlHaltParser(xmlParserCtxtPtr ctxt);
92
93static int
95
96static void
98
99/************************************************************************
100 * *
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102 * *
103 ************************************************************************/
104
105#define XML_MAX_HUGE_LENGTH 1000000000
106
107#define XML_PARSER_BIG_ENTITY 1000
108#define XML_PARSER_LOT_ENTITY 5000
109
110/*
111 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
112 * replacement over the size in bytes of the input indicates that you have
113 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
114 * replacements per byte of input.
115 */
116#define XML_PARSER_NON_LINEAR 10
117
118/*
119 * xmlParserEntityCheck
120 *
121 * Function to check non-linear entity expansion behaviour
122 * This is here to detect and stop exponential (non-linear) entity expansion
123 * This is not a limitation of the parser but a safety
124 * boundary feature. It can be disabled with the XML_PARSE_HUGE
125 * parser option.
126 */
127static int
129 xmlEntityPtr ent, size_t replacement)
130{
131 size_t consumed = 0;
132 int i;
133
134 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
135 return (0);
137 return (1);
138
139 /*
140 * This may look absurd but is needed to detect
141 * entities problems
142 */
143 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
144 (ent->content != NULL) && (ent->checked == 0) &&
145 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
146 unsigned long oldnbent = ctxt->nbentities, diff;
147 xmlChar *rep;
148
149 ent->checked = 1;
150
151 ++ctxt->depth;
152 rep = xmlStringDecodeEntities(ctxt, ent->content,
153 XML_SUBSTITUTE_REF, 0, 0, 0);
154 --ctxt->depth;
155 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
156 ent->content[0] = 0;
157 }
158
159 diff = ctxt->nbentities - oldnbent + 1;
160 if (diff > INT_MAX / 2)
161 diff = INT_MAX / 2;
162 ent->checked = diff * 2;
163 if (rep != NULL) {
164 if (xmlStrchr(rep, '<'))
165 ent->checked |= 1;
166 xmlFree(rep);
167 rep = NULL;
168 }
169 }
170
171 /*
172 * Prevent entity exponential check, not just replacement while
173 * parsing the DTD
174 * The check is potentially costly so do that only once in a thousand
175 */
176 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
177 (ctxt->nbentities % 1024 == 0)) {
178 for (i = 0;i < ctxt->inputNr;i++) {
179 consumed += ctxt->inputTab[i]->consumed +
180 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
181 }
184 ctxt->instate = XML_PARSER_EOF;
185 return (1);
186 }
187 consumed = 0;
188 }
189
190
191
192 if (replacement != 0) {
193 if (replacement < XML_MAX_TEXT_LENGTH)
194 return(0);
195
196 /*
197 * If the volume of entity copy reaches 10 times the
198 * amount of parsed data and over the large text threshold
199 * then that's very likely to be an abuse.
200 */
201 if (ctxt->input != NULL) {
202 consumed = ctxt->input->consumed +
203 (ctxt->input->cur - ctxt->input->base);
204 }
205 consumed += ctxt->sizeentities;
206
207 if (replacement < XML_PARSER_NON_LINEAR * consumed)
208 return(0);
209 } else if (size != 0) {
210 /*
211 * Do the check based on the replacement size of the entity
212 */
214 return(0);
215
216 /*
217 * A limit on the amount of text data reasonably used
218 */
219 if (ctxt->input != NULL) {
220 consumed = ctxt->input->consumed +
221 (ctxt->input->cur - ctxt->input->base);
222 }
223 consumed += ctxt->sizeentities;
224
227 return (0);
228 } else if (ent != NULL) {
229 /*
230 * use the number of parsed entities in the replacement
231 */
232 size = ent->checked / 2;
233
234 /*
235 * The amount of data parsed counting entities size only once
236 */
237 if (ctxt->input != NULL) {
238 consumed = ctxt->input->consumed +
239 (ctxt->input->cur - ctxt->input->base);
240 }
241 consumed += ctxt->sizeentities;
242
243 /*
244 * Check the density of entities for the amount of data
245 * knowing an entity reference will take at least 3 bytes
246 */
248 return (0);
249 } else {
250 /*
251 * strange we got no data for checking
252 */
253 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
255 (ctxt->nbentities <= 10000))
256 return (0);
257 }
259 return (1);
260}
261
270unsigned int xmlParserMaxDepth = 256;
271
272
273
274#define SAX2 1
275#define XML_PARSER_BIG_BUFFER_SIZE 300
276#define XML_PARSER_BUFFER_SIZE 100
277#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
278
288#define XML_PARSER_CHUNK_SIZE 100
289
290/*
291 * List of XML prefixed PI allowed by W3C specs
292 */
293
294static const char* const xmlW3CPIs[] = {
295 "xml-stylesheet",
296 "xml-model",
297 NULL
298};
299
300
301/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
303 const xmlChar **str);
304
305static xmlParserErrors
308 void *user_data, int depth, const xmlChar *URL,
309 const xmlChar *ID, xmlNodePtr *list);
310
311static int
313 const char *encoding);
314#ifdef LIBXML_LEGACY_ENABLED
315static void
316xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
317 xmlNodePtr lastNode);
318#endif /* LIBXML_LEGACY_ENABLED */
319
320static xmlParserErrors
322 const xmlChar *string, void *user_data, xmlNodePtr *lst);
323
324static int
326
327/************************************************************************
328 * *
329 * Some factorized error routines *
330 * *
331 ************************************************************************/
332
341static void
343 const xmlChar * localname)
344{
345 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
346 (ctxt->instate == XML_PARSER_EOF))
347 return;
348 if (ctxt != NULL)
350
351 if (prefix == NULL)
352 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
354 (const char *) localname, NULL, NULL, 0, 0,
355 "Attribute %s redefined\n", localname);
356 else
357 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
359 (const char *) prefix, (const char *) localname,
360 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
361 localname);
362 if (ctxt != NULL) {
363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366 }
367}
368
377static void
379{
380 const char *errmsg;
381
382 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
383 (ctxt->instate == XML_PARSER_EOF))
384 return;
385 switch (error) {
387 errmsg = "CharRef: invalid hexadecimal value";
388 break;
390 errmsg = "CharRef: invalid decimal value";
391 break;
393 errmsg = "CharRef: invalid value";
394 break;
396 errmsg = "internal error";
397 break;
399 errmsg = "PEReference at end of document";
400 break;
402 errmsg = "PEReference in prolog";
403 break;
405 errmsg = "PEReference in epilog";
406 break;
408 errmsg = "PEReference: no name";
409 break;
411 errmsg = "PEReference: expecting ';'";
412 break;
414 errmsg = "Detected an entity reference loop";
415 break;
417 errmsg = "EntityValue: \" or ' expected";
418 break;
420 errmsg = "PEReferences forbidden in internal subset";
421 break;
423 errmsg = "EntityValue: \" or ' expected";
424 break;
426 errmsg = "AttValue: \" or ' expected";
427 break;
429 errmsg = "Unescaped '<' not allowed in attributes values";
430 break;
432 errmsg = "SystemLiteral \" or ' expected";
433 break;
435 errmsg = "Unfinished System or Public ID \" or ' expected";
436 break;
438 errmsg = "Sequence ']]>' not allowed in content";
439 break;
441 errmsg = "SYSTEM or PUBLIC, the URI is missing";
442 break;
444 errmsg = "PUBLIC, the Public Identifier is missing";
445 break;
447 errmsg = "Comment must not contain '--' (double-hyphen)";
448 break;
450 errmsg = "xmlParsePI : no target name";
451 break;
453 errmsg = "Invalid PI name";
454 break;
456 errmsg = "NOTATION: Name expected here";
457 break;
459 errmsg = "'>' required to close NOTATION declaration";
460 break;
462 errmsg = "Entity value required";
463 break;
465 errmsg = "Fragment not allowed";
466 break;
468 errmsg = "'(' required to start ATTLIST enumeration";
469 break;
471 errmsg = "NmToken expected in ATTLIST enumeration";
472 break;
474 errmsg = "')' required to finish ATTLIST enumeration";
475 break;
477 errmsg = "MixedContentDecl : '|' or ')*' expected";
478 break;
480 errmsg = "MixedContentDecl : '#PCDATA' expected";
481 break;
483 errmsg = "ContentDecl : Name or '(' expected";
484 break;
486 errmsg = "ContentDecl : ',' '|' or ')' expected";
487 break;
489 errmsg =
490 "PEReference: forbidden within markup decl in internal subset";
491 break;
493 errmsg = "expected '>'";
494 break;
496 errmsg = "XML conditional section '[' expected";
497 break;
499 errmsg = "Content error in the external subset";
500 break;
502 errmsg =
503 "conditional section INCLUDE or IGNORE keyword expected";
504 break;
506 errmsg = "XML conditional section not closed";
507 break;
509 errmsg = "Text declaration '<?xml' required";
510 break;
512 errmsg = "parsing XML declaration: '?>' expected";
513 break;
515 errmsg = "external parsed entities cannot be standalone";
516 break;
518 errmsg = "EntityRef: expecting ';'";
519 break;
521 errmsg = "DOCTYPE improperly terminated";
522 break;
524 errmsg = "EndTag: '</' not found";
525 break;
527 errmsg = "expected '='";
528 break;
530 errmsg = "String not closed expecting \" or '";
531 break;
533 errmsg = "String not started expecting ' or \"";
534 break;
536 errmsg = "Invalid XML encoding name";
537 break;
539 errmsg = "standalone accepts only 'yes' or 'no'";
540 break;
542 errmsg = "Document is empty";
543 break;
545 errmsg = "Extra content at the end of the document";
546 break;
548 errmsg = "chunk is not well balanced";
549 break;
551 errmsg = "extra content at the end of well balanced chunk";
552 break;
554 errmsg = "Malformed declaration expecting version";
555 break;
557 errmsg = "Name too long";
558 break;
559#if 0
560 case:
561 errmsg = "";
562 break;
563#endif
564 default:
565 errmsg = "Unregistered error message";
566 }
567 if (ctxt != NULL)
568 ctxt->errNo = error;
569 if (info == NULL) {
570 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
571 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
572 errmsg);
573 } else {
574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
576 errmsg, info);
577 }
578 if (ctxt != NULL) {
579 ctxt->wellFormed = 0;
580 if (ctxt->recovery == 0)
581 ctxt->disableSAX = 1;
582 }
583}
584
593static void LIBXML_ATTR_FORMAT(3,0)
594xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg)
596{
597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
600 if (ctxt != NULL)
601 ctxt->errNo = error;
602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
603 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
604 if (ctxt != NULL) {
605 ctxt->wellFormed = 0;
606 if (ctxt->recovery == 0)
607 ctxt->disableSAX = 1;
608 }
609}
610
621static void LIBXML_ATTR_FORMAT(3,0)
622xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
623 const char *msg, const xmlChar *str1, const xmlChar *str2)
624{
625 xmlStructuredErrorFunc schannel = NULL;
626
627 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
628 (ctxt->instate == XML_PARSER_EOF))
629 return;
630 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
631 (ctxt->sax->initialized == XML_SAX2_MAGIC))
632 schannel = ctxt->sax->serror;
633 if (ctxt != NULL) {
634 __xmlRaiseError(schannel,
635 (ctxt->sax) ? ctxt->sax->warning : NULL,
636 ctxt->userData,
637 ctxt, NULL, XML_FROM_PARSER, error,
639 (const char *) str1, (const char *) str2, NULL, 0, 0,
640 msg, (const char *) str1, (const char *) str2);
641 } else {
642 __xmlRaiseError(schannel, NULL, NULL,
643 ctxt, NULL, XML_FROM_PARSER, error,
645 (const char *) str1, (const char *) str2, NULL, 0, 0,
646 msg, (const char *) str1, (const char *) str2);
647 }
648}
649
659static void LIBXML_ATTR_FORMAT(3,0)
660xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
661 const char *msg, const xmlChar *str1, const xmlChar *str2)
662{
663 xmlStructuredErrorFunc schannel = NULL;
664
665 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
666 (ctxt->instate == XML_PARSER_EOF))
667 return;
668 if (ctxt != NULL) {
669 ctxt->errNo = error;
670 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
671 schannel = ctxt->sax->serror;
672 }
673 if (ctxt != NULL) {
674 __xmlRaiseError(schannel,
675 ctxt->vctxt.error, ctxt->vctxt.userData,
676 ctxt, NULL, XML_FROM_DTD, error,
677 XML_ERR_ERROR, NULL, 0, (const char *) str1,
678 (const char *) str2, NULL, 0, 0,
679 msg, (const char *) str1, (const char *) str2);
680 ctxt->valid = 0;
681 } else {
682 __xmlRaiseError(schannel, NULL, NULL,
683 ctxt, NULL, XML_FROM_DTD, error,
684 XML_ERR_ERROR, NULL, 0, (const char *) str1,
685 (const char *) str2, NULL, 0, 0,
686 msg, (const char *) str1, (const char *) str2);
687 }
688}
689
699static void LIBXML_ATTR_FORMAT(3,0)
700xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
701 const char *msg, int val)
702{
703 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
704 (ctxt->instate == XML_PARSER_EOF))
705 return;
706 if (ctxt != NULL)
707 ctxt->errNo = error;
708 __xmlRaiseError(NULL, NULL, NULL,
710 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
711 if (ctxt != NULL) {
712 ctxt->wellFormed = 0;
713 if (ctxt->recovery == 0)
714 ctxt->disableSAX = 1;
715 }
716}
717
729static void LIBXML_ATTR_FORMAT(3,0)
730xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
731 const char *msg, const xmlChar *str1, int val,
732 const xmlChar *str2)
733{
734 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
735 (ctxt->instate == XML_PARSER_EOF))
736 return;
737 if (ctxt != NULL)
738 ctxt->errNo = error;
739 __xmlRaiseError(NULL, NULL, NULL,
741 NULL, 0, (const char *) str1, (const char *) str2,
742 NULL, val, 0, msg, str1, val, str2);
743 if (ctxt != NULL) {
744 ctxt->wellFormed = 0;
745 if (ctxt->recovery == 0)
746 ctxt->disableSAX = 1;
747 }
748}
749
759static void LIBXML_ATTR_FORMAT(3,0)
760xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
761 const char *msg, const xmlChar * val)
762{
763 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
764 (ctxt->instate == XML_PARSER_EOF))
765 return;
766 if (ctxt != NULL)
767 ctxt->errNo = error;
768 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
770 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
771 val);
772 if (ctxt != NULL) {
773 ctxt->wellFormed = 0;
774 if (ctxt->recovery == 0)
775 ctxt->disableSAX = 1;
776 }
777}
778
788static void LIBXML_ATTR_FORMAT(3,0)
789xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
790 const char *msg, const xmlChar * val)
791{
792 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
793 (ctxt->instate == XML_PARSER_EOF))
794 return;
795 if (ctxt != NULL)
796 ctxt->errNo = error;
797 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
799 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
800 val);
801}
802
/*
 * Namespace error reporter.
 * NOTE(review): the line carrying the function name and its first
 * parameters is missing from this extract; the body uses `ctxt` and `error`,
 * which are presumably declared there - confirm against the real source.
 *
 * Raises an XML_ERR_ERROR level error in the XML_FROM_NAMESPACE domain,
 * using msg as the format string and info1/info2/info3 as its arguments.
 * With a non-NULL context, stores the error number in ctxt->errNo and clears
 * ctxt->nsWellFormed. Nothing is reported when SAX is already disabled and
 * the parser state is XML_PARSER_EOF.
 */
813static void LIBXML_ATTR_FORMAT(3,0)
815 const char *msg,
816 const xmlChar * info1, const xmlChar * info2,
817 const xmlChar * info3)
818{
/* Skip reporting when SAX is disabled and the state is XML_PARSER_EOF. */
819 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
820 (ctxt->instate == XML_PARSER_EOF))
821 return;
822 if (ctxt != NULL)
823 ctxt->errNo = error;
824 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
825 XML_ERR_ERROR, NULL, 0, (const char *) info1,
826 (const char *) info2, (const char *) info3, 0, 0, msg,
827 info1, info2, info3);
/* Only the namespace well-formedness flag is cleared here. */
828 if (ctxt != NULL)
829 ctxt->nsWellFormed = 0;
830}
831
/*
 * Namespace warning reporter.
 * NOTE(review): the line carrying the function name and its first
 * parameters is missing from this extract; the body uses `ctxt` and `error`,
 * which are presumably declared there - confirm against the real source.
 *
 * Raises an XML_ERR_WARNING level message in the XML_FROM_NAMESPACE domain,
 * using msg as the format string and info1/info2/info3 as its arguments.
 * No field of the context is modified. Nothing is reported when SAX is
 * already disabled and the parser state is XML_PARSER_EOF.
 */
842static void LIBXML_ATTR_FORMAT(3,0)
844 const char *msg,
845 const xmlChar * info1, const xmlChar * info2,
846 const xmlChar * info3)
847{
/* Skip reporting when SAX is disabled and the state is XML_PARSER_EOF. */
848 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
849 (ctxt->instate == XML_PARSER_EOF))
850 return;
851 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
852 XML_ERR_WARNING, NULL, 0, (const char *) info1,
853 (const char *) info2, (const char *) info3, 0, 0, msg,
854 info1, info2, info3);
855}
856
857/************************************************************************
858 * *
859 * Library wide options *
860 * *
861 ************************************************************************/
862
873int
875{
876 switch (feature) {
877 case XML_WITH_THREAD:
878#ifdef LIBXML_THREAD_ENABLED
879 return(1);
880#else
881 return(0);
882#endif
883 case XML_WITH_TREE:
884#ifdef LIBXML_TREE_ENABLED
885 return(1);
886#else
887 return(0);
888#endif
889 case XML_WITH_OUTPUT:
890#ifdef LIBXML_OUTPUT_ENABLED
891 return(1);
892#else
893 return(0);
894#endif
895 case XML_WITH_PUSH:
896#ifdef LIBXML_PUSH_ENABLED
897 return(1);
898#else
899 return(0);
900#endif
901 case XML_WITH_READER:
902#ifdef LIBXML_READER_ENABLED
903 return(1);
904#else
905 return(0);
906#endif
907 case XML_WITH_PATTERN:
908#ifdef LIBXML_PATTERN_ENABLED
909 return(1);
910#else
911 return(0);
912#endif
913 case XML_WITH_WRITER:
914#ifdef LIBXML_WRITER_ENABLED
915 return(1);
916#else
917 return(0);
918#endif
919 case XML_WITH_SAX1:
920#ifdef LIBXML_SAX1_ENABLED
921 return(1);
922#else
923 return(0);
924#endif
925 case XML_WITH_FTP:
926#ifdef LIBXML_FTP_ENABLED
927 return(1);
928#else
929 return(0);
930#endif
931 case XML_WITH_HTTP:
932#ifdef LIBXML_HTTP_ENABLED
933 return(1);
934#else
935 return(0);
936#endif
937 case XML_WITH_VALID:
938#ifdef LIBXML_VALID_ENABLED
939 return(1);
940#else
941 return(0);
942#endif
943 case XML_WITH_HTML:
944#ifdef LIBXML_HTML_ENABLED
945 return(1);
946#else
947 return(0);
948#endif
949 case XML_WITH_LEGACY:
950#ifdef LIBXML_LEGACY_ENABLED
951 return(1);
952#else
953 return(0);
954#endif
955 case XML_WITH_C14N:
956#ifdef LIBXML_C14N_ENABLED
957 return(1);
958#else
959 return(0);
960#endif
961 case XML_WITH_CATALOG:
962#ifdef LIBXML_CATALOG_ENABLED
963 return(1);
964#else
965 return(0);
966#endif
967 case XML_WITH_XPATH:
968#ifdef LIBXML_XPATH_ENABLED
969 return(1);
970#else
971 return(0);
972#endif
973 case XML_WITH_XPTR:
974#ifdef LIBXML_XPTR_ENABLED
975 return(1);
976#else
977 return(0);
978#endif
980#ifdef LIBXML_XINCLUDE_ENABLED
981 return(1);
982#else
983 return(0);
984#endif
985 case XML_WITH_ICONV:
986#ifdef LIBXML_ICONV_ENABLED
987 return(1);
988#else
989 return(0);
990#endif
992#ifdef LIBXML_ISO8859X_ENABLED
993 return(1);
994#else
995 return(0);
996#endif
997 case XML_WITH_UNICODE:
998#ifdef LIBXML_UNICODE_ENABLED
999 return(1);
1000#else
1001 return(0);
1002#endif
1003 case XML_WITH_REGEXP:
1004#ifdef LIBXML_REGEXP_ENABLED
1005 return(1);
1006#else
1007 return(0);
1008#endif
1009 case XML_WITH_AUTOMATA:
1010#ifdef LIBXML_AUTOMATA_ENABLED
1011 return(1);
1012#else
1013 return(0);
1014#endif
1015 case XML_WITH_EXPR:
1016#ifdef LIBXML_EXPR_ENABLED
1017 return(1);
1018#else
1019 return(0);
1020#endif
1021 case XML_WITH_SCHEMAS:
1022#ifdef LIBXML_SCHEMAS_ENABLED
1023 return(1);
1024#else
1025 return(0);
1026#endif
1028#ifdef LIBXML_SCHEMATRON_ENABLED
1029 return(1);
1030#else
1031 return(0);
1032#endif
1033 case XML_WITH_MODULES:
1034#ifdef LIBXML_MODULES_ENABLED
1035 return(1);
1036#else
1037 return(0);
1038#endif
1039 case XML_WITH_DEBUG:
1040#ifdef LIBXML_DEBUG_ENABLED
1041 return(1);
1042#else
1043 return(0);
1044#endif
1045 case XML_WITH_DEBUG_MEM:
1046#ifdef DEBUG_MEMORY_LOCATION
1047 return(1);
1048#else
1049 return(0);
1050#endif
1051 case XML_WITH_DEBUG_RUN:
1052#ifdef LIBXML_DEBUG_RUNTIME
1053 return(1);
1054#else
1055 return(0);
1056#endif
1057 case XML_WITH_ZLIB:
1058#ifdef LIBXML_ZLIB_ENABLED
1059 return(1);
1060#else
1061 return(0);
1062#endif
1063 case XML_WITH_LZMA:
1064#ifdef LIBXML_LZMA_ENABLED
1065 return(1);
1066#else
1067 return(0);
1068#endif
1069 case XML_WITH_ICU:
1070#ifdef LIBXML_ICU_ENABLED
1071 return(1);
1072#else
1073 return(0);
1074#endif
1075 default:
1076 break;
1077 }
1078 return(0);
1079}
1080
1081/************************************************************************
1082 * *
1083 * SAX2 defaulted attributes handling *
1084 * *
1085 ************************************************************************/
1086
1093static void
1096
1097 /* Avoid unused variable warning if features are disabled. */
1098 (void) sax;
1099
1100 if (ctxt == NULL) return;
1101 sax = ctxt->sax;
1102#ifdef LIBXML_SAX1_ENABLED
1103 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1104 ((sax->startElementNs != NULL) ||
1105 (sax->endElementNs != NULL) ||
1106 ((sax->startElement == NULL) && (sax->endElement == NULL))))
1107 ctxt->sax2 = 1;
1108#else
1109 ctxt->sax2 = 1;
1110#endif /* LIBXML_SAX1_ENABLED */
1111
1112 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1113 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1114 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1115 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1116 (ctxt->str_xml_ns == NULL)) {
1117 xmlErrMemory(ctxt, NULL);
1118 }
1119}
1120
1124 int nbAttrs; /* number of defaulted attributes on that element */
1125 int maxAttrs; /* the size of the array */
1126#if __STDC_VERSION__ >= 199901L
1127 /* Using a C99 flexible array member avoids UBSan errors. */
1128 const xmlChar *values[]; /* array of localname/prefix/values/external */
1129#else
1130 const xmlChar *values[5];
1131#endif
1132};
1133
/*
 * Space normalization of a NUL-terminated string.
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this extract; the body uses `src` (read) and `dst` (written).
 *
 * Copies src into dst while dropping leading 0x20 characters, collapsing
 * every run of 0x20 into a single one and dropping a trailing run, then
 * NUL-terminates dst.
 *
 * Returns NULL when either pointer is NULL, or when dst equals src once the
 * copy is finished; otherwise returns dst, which at that point addresses the
 * terminating NUL that was just written (not the start of the output).
 */
1151static xmlChar *
1153{
1154 if ((src == NULL) || (dst == NULL))
1155 return(NULL);
1156
/* Skip leading spaces. */
1157 while (*src == 0x20) src++;
1158 while (*src != 0) {
1159 if (*src == 0x20) {
/* Consume the whole run; emit one space only if more text follows. */
1160 while (*src == 0x20) src++;
1161 if (*src != 0)
1162 *dst++ = 0x20;
1163 } else {
1164 *dst++ = *src++;
1165 }
1166 }
1167 *dst = 0;
/*
 * Both cursors meeting means nothing was removed; presumably this only
 * happens when the caller normalizes in place - TODO confirm.
 */
1168 if (dst == src)
1169 return(NULL);
1170 return(dst);
1171}
1172
1184static const xmlChar *
1186{
1187 int i;
1188 int remove_head = 0;
1189 int need_realloc = 0;
1190 const xmlChar *cur;
1191
1192 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1193 return(NULL);
1194 i = *len;
1195 if (i <= 0)
1196 return(NULL);
1197
1198 cur = src;
1199 while (*cur == 0x20) {
1200 cur++;
1201 remove_head++;
1202 }
1203 while (*cur != 0) {
1204 if (*cur == 0x20) {
1205 cur++;
1206 if ((*cur == 0x20) || (*cur == 0)) {
1207 need_realloc = 1;
1208 break;
1209 }
1210 } else
1211 cur++;
1212 }
1213 if (need_realloc) {
1214 xmlChar *ret;
1215
1216 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1217 if (ret == NULL) {
1218 xmlErrMemory(ctxt, NULL);
1219 return(NULL);
1220 }
1222 *len = (int) strlen((const char *)ret);
1223 return(ret);
1224 } else if (remove_head) {
1225 *len -= remove_head;
1226 memmove(src, src + remove_head, 1 + *len);
1227 return(src);
1228 }
1229 return(NULL);
1230}
1231
1241static void
1243 const xmlChar *fullname,
1244 const xmlChar *fullattr,
1245 const xmlChar *value) {
1247 int len;
1248 const xmlChar *name;
1249 const xmlChar *prefix;
1250
1251 /*
1252 * Allows to detect attribute redefinitions
1253 */
1254 if (ctxt->attsSpecial != NULL) {
1255 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1256 return;
1257 }
1258
1259 if (ctxt->attsDefault == NULL) {
1260 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1261 if (ctxt->attsDefault == NULL)
1262 goto mem_error;
1263 }
1264
1265 /*
1266 * split the element name into prefix:localname , the string found
1267 * are within the DTD and then not associated to namespace names.
1268 */
1270 if (name == NULL) {
1271 name = xmlDictLookup(ctxt->dict, fullname, -1);
1272 prefix = NULL;
1273 } else {
1274 name = xmlDictLookup(ctxt->dict, name, -1);
1275 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1276 }
1277
1278 /*
1279 * make sure there is some storage
1280 */
1281 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1282 if (defaults == NULL) {
1284 (4 * 5) * sizeof(const xmlChar *));
1285 if (defaults == NULL)
1286 goto mem_error;
1287 defaults->nbAttrs = 0;
1288 defaults->maxAttrs = 4;
1289 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1290 defaults, NULL) < 0) {
1292 goto mem_error;
1293 }
1294 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1296
1298 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1299 if (temp == NULL)
1300 goto mem_error;
1301 defaults = temp;
1302 defaults->maxAttrs *= 2;
1303 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1304 defaults, NULL) < 0) {
1306 goto mem_error;
1307 }
1308 }
1309
1310 /*
1311 * Split the element name into prefix:localname , the string found
1312 * are within the DTD and hen not associated to namespace names.
1313 */
1314 name = xmlSplitQName3(fullattr, &len);
1315 if (name == NULL) {
1316 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1317 prefix = NULL;
1318 } else {
1319 name = xmlDictLookup(ctxt->dict, name, -1);
1320 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1321 }
1322
1323 defaults->values[5 * defaults->nbAttrs] = name;
1324 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1325 /* intern the string and precompute the end */
1326 len = xmlStrlen(value);
1327 value = xmlDictLookup(ctxt->dict, value, len);
1328 defaults->values[5 * defaults->nbAttrs + 2] = value;
1329 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1330 if (ctxt->external)
1331 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1332 else
1333 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1334 defaults->nbAttrs++;
1335
1336 return;
1337
1338mem_error:
1339 xmlErrMemory(ctxt, NULL);
1340 return;
1341}
1342
/*
 * Record the type of an attribute in the ctxt->attsSpecial hash table.
 * NOTE(review): the line carrying the function name and the first parameter
 * is missing from this extract; the body uses `ctxt`, presumably declared
 * there.
 *
 * fullname:  first hash key (presumably the element QName - confirm)
 * fullattr:  second hash key (presumably the attribute QName - confirm)
 * type:  the value stored, cast to a pointer-sized integer
 *
 * The table is created on first use with the context dictionary. An
 * existing entry for the same (fullname, fullattr) pair is left untouched.
 * A failure to create the table is reported through xmlErrMemory().
 */
1352static void
1354 const xmlChar *fullname,
1355 const xmlChar *fullattr,
1356 int type)
1357{
1358 if (ctxt->attsSpecial == NULL) {
1359 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1360 if (ctxt->attsSpecial == NULL)
1361 goto mem_error;
1362 }
1363
/* First registration wins: never overwrite an existing entry. */
1364 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1365 return;
1366
/* NOTE(review): the result of xmlHashAddEntry2() is not checked here. */
1367 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1368 (void *) (ptrdiff_t) type);
1369 return;
1370
1371mem_error:
1372 xmlErrMemory(ctxt, NULL);
1373 return;
1374}
1375
1381static void
1383 const xmlChar *fullname, const xmlChar *fullattr,
1386
1387 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1388 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1389 }
1390}
1391
1400static void
1402{
1403 if (ctxt->attsSpecial == NULL)
1404 return;
1405
1407
1408 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1410 ctxt->attsSpecial = NULL;
1411 }
1412 return;
1413}
1414
1473int
1475{
1476 const xmlChar *cur = lang, *nxt;
1477
1478 if (cur == NULL)
1479 return (0);
1480 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1481 ((cur[0] == 'I') && (cur[1] == '-')) ||
1482 ((cur[0] == 'x') && (cur[1] == '-')) ||
1483 ((cur[0] == 'X') && (cur[1] == '-'))) {
1484 /*
1485 * Still allow IANA code and user code which were coming
1486 * from the previous version of the XML-1.0 specification
1487 * it's deprecated but we should not fail
1488 */
1489 cur += 2;
1490 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1491 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1492 cur++;
1493 return(cur[0] == 0);
1494 }
1495 nxt = cur;
1496 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1497 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498 nxt++;
1499 if (nxt - cur >= 4) {
1500 /*
1501 * Reserved
1502 */
1503 if ((nxt - cur > 8) || (nxt[0] != 0))
1504 return(0);
1505 return(1);
1506 }
1507 if (nxt - cur < 2)
1508 return(0);
1509 /* we got an ISO 639 code */
1510 if (nxt[0] == 0)
1511 return(1);
1512 if (nxt[0] != '-')
1513 return(0);
1514
1515 nxt++;
1516 cur = nxt;
1517 /* now we can have extlang or script or region or variant */
1518 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1519 goto region_m49;
1520
1521 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1522 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1523 nxt++;
1524 if (nxt - cur == 4)
1525 goto script;
1526 if (nxt - cur == 2)
1527 goto region;
1528 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1529 goto variant;
1530 if (nxt - cur != 3)
1531 return(0);
1532 /* we parsed an extlang */
1533 if (nxt[0] == 0)
1534 return(1);
1535 if (nxt[0] != '-')
1536 return(0);
1537
1538 nxt++;
1539 cur = nxt;
1540 /* now we can have script or region or variant */
1541 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1542 goto region_m49;
1543
1544 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1545 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1546 nxt++;
1547 if (nxt - cur == 2)
1548 goto region;
1549 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1550 goto variant;
1551 if (nxt - cur != 4)
1552 return(0);
1553 /* we parsed a script */
1554script:
1555 if (nxt[0] == 0)
1556 return(1);
1557 if (nxt[0] != '-')
1558 return(0);
1559
1560 nxt++;
1561 cur = nxt;
1562 /* now we can have region or variant */
1563 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1564 goto region_m49;
1565
1566 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1567 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1568 nxt++;
1569
1570 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1571 goto variant;
1572 if (nxt - cur != 2)
1573 return(0);
1574 /* we parsed a region */
1575region:
1576 if (nxt[0] == 0)
1577 return(1);
1578 if (nxt[0] != '-')
1579 return(0);
1580
1581 nxt++;
1582 cur = nxt;
1583 /* now we can just have a variant */
1584 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1585 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1586 nxt++;
1587
1588 if ((nxt - cur < 5) || (nxt - cur > 8))
1589 return(0);
1590
1591 /* we parsed a variant */
1592variant:
1593 if (nxt[0] == 0)
1594 return(1);
1595 if (nxt[0] != '-')
1596 return(0);
1597 /* extensions and private use subtags not checked */
1598 return (1);
1599
1600region_m49:
1601 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1602 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1603 nxt += 3;
1604 goto region;
1605 }
1606 return(0);
1607}
1608
1609/************************************************************************
1610 * *
1611 * Parser stacks related functions and macros *
1612 * *
1613 ************************************************************************/
1614
1616 const xmlChar ** str);
1617
1618#ifdef SAX2
1630static int
1631nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1632{
1633 if (ctxt->options & XML_PARSE_NSCLEAN) {
1634 int i;
1635 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1636 if (ctxt->nsTab[i] == prefix) {
1637 /* in scope */
1638 if (ctxt->nsTab[i + 1] == URL)
1639 return(-2);
1640 /* out of scope keep it */
1641 break;
1642 }
1643 }
1644 }
1645 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1646 ctxt->nsMax = 10;
1647 ctxt->nsNr = 0;
1648 ctxt->nsTab = (const xmlChar **)
1649 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1650 if (ctxt->nsTab == NULL) {
1651 xmlErrMemory(ctxt, NULL);
1652 ctxt->nsMax = 0;
1653 return (-1);
1654 }
1655 } else if (ctxt->nsNr >= ctxt->nsMax) {
1656 const xmlChar ** tmp;
1657 ctxt->nsMax *= 2;
1658 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1659 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1660 if (tmp == NULL) {
1661 xmlErrMemory(ctxt, NULL);
1662 ctxt->nsMax /= 2;
1663 return (-1);
1664 }
1665 ctxt->nsTab = tmp;
1666 }
1667 ctxt->nsTab[ctxt->nsNr++] = prefix;
1668 ctxt->nsTab[ctxt->nsNr++] = URL;
1669 return (ctxt->nsNr);
1670}
/*
 * Namespace-table pop helper. The line carrying the function name and
 * parameter list is not present in this listing; the body uses `ctxt`
 * (parser context) and `nr` (number of table slots to drop).
 *
 * Clears up to `nr` slots from the top of ctxt->nsTab, decrementing
 * ctxt->nsNr once per cleared slot.
 *
 * Returns 0 when the table was never allocated or is empty, otherwise the
 * (possibly clamped) number of slots cleared.
 */
1680static int
1682{
1683 int i;
1684
1685 if (ctxt->nsTab == NULL) return(0);
/* Asked for more than is stored: report it and clamp to what is there. */
1686 if (ctxt->nsNr < nr) {
1687 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1688 nr = ctxt->nsNr;
1689 }
1690 if (ctxt->nsNr <= 0)
1691 return (0);
1692
/* Drop the slots one at a time, leaving NULL behind in each freed slot. */
1693 for (i = 0;i < nr;i++) {
1694 ctxt->nsNr--;
1695 ctxt->nsTab[ctxt->nsNr] = NULL;
1696 }
1697 return(nr);
1698}
1699#endif
1700
/*
 * Attribute-table growth helper (presumably xmlCtxtGrowAttrs -- the line
 * carrying the name, the parameter list and the opening brace is not
 * present in this listing; the body uses `ctxt` and `nr`).
 *
 * Makes sure ctxt->atts and ctxt->attallocs exist and that ctxt->atts has
 * room for at least nr + 5 entries. ctxt->attallocs is always sized to
 * maxatts / 5 ints.
 *
 * Returns ctxt->maxatts on success, -1 after reporting a memory error.
 * ctxt->maxatts is only updated once both arrays were (re)allocated.
 */
1701static int
1703 const xmlChar **atts;
1704 int *attallocs;
1705 int maxatts;
1706
/* First call: allocate both arrays with the default capacity. */
1707 if (ctxt->atts == NULL) {
1708 maxatts = 55; /* allow for 10 attrs by default */
1709 atts = (const xmlChar **)
1710 xmlMalloc(maxatts * sizeof(xmlChar *));
1711 if (atts == NULL) goto mem_error;
1712 ctxt->atts = atts;
1713 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1714 if (attallocs == NULL) goto mem_error;
1715 ctxt->attallocs = attallocs;
1716 ctxt->maxatts = maxatts;
/* Not enough room for nr + 5 entries: grow to twice that amount. */
1717 } else if (nr + 5 > ctxt->maxatts) {
1718 maxatts = (nr + 5) * 2;
1719 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1720 maxatts * sizeof(const xmlChar *));
1721 if (atts == NULL) goto mem_error;
1722 ctxt->atts = atts;
1723 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1724 (maxatts / 5) * sizeof(int));
1725 if (attallocs == NULL) goto mem_error;
1726 ctxt->attallocs = attallocs;
1727 ctxt->maxatts = maxatts;
1728 }
1729 return(ctxt->maxatts);
/* Shared failure path: report the allocation error to the context. */
1730mem_error:
1731 xmlErrMemory(ctxt, NULL);
1732 return(-1);
1733}
1734
/*
 * Input-stack push (presumably inputPush -- the signature line is not
 * present in this listing; the body uses `ctxt` and `value`).
 *
 * Stores `value` on top of ctxt->inputTab, doubling the recorded capacity
 * when the stack is full, and makes it the current input (ctxt->input).
 *
 * Returns -1 when either argument is NULL or the table could not be
 * grown, otherwise the index at which `value` was stored.
 */
1744int
1746{
1747 if ((ctxt == NULL) || (value == NULL))
1748 return(-1);
1749 if (ctxt->inputNr >= ctxt->inputMax) {
1750 ctxt->inputMax *= 2;
/*
 * NOTE(review): the line with the allocation call is missing from this
 * listing. The result is assigned straight to ctxt->inputTab, so if that
 * call is a realloc the previous table pointer is lost when it fails --
 * confirm against the full source.
 */
1751 ctxt->inputTab =
1753 ctxt->inputMax *
1754 sizeof(ctxt->inputTab[0]));
1755 if (ctxt->inputTab == NULL) {
1756 xmlErrMemory(ctxt, NULL);
/* Undo the capacity doubling done above. */
1757 ctxt->inputMax /= 2;
1758 return (-1);
1759 }
1760 }
1761 ctxt->inputTab[ctxt->inputNr] = value;
1762 ctxt->input = value;
/* Post-increment: the caller gets the slot index, not the new depth. */
1763 return (ctxt->inputNr++);
1764}
/*
 * Input-stack pop (presumably inputPop -- the signature line and the
 * declaration of `ret` are not present in this listing).
 *
 * Removes the top entry of ctxt->inputTab, points ctxt->input at the new
 * top (or NULL when the stack becomes empty) and clears the vacated slot.
 *
 * Returns the removed entry, or NULL when ctxt is NULL or the stack is
 * already empty.
 */
1775{
1777
1778 if (ctxt == NULL)
1779 return(NULL);
1780 if (ctxt->inputNr <= 0)
1781 return (NULL);
1782 ctxt->inputNr--;
/* The current input becomes the entry just below the one being removed. */
1783 if (ctxt->inputNr > 0)
1784 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1785 else
1786 ctxt->input = NULL;
1787 ret = ctxt->inputTab[ctxt->inputNr];
1788 ctxt->inputTab[ctxt->inputNr] = NULL;
1789 return (ret);
1790}
/*
 * Node-stack push (presumably nodePush -- the signature line is not
 * present in this listing; the body uses `ctxt` and `value`).
 *
 * Stores `value` on top of ctxt->nodeTab, doubling the table when it is
 * full, and makes it the current node (ctxt->node).
 *
 * Returns the index at which `value` was stored, -1 when the table cannot
 * be grown or the depth limit is exceeded, and 0 when ctxt is NULL (note
 * that 0 is also a valid index).
 */
1800int
1802{
1803 if (ctxt == NULL) return(0);
1804 if (ctxt->nodeNr >= ctxt->nodeMax) {
1805 xmlNodePtr *tmp;
1806
/* Grow through a temporary so the old table survives a failed realloc. */
1807 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1808 ctxt->nodeMax * 2 *
1809 sizeof(ctxt->nodeTab[0]));
1810 if (tmp == NULL) {
1811 xmlErrMemory(ctxt, NULL);
1812 return (-1);
1813 }
1814 ctxt->nodeTab = tmp;
1815 ctxt->nodeMax *= 2;
1816 }
/*
 * Depth guard: beyond xmlParserMaxDepth the parser is halted unless
 * XML_PARSE_HUGE is set. (The last argument line of the error call is
 * missing from this listing.)
 */
1817 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1818 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1819 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1820 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1822 xmlHaltParser(ctxt);
1823 return(-1);
1824 }
1825 ctxt->nodeTab[ctxt->nodeNr] = value;
1826 ctxt->node = value;
/* Post-increment: the caller gets the slot index, not the new depth. */
1827 return (ctxt->nodeNr++);
1828}
1829
/*
 * Node-stack pop (presumably nodePop -- the signature line and the
 * declaration of `ret` are not present in this listing).
 *
 * Removes the top entry of ctxt->nodeTab, points ctxt->node at the new
 * top (or NULL when the stack becomes empty) and clears the vacated slot.
 *
 * Returns the removed entry, or NULL when ctxt is NULL or the stack is
 * already empty.
 */
1840{
1842
1843 if (ctxt == NULL) return(NULL);
1844 if (ctxt->nodeNr <= 0)
1845 return (NULL);
1846 ctxt->nodeNr--;
/* The current node becomes the entry just below the one being removed. */
1847 if (ctxt->nodeNr > 0)
1848 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1849 else
1850 ctxt->node = NULL;
1851 ret = ctxt->nodeTab[ctxt->nodeNr];
1852 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1853 return (ret);
1854}
1855
/*
 * Element-name push with start-tag data (presumably nameNsPush -- the
 * first signature line and the declaration of `tag` are not present in
 * this listing; the body uses `ctxt`, `value`, `prefix`, `URI`, `line`
 * and `nsNr`).
 *
 * Stores `value` on top of ctxt->nameTab and records prefix, URI, line
 * and nsNr in the ctxt->pushTab entry at the same index. Both tables are
 * kept at ctxt->nameMax entries; pushTab is allocated on first use.
 *
 * Returns the index at which the name was stored, or -1 after reporting
 * a memory error.
 */
1869static int
1871 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1872{
1874
/* Stack full: double the capacity of both parallel tables. */
1875 if (ctxt->nameNr >= ctxt->nameMax) {
1876 const xmlChar * *tmp;
1877 xmlStartTag *tmp2;
1878 ctxt->nameMax *= 2;
1879 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1880 ctxt->nameMax *
1881 sizeof(ctxt->nameTab[0]));
1882 if (tmp == NULL) {
1883 ctxt->nameMax /= 2;
1884 goto mem_error;
1885 }
1886 ctxt->nameTab = tmp;
1887 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1888 ctxt->nameMax *
1889 sizeof(ctxt->pushTab[0]));
/*
 * If only this second realloc fails, nameTab stays at its enlarged size
 * while nameMax is restored to the old capacity.
 */
1890 if (tmp2 == NULL) {
1891 ctxt->nameMax /= 2;
1892 goto mem_error;
1893 }
1894 ctxt->pushTab = tmp2;
/* Room left in nameTab but pushTab was never allocated: create it now. */
1895 } else if (ctxt->pushTab == NULL) {
1896 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1897 sizeof(ctxt->pushTab[0]));
1898 if (ctxt->pushTab == NULL)
1899 goto mem_error;
1900 }
1901 ctxt->nameTab[ctxt->nameNr] = value;
1902 ctxt->name = value;
/* Fill the start-tag record that sits at the same index as the name. */
1903 tag = &ctxt->pushTab[ctxt->nameNr];
1904 tag->prefix = prefix;
1905 tag->URI = URI;
1906 tag->line = line;
1907 tag->nsNr = nsNr;
1908 return (ctxt->nameNr++);
1909mem_error:
1910 xmlErrMemory(ctxt, NULL);
1911 return (-1);
1912}
1913#ifdef LIBXML_PUSH_ENABLED
1922static const xmlChar *
1923nameNsPop(xmlParserCtxtPtr ctxt)
1924{
1925 const xmlChar *ret;
1926
1927 if (ctxt->nameNr <= 0)
1928 return (NULL);
1929 ctxt->nameNr--;
1930 if (ctxt->nameNr > 0)
1931 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1932 else
1933 ctxt->name = NULL;
1934 ret = ctxt->nameTab[ctxt->nameNr];
1935 ctxt->nameTab[ctxt->nameNr] = NULL;
1936 return (ret);
1937}
1938#endif /* LIBXML_PUSH_ENABLED */
1939
/*
 * Element-name push (presumably namePush -- the signature line is not
 * present in this listing; the body uses `ctxt` and `value`).
 *
 * Stores `value` on top of ctxt->nameTab, doubling the table when it is
 * full, and makes it the current name (ctxt->name).
 *
 * Returns the index at which the name was stored, or -1 when ctxt is
 * NULL or the table could not be grown.
 */
1949int
1951{
1952 if (ctxt == NULL) return (-1);
1953
1954 if (ctxt->nameNr >= ctxt->nameMax) {
1955 const xmlChar * *tmp;
/* Grow through a temporary; nameMax is only doubled once this succeeded. */
1956 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1957 ctxt->nameMax * 2 *
1958 sizeof(ctxt->nameTab[0]));
1959 if (tmp == NULL) {
1960 goto mem_error;
1961 }
1962 ctxt->nameTab = tmp;
1963 ctxt->nameMax *= 2;
1964 }
1965 ctxt->nameTab[ctxt->nameNr] = value;
1966 ctxt->name = value;
/* Post-increment: the caller gets the slot index, not the new depth. */
1967 return (ctxt->nameNr++);
1968mem_error:
1969 xmlErrMemory(ctxt, NULL);
1970 return (-1);
1971}
/*
 * Element-name pop (presumably namePop -- the signature line is not
 * present in this listing; the body uses `ctxt`).
 *
 * Removes the top entry of ctxt->nameTab, points ctxt->name at the new
 * top (or NULL when the stack becomes empty) and clears the vacated slot.
 *
 * Returns the removed name, or NULL when ctxt is NULL or the stack is
 * already empty.
 */
1980const xmlChar *
1982{
1983 const xmlChar *ret;
1984
1985 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1986 return (NULL);
1987 ctxt->nameNr--;
/* The current name becomes the entry just below the one being removed. */
1988 if (ctxt->nameNr > 0)
1989 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1990 else
1991 ctxt->name = NULL;
1992 ret = ctxt->nameTab[ctxt->nameNr];
1993 ctxt->nameTab[ctxt->nameNr] = NULL;
1994 return (ret);
1995}
1996
1997static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1998 if (ctxt->spaceNr >= ctxt->spaceMax) {
1999 int *tmp;
2000
2001 ctxt->spaceMax *= 2;
2002 tmp = (int *) xmlRealloc(ctxt->spaceTab,
2003 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2004 if (tmp == NULL) {
2005 xmlErrMemory(ctxt, NULL);
2006 ctxt->spaceMax /=2;
2007 return(-1);
2008 }
2009 ctxt->spaceTab = tmp;
2010 }
2011 ctxt->spaceTab[ctxt->spaceNr] = val;
2012 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2013 return(ctxt->spaceNr++);
2014}
2015
2016static int spacePop(xmlParserCtxtPtr ctxt) {
2017 int ret;
2018 if (ctxt->spaceNr <= 0) return(0);
2019 ctxt->spaceNr--;
2020 if (ctxt->spaceNr > 0)
2021 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2022 else
2023 ctxt->space = &ctxt->spaceTab[0];
2024 ret = ctxt->spaceTab[ctxt->spaceNr];
2025 ctxt->spaceTab[ctxt->spaceNr] = -1;
2026 return(ret);
2027}
2028
2029/*
2030 * Macros for accessing the content. Those should be used only by the parser,
2031 * and not exported.
2032 *
2033 * Dirty macros, i.e. one often need to make assumption on the context to
2034 * use them
2035 *
2036 * CUR_PTR return the current pointer to the xmlChar to be parsed.
2037 * To be used with extreme caution since operations consuming
2038 * characters may move the input buffer to a different location !
2039 * CUR returns the current xmlChar value, i.e. an 8 bit value.
2040 * This should be used internally by the parser
2041 * only to compare to ASCII values otherwise it would break when
2042 * running with UTF-8 encoding.
2043 * RAW same as CUR but in the input buffer, bypass any token
2044 * extraction that may have been done
2045 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
2046 * to compare on ASCII based substring.
2047 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2048 * strings without newlines within the parser.
2049 * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2050 * defined char within the parser.
2051 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
2052 *
2053 * NEXT Skip to the next character, this does the proper decoding
2054 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2055 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2056 * CUR_CHAR(l) returns the current unicode character (int), set l
2057 * to the number of xmlChars used for the encoding [0-5].
2058 * CUR_SCHAR same but operate on a string instead of the context
2059 * COPY_BUF copy the current unicode char to the target buffer, increment
2060 * the index
2061 * GROW, SHRINK handling of input buffers
2062 */
2063
2064#define RAW (*ctxt->input->cur)
2065#define CUR (*ctxt->input->cur)
2066#define NXT(val) ctxt->input->cur[(val)]
2067#define CUR_PTR ctxt->input->cur
2068#define BASE_PTR ctxt->input->base
2069
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1 .. cn, compared as unsigned char.
 *
 * The comparisons are chained with &&, so evaluation stops at the first
 * mismatching byte. s is evaluated once per byte examined: do not pass an
 * expression with side effects.
 *
 * Every argument is parenthesized in the expansion so that compound
 * expressions (pointer arithmetic, conditional expressions, ...) are
 * compared as a whole; the cast keeps the pointed-to bytes const.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2087
2088#define SKIP(val) do { \
2089 ctxt->input->cur += (val),ctxt->input->col+=(val); \
2090 if (*ctxt->input->cur == 0) \
2091 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2092 } while (0)
2093
/*
 * SKIPL(val): advance ctxt->input->cur by val xmlChars one at a time,
 * keeping ctxt->input->line / ctxt->input->col up to date (a '\n' starts
 * a new line at column 1), then ask for more input when the new current
 * byte is 0.
 *
 * val is parenthesized in the loop test so that any expression can be
 * passed; it is re-evaluated on every iteration, so it must not have
 * side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2105
2106#define SHRINK if ((ctxt->progressive == 0) && \
2107 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2108 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2109 xmlSHRINK (ctxt);
2110
2111static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2113 if (*ctxt->input->cur == 0)
2115}
2116
2117#define GROW if ((ctxt->progressive == 0) && \
2118 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2119 xmlGROW (ctxt);
2120
2121static void xmlGROW (xmlParserCtxtPtr ctxt) {
2122 ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2123 ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2124
2125 if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2126 (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2127 ((ctxt->input->buf) &&
2129 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2130 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2131 xmlHaltParser(ctxt);
2132 return;
2133 }
2135 if ((ctxt->input->cur > ctxt->input->end) ||
2136 (ctxt->input->cur < ctxt->input->base)) {
2137 xmlHaltParser(ctxt);
2138 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2139 return;
2140 }
2141 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2143}
2144
2145#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2146
2147#define NEXT xmlNextChar(ctxt)
2148
2149#define NEXT1 { \
2150 ctxt->input->col++; \
2151 ctxt->input->cur++; \
2152 if (*ctxt->input->cur == 0) \
2153 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2154 }
2155
2156#define NEXTL(l) do { \
2157 if (*(ctxt->input->cur) == '\n') { \
2158 ctxt->input->line++; ctxt->input->col = 1; \
2159 } else ctxt->input->col++; \
2160 ctxt->input->cur += l; \
2161 } while (0)
2162
2163#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2164#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2165
2166#define COPY_BUF(l,b,i,v) \
2167 if (l == 1) b[i++] = (xmlChar) v; \
2168 else i += xmlCopyCharMultiByte(&b[i],v)
2169
2170#define CUR_CONSUMED \
2171 (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2172
2183int
2185 int res = 0;
2186
2187 /*
2188 * It's Okay to use CUR/NEXT here since all the blanks are on
2189 * the ASCII range.
2190 */
2191 if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2192 (ctxt->instate == XML_PARSER_START)) {
2193 const xmlChar *cur;
2194 /*
2195 * if we are in the document content, go really fast
2196 */
2197 cur = ctxt->input->cur;
2198 while (IS_BLANK_CH(*cur)) {
2199 if (*cur == '\n') {
2200 ctxt->input->line++; ctxt->input->col = 1;
2201 } else {
2202 ctxt->input->col++;
2203 }
2204 cur++;
2205 if (res < INT_MAX)
2206 res++;
2207 if (*cur == 0) {
2208 ctxt->input->cur = cur;
2210 cur = ctxt->input->cur;
2211 }
2212 }
2213 ctxt->input->cur = cur;
2214 } else {
2215 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2216
2217 while (1) {
2218 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2219 NEXT;
2220 } else if (CUR == '%') {
2221 /*
2222 * Need to handle support of entities branching here
2223 */
2224 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2225 break;
2226 xmlParsePEReference(ctxt);
2227 } else if (CUR == 0) {
2228 if (ctxt->inputNr <= 1)
2229 break;
2230 xmlPopInput(ctxt);
2231 } else {
2232 break;
2233 }
2234
2235 /*
2236 * Also increase the counter when entering or exiting a PERef.
2237 * The spec says: "When a parameter-entity reference is recognized
2238 * in the DTD and included, its replacement text MUST be enlarged
2239 * by the attachment of one leading and one following space (#x20)
2240 * character."
2241 */
2242 if (res < INT_MAX)
2243 res++;
2244 }
2245 }
2246 return(res);
2247}
2248
2249/************************************************************************
2250 * *
2251 * Commodity functions to handle entities *
2252 * *
2253 ************************************************************************/
2254
2264xmlChar
2266 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2269 "Popping input %d\n", ctxt->inputNr);
2270 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2271 (ctxt->instate != XML_PARSER_EOF))
2273 "Unfinished entity outside the DTD");
2275 if (*ctxt->input->cur == 0)
2277 return(CUR);
2278}
2279
2289int
2291 int ret;
2292 if (input == NULL) return(-1);
2293
2295 if ((ctxt->input != NULL) && (ctxt->input->filename))
2297 "%s(%d): ", ctxt->input->filename,
2298 ctxt->input->line);
2300 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2301 }
2302 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2303 (ctxt->inputNr > 1024)) {
2305 while (ctxt->inputNr > 1)
2307 return(-1);
2308 }
2309 ret = inputPush(ctxt, input);
2310 if (ctxt->instate == XML_PARSER_EOF)
2311 return(-1);
2312 GROW;
2313 return(ret);
2314}
2315
2331int
2333 int val = 0;
2334 int count = 0;
2335
2336 /*
2337 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2338 */
2339 if ((RAW == '&') && (NXT(1) == '#') &&
2340 (NXT(2) == 'x')) {
2341 SKIP(3);
2342 GROW;
2343 while (RAW != ';') { /* loop blocked by count */
2344 if (count++ > 20) {
2345 count = 0;
2346 GROW;
2347 if (ctxt->instate == XML_PARSER_EOF)
2348 return(0);
2349 }
2350 if ((RAW >= '0') && (RAW <= '9'))
2351 val = val * 16 + (CUR - '0');
2352 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2353 val = val * 16 + (CUR - 'a') + 10;
2354 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2355 val = val * 16 + (CUR - 'A') + 10;
2356 else {
2358 val = 0;
2359 break;
2360 }
2361 if (val > 0x110000)
2362 val = 0x110000;
2363
2364 NEXT;
2365 count++;
2366 }
2367 if (RAW == ';') {
2368 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2369 ctxt->input->col++;
2370 ctxt->input->cur++;
2371 }
2372 } else if ((RAW == '&') && (NXT(1) == '#')) {
2373 SKIP(2);
2374 GROW;
2375 while (RAW != ';') { /* loop blocked by count */
2376 if (count++ > 20) {
2377 count = 0;
2378 GROW;
2379 if (ctxt->instate == XML_PARSER_EOF)
2380 return(0);
2381 }
2382 if ((RAW >= '0') && (RAW <= '9'))
2383 val = val * 10 + (CUR - '0');
2384 else {
2386 val = 0;
2387 break;
2388 }
2389 if (val > 0x110000)
2390 val = 0x110000;
2391
2392 NEXT;
2393 count++;
2394 }
2395 if (RAW == ';') {
2396 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2397 ctxt->input->col++;
2398 ctxt->input->cur++;
2399 }
2400 } else {
2402 }
2403
2404 /*
2405 * [ WFC: Legal Character ]
2406 * Characters referred to using character references must match the
2407 * production for Char.
2408 */
2409 if (val >= 0x110000) {
2410 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2411 "xmlParseCharRef: character reference out of bounds\n",
2412 val);
2413 } else if (IS_CHAR(val)) {
2414 return(val);
2415 } else {
2416 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2417 "xmlParseCharRef: invalid xmlChar value %d\n",
2418 val);
2419 }
2420 return(0);
2421}
2422
2441static int
2443 const xmlChar *ptr;
2444 xmlChar cur;
2445 int val = 0;
2446
2447 if ((str == NULL) || (*str == NULL)) return(0);
2448 ptr = *str;
2449 cur = *ptr;
2450 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2451 ptr += 3;
2452 cur = *ptr;
2453 while (cur != ';') { /* Non input consuming loop */
2454 if ((cur >= '0') && (cur <= '9'))
2455 val = val * 16 + (cur - '0');
2456 else if ((cur >= 'a') && (cur <= 'f'))
2457 val = val * 16 + (cur - 'a') + 10;
2458 else if ((cur >= 'A') && (cur <= 'F'))
2459 val = val * 16 + (cur - 'A') + 10;
2460 else {
2462 val = 0;
2463 break;
2464 }
2465 if (val > 0x110000)
2466 val = 0x110000;
2467
2468 ptr++;
2469 cur = *ptr;
2470 }
2471 if (cur == ';')
2472 ptr++;
2473 } else if ((cur == '&') && (ptr[1] == '#')){
2474 ptr += 2;
2475 cur = *ptr;
2476 while (cur != ';') { /* Non input consuming loops */
2477 if ((cur >= '0') && (cur <= '9'))
2478 val = val * 10 + (cur - '0');
2479 else {
2481 val = 0;
2482 break;
2483 }
2484 if (val > 0x110000)
2485 val = 0x110000;
2486
2487 ptr++;
2488 cur = *ptr;
2489 }
2490 if (cur == ';')
2491 ptr++;
2492 } else {
2494 return(0);
2495 }
2496 *str = ptr;
2497
2498 /*
2499 * [ WFC: Legal Character ]
2500 * Characters referred to using character references must match the
2501 * production for Char.
2502 */
2503 if (val >= 0x110000) {
2504 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2505 "xmlParseStringCharRef: character reference out of bounds\n",
2506 val);
2507 } else if (IS_CHAR(val)) {
2508 return(val);
2509 } else {
2510 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2511 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2512 val);
2513 }
2514 return(0);
2515}
2516
2549void
2551 switch(ctxt->instate) {
2553 return;
2554 case XML_PARSER_COMMENT:
2555 return;
2557 return;
2558 case XML_PARSER_END_TAG:
2559 return;
2560 case XML_PARSER_EOF:
2562 return;
2563 case XML_PARSER_PROLOG:
2564 case XML_PARSER_START:
2565 case XML_PARSER_MISC:
2567 return;
2569 case XML_PARSER_CONTENT:
2571 case XML_PARSER_PI:
2574 /* we just ignore it there */
2575 return;
2576 case XML_PARSER_EPILOG:
2578 return;
2580 /*
2581 * NOTE: in the case of entity values, we don't do the
2582 * substitution here since we need the literal
2583 * entity value to be able to save the internal
2584 * subset of the document.
2585 * This will be handled by xmlStringDecodeEntities
2586 */
2587 return;
2588 case XML_PARSER_DTD:
2589 /*
2590 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2591 * In the internal DTD subset, parameter-entity references
2592 * can occur only where markup declarations can occur, not
2593 * within markup declarations.
2594 * In that case this is handled in xmlParseMarkupDecl
2595 */
2596 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2597 return;
2598 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2599 return;
2600 break;
2601 case XML_PARSER_IGNORE:
2602 return;
2603 }
2604
2605 xmlParsePEReference(ctxt);
2606}
2607
/*
 * Macro used to grow the current buffer.
 *
 * buffer:        an lvalue of type xmlChar * holding the current
 *                allocation; replaced by the reallocated block
 * buffer##_size: must exist as a size_t lvalue in the calling scope; it
 *                is updated to the new capacity
 * n:             extra room requested on top of doubling (parenthesized
 *                in the expansion, so any expression may be passed)
 *
 * The new capacity is buffer##_size * 2 + n. If that computation wraps
 * around, or the reallocation fails, control jumps to the caller's
 * mem_error: label with buffer and buffer##_size left untouched.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2622
2642xmlChar *
2644 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2645 xmlChar *buffer = NULL;
2646 size_t buffer_size = 0;
2647 size_t nbchars = 0;
2648
2649 xmlChar *current = NULL;
2650 xmlChar *rep = NULL;
2651 const xmlChar *last;
2652 xmlEntityPtr ent;
2653 int c,l;
2654
2655 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2656 return(NULL);
2657 last = str + len;
2658
2659 if (((ctxt->depth > 40) &&
2660 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2661 (ctxt->depth > 1024)) {
2663 return(NULL);
2664 }
2665
2666 /*
2667 * allocate a translation buffer.
2668 */
2671 if (buffer == NULL) goto mem_error;
2672
2673 /*
2674 * OK loop until we reach one of the ending char or a size limit.
2675 * we are operating on already parsed values.
2676 */
2677 if (str < last)
2678 c = CUR_SCHAR(str, l);
2679 else
2680 c = 0;
2681 while ((c != 0) && (c != end) && /* non input consuming loop */
2682 (c != end2) && (c != end3) &&
2683 (ctxt->instate != XML_PARSER_EOF)) {
2684
2685 if (c == 0) break;
2686 if ((c == '&') && (str[1] == '#')) {
2687 int val = xmlParseStringCharRef(ctxt, &str);
2688 if (val == 0)
2689 goto int_error;
2690 COPY_BUF(0,buffer,nbchars,val);
2691 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2693 }
2694 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2697 "String decoding Entity Reference: %.30s\n",
2698 str);
2699 ent = xmlParseStringEntityRef(ctxt, &str);
2700 xmlParserEntityCheck(ctxt, 0, ent, 0);
2701 if (ent != NULL)
2702 ctxt->nbentities += ent->checked / 2;
2703 if ((ent != NULL) &&
2705 if (ent->content != NULL) {
2706 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2707 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2709 }
2710 } else {
2711 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2712 "predefined entity has no content\n");
2713 goto int_error;
2714 }
2715 } else if ((ent != NULL) && (ent->content != NULL)) {
2716 ctxt->depth++;
2717 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2718 0, 0, 0);
2719 ctxt->depth--;
2720 if (rep == NULL) {
2721 ent->content[0] = 0;
2722 goto int_error;
2723 }
2724
2725 current = rep;
2726 while (*current != 0) { /* non input consuming loop */
2727 buffer[nbchars++] = *current++;
2728 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2729 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2730 goto int_error;
2732 }
2733 }
2734 xmlFree(rep);
2735 rep = NULL;
2736 } else if (ent != NULL) {
2737 int i = xmlStrlen(ent->name);
2738 const xmlChar *cur = ent->name;
2739
2740 buffer[nbchars++] = '&';
2741 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2743 }
2744 for (;i > 0;i--)
2745 buffer[nbchars++] = *cur++;
2746 buffer[nbchars++] = ';';
2747 }
2748 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2751 "String decoding PE Reference: %.30s\n", str);
2752 ent = xmlParseStringPEReference(ctxt, &str);
2753 xmlParserEntityCheck(ctxt, 0, ent, 0);
2754 if (ent != NULL)
2755 ctxt->nbentities += ent->checked / 2;
2756 if (ent != NULL) {
2757 if (ent->content == NULL) {
2758 /*
2759 * Note: external parsed entities will not be loaded,
2760 * it is not required for a non-validating parser to
2761 * complete external PEReferences coming from the
2762 * internal subset
2763 */
2764 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2765 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2766 (ctxt->validate != 0)) {
2767 xmlLoadEntityContent(ctxt, ent);
2768 } else {
2769 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2770 "not validating will not read content for PE entity %s\n",
2771 ent->name, NULL);
2772 }
2773 }
2774 ctxt->depth++;
2775 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2776 0, 0, 0);
2777 ctxt->depth--;
2778 if (rep == NULL) {
2779 if (ent->content != NULL)
2780 ent->content[0] = 0;
2781 goto int_error;
2782 }
2783 current = rep;
2784 while (*current != 0) { /* non input consuming loop */
2785 buffer[nbchars++] = *current++;
2786 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2787 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2788 goto int_error;
2790 }
2791 }
2792 xmlFree(rep);
2793 rep = NULL;
2794 }
2795 } else {
2796 COPY_BUF(l,buffer,nbchars,c);
2797 str += l;
2798 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2800 }
2801 }
2802 if (str < last)
2803 c = CUR_SCHAR(str, l);
2804 else
2805 c = 0;
2806 }
2807 buffer[nbchars] = 0;
2808 return(buffer);
2809
2810mem_error:
2811 xmlErrMemory(ctxt, NULL);
2812int_error:
2813 if (rep != NULL)
2814 xmlFree(rep);
2815 if (buffer != NULL)
2816 xmlFree(buffer);
2817 return(NULL);
2818}
2819
/*
 * NUL-terminated front end to xmlStringLenDecodeEntities (presumably
 * xmlStringDecodeEntities -- the first signature line is not present in
 * this listing; the body uses `ctxt`, `str`, `what`, `end`, `end2` and
 * `end3`).
 *
 * Measures `str` with xmlStrlen() and forwards every argument unchanged.
 *
 * Returns NULL when ctxt or str is NULL, otherwise whatever
 * xmlStringLenDecodeEntities returns.
 */
2838xmlChar *
2840 xmlChar end, xmlChar end2, xmlChar end3) {
2841 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2842 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2843 end, end2, end3));
2844}
2845
2846/************************************************************************
2847 * *
2848 * Commodity functions, cleanup needed ? *
2849 * *
2850 ************************************************************************/
2851
2864static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2865 int blank_chars) {
2866 int i, ret;
2867 xmlNodePtr lastChild;
2868
2869 /*
2870 * Don't spend time trying to differentiate them, the same callback is
2871 * used !
2872 */
2873 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2874 return(0);
2875
2876 /*
2877 * Check for xml:space value.
2878 */
2879 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2880 (*(ctxt->space) == -2))
2881 return(0);
2882
2883 /*
2884 * Check that the string is made of blanks
2885 */
2886 if (blank_chars == 0) {
2887 for (i = 0;i < len;i++)
2888 if (!(IS_BLANK_CH(str[i]))) return(0);
2889 }
2890
2891 /*
2892 * Look if the element is mixed content in the DTD if available
2893 */
2894 if (ctxt->node == NULL) return(0);
2895 if (ctxt->myDoc != NULL) {
2896 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2897 if (ret == 0) return(1);
2898 if (ret == 1) return(0);
2899 }
2900
2901 /*
2902 * Otherwise, heuristic :-\
2903 */
2904 if ((RAW != '<') && (RAW != 0xD)) return(0);
2905 if ((ctxt->node->children == NULL) &&
2906 (RAW == '<') && (NXT(1) == '/')) return(0);
2907
2908 lastChild = xmlGetLastChild(ctxt->node);
2909 if (lastChild == NULL) {
2910 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2911 (ctxt->node->content != NULL)) return(0);
2912 } else if (xmlNodeIsText(lastChild))
2913 return(0);
2914 else if ((ctxt->node->children != NULL) &&
2915 (xmlNodeIsText(ctxt->node->children)))
2916 return(0);
2917 return(1);
2918}
2919
2920/************************************************************************
2921 * *
2922 * Extra stuff for namespace support *
2923 * Relates to http://www.w3.org/TR/WD-xml-names *
2924 * *
2925 ************************************************************************/
2926
2945xmlChar *
2948 xmlChar *buffer = NULL;
2949 int len = 0;
2950 int max = XML_MAX_NAMELEN;
2951 xmlChar *ret = NULL;
2952 const xmlChar *cur = name;
2953 int c;
2954
2955 if (prefix == NULL) return(NULL);
2956 *prefix = NULL;
2957
2958 if (cur == NULL) return(NULL);
2959
2960#ifndef XML_XML_NAMESPACE
2961 /* xml: prefix is not really a namespace */
2962 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2963 (cur[2] == 'l') && (cur[3] == ':'))
2964 return(xmlStrdup(name));
2965#endif
2966
2967 /* nasty but well=formed */
2968 if (cur[0] == ':')
2969 return(xmlStrdup(name));
2970
2971 c = *cur++;
2972 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2973 buf[len++] = c;
2974 c = *cur++;
2975 }
2976 if (len >= max) {
2977 /*
2978 * Okay someone managed to make a huge name, so he's ready to pay
2979 * for the processing speed.
2980 */
2981 max = len * 2;
2982
2983 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2984 if (buffer == NULL) {
2985 xmlErrMemory(ctxt, NULL);
2986 return(NULL);
2987 }
2988 memcpy(buffer, buf, len);
2989 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2990 if (len + 10 > max) {
2991 xmlChar *tmp;
2992
2993 max *= 2;
2994 tmp = (xmlChar *) xmlRealloc(buffer,
2995 max * sizeof(xmlChar));
2996 if (tmp == NULL) {
2997 xmlFree(buffer);
2998 xmlErrMemory(ctxt, NULL);
2999 return(NULL);
3000 }
3001 buffer = tmp;
3002 }
3003 buffer[len++] = c;
3004 c = *cur++;
3005 }
3006 buffer[len] = 0;
3007 }
3008
3009 if ((c == ':') && (*cur == 0)) {
3010 if (buffer != NULL)
3011 xmlFree(buffer);
3012 *prefix = NULL;
3013 return(xmlStrdup(name));
3014 }
3015
3016 if (buffer == NULL)
3017 ret = xmlStrndup(buf, len);
3018 else {
3019 ret = buffer;
3020 buffer = NULL;
3022 }
3023
3024
3025 if (c == ':') {
3026 c = *cur;
3027 *prefix = ret;
3028 if (c == 0) {
3029 return(xmlStrndup(BAD_CAST "", 0));
3030 }
3031 len = 0;
3032
3033 /*
3034 * Check that the first character is proper to start
3035 * a new name
3036 */
3037 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3038 ((c >= 0x41) && (c <= 0x5A)) ||
3039 (c == '_') || (c == ':'))) {
3040 int l;
3041 int first = CUR_SCHAR(cur, l);
3042
3043 if (!IS_LETTER(first) && (first != '_')) {
3044 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3045 "Name %s is not XML Namespace compliant\n",
3046 name);
3047 }
3048 }
3049 cur++;
3050
3051 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3052 buf[len++] = c;
3053 c = *cur++;
3054 }
3055 if (len >= max) {
3056 /*
3057 * Okay someone managed to make a huge name, so he's ready to pay
3058 * for the processing speed.
3059 */
3060 max = len * 2;
3061
3062 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3063 if (buffer == NULL) {
3064 xmlErrMemory(ctxt, NULL);
3065 return(NULL);
3066 }
3067 memcpy(buffer, buf, len);
3068 while (c != 0) { /* tested bigname2.xml */
3069 if (len + 10 > max) {
3070 xmlChar *tmp;
3071
3072 max *= 2;
3073 tmp = (xmlChar *) xmlRealloc(buffer,
3074 max * sizeof(xmlChar));
3075 if (tmp == NULL) {
3076 xmlErrMemory(ctxt, NULL);
3077 xmlFree(buffer);
3078 return(NULL);
3079 }
3080 buffer = tmp;
3081 }
3082 buffer[len++] = c;
3083 c = *cur++;
3084 }
3085 buffer[len] = 0;
3086 }
3087
3088 if (buffer == NULL)
3089 ret = xmlStrndup(buf, len);
3090 else {
3091 ret = buffer;
3092 }
3093 }
3094
3095 return(ret);
3096}
3097
3098/************************************************************************
3099 * *
3100 * The parser itself *
3101 * Relates to http://www.w3.org/TR/REC-xml *
3102 * *
3103 ************************************************************************/
3104
3105/************************************************************************
3106 * *
3107 * Routines to parse Name, NCName and NmToken *
3108 * *
3109 ************************************************************************/
/*
 * Call counters for the name-parsing routines, compiled only when DEBUG is
 * defined. Each counter is incremented once on entry to the matching parser
 * routine (for example nbParseName in the Name fast path and
 * nbParseNameComplex in its slow path).
 */
3110#ifdef DEBUG
3111static unsigned long nbParseName = 0;
3112static unsigned long nbParseNmToken = 0;
3113static unsigned long nbParseNCName = 0;
3114static unsigned long nbParseNCNameComplex = 0;
3115static unsigned long nbParseNameComplex = 0;
3116static unsigned long nbParseStringName = 0;
3117#endif
3118
3119/*
3120 * The two following functions are related to the change of accepted
3121 * characters for Name and NmToken in the Revision 5 of XML-1.0
3122 * They correspond to the modified production [4] and the new production [4a]
3123 * changes in that revision. Also note that the macros used for the
3124 * productions Letter, Digit, CombiningChar and Extender are not needed
3125 * anymore.
3126 * We still keep compatibility to pre-revision5 parsing semantic if the
3127 * new XML_PARSE_OLD10 option is given to the parser.
3128 */
/*
 * Tells whether the code point c is allowed as the first character of a Name.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing; presumably this is xmlIsNameStartChar(ctxt, c).
 *
 * ctxt: parser context; only ctxt->options is consulted here.
 * c:    the code point to test.
 *
 * When XML_PARSE_OLD10 is not set the explicit ranges below are used,
 * otherwise the test falls back to IS_LETTER plus '_' and ':'.
 *
 * Returns 1 when c is accepted, 0 otherwise.
 */
3129static int
3131 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3132 /*
3133 * Use the new checks of production [4] [4a] and [5] of the
3134 * Update 5 of XML-1.0
3135 */
 /* ' ', '>' and '/' are rejected first so common terminators exit early */
3136 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3137 (((c >= 'a') && (c <= 'z')) ||
3138 ((c >= 'A') && (c <= 'Z')) ||
3139 (c == '_') || (c == ':') ||
3140 ((c >= 0xC0) && (c <= 0xD6)) ||
3141 ((c >= 0xD8) && (c <= 0xF6)) ||
3142 ((c >= 0xF8) && (c <= 0x2FF)) ||
3143 ((c >= 0x370) && (c <= 0x37D)) ||
3144 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3145 ((c >= 0x200C) && (c <= 0x200D)) ||
3146 ((c >= 0x2070) && (c <= 0x218F)) ||
3147 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3148 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3149 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3150 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3151 ((c >= 0x10000) && (c <= 0xEFFFF))))
3152 return(1);
3153 } else {
 /* pre-revision-5 rule: Letter, '_' or ':' */
3154 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3155 return(1);
3156 }
3157 return(0);
3158}
3159
/*
 * Tells whether the code point c is allowed inside a Name or Nmtoken
 * (any position, not only the first).
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing; presumably this is xmlIsNameChar(ctxt, c).
 *
 * ctxt: parser context; only ctxt->options is consulted here.
 * c:    the code point to test.
 *
 * Without XML_PARSE_OLD10 the accepted set is the explicit range list below;
 * the entries tagged "!start" (digits, '-', '.', 0xB7, 0x300-0x36F and
 * 0x203F-0x2040) are the ones that are valid only after the first character.
 * With XML_PARSE_OLD10 the IS_LETTER / IS_DIGIT / IS_COMBINING / IS_EXTENDER
 * classes are used together with '.', '-', '_' and ':'.
 *
 * Returns 1 when c is accepted, 0 otherwise.
 */
3160static int
3162 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3163 /*
3164 * Use the new checks of production [4] [4a] and [5] of the
3165 * Update 5 of XML-1.0
3166 */
3167 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3168 (((c >= 'a') && (c <= 'z')) ||
3169 ((c >= 'A') && (c <= 'Z')) ||
3170 ((c >= '0') && (c <= '9')) || /* !start */
3171 (c == '_') || (c == ':') ||
3172 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3173 ((c >= 0xC0) && (c <= 0xD6)) ||
3174 ((c >= 0xD8) && (c <= 0xF6)) ||
3175 ((c >= 0xF8) && (c <= 0x2FF)) ||
3176 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3177 ((c >= 0x370) && (c <= 0x37D)) ||
3178 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179 ((c >= 0x200C) && (c <= 0x200D)) ||
3180 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3181 ((c >= 0x2070) && (c <= 0x218F)) ||
3182 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3183 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3184 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3185 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3186 ((c >= 0x10000) && (c <= 0xEFFFF))))
3187 return(1);
3188 } else {
 /* pre-revision-5 rule based on the character-class macros */
3189 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3190 (c == '.') || (c == '-') ||
3191 (c == '_') || (c == ':') ||
3192 (IS_COMBINING(c)) ||
3193 (IS_EXTENDER(c)))
3194 return(1);
3195 }
3196 return(0);
3197}
3198
3200 int *len, int *alloc, int normalize);
3201
/*
 * Slow-path Name parser working on the parser input.
 *
 * NOTE(review): several lines of this function are absent from this listing
 * (the line with the function name and parameters, the two alternatives of
 * the maxLength initialiser, and the first line of the error call for the
 * "unexpected change of input buffer" case). The name is presumably
 * xmlParseNameComplex(ctxt), as called from the ASCII fast path.
 *
 * Characters are decoded one at a time with CUR_CHAR and consumed with
 * NEXTL, and `len` accumulates the number of bytes consumed. The name itself
 * is not copied while scanning: it is looked up afterwards directly in the
 * input buffer, in the `len` bytes that precede ctxt->input->cur.
 *
 * Returns the string obtained from xmlDictLookup(ctxt->dict, ...), or NULL
 * when the parser is in the XML_PARSER_EOF state, the first character cannot
 * start a name, the name is longer than maxLength, or fewer than `len` bytes
 * lie between ctxt->input->base and ctxt->input->cur.
 */
3202static const xmlChar *
3204 int len = 0, l;
3205 int c;
3206 int count = 0;
3207 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3210
3211#ifdef DEBUG
3212 nbParseNameComplex++;
3213#endif
3214
3215 /*
3216 * Handler for more complex cases
3217 */
3218 GROW;
3219 if (ctxt->instate == XML_PARSER_EOF)
3220 return(NULL);
3221 c = CUR_CHAR(l);
3222 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223 /*
3224 * Use the new checks of production [4] [4a] and [5] of the
3225 * Update 5 of XML-1.0
3226 */
 /* first character: must belong to the name-start set */
3227 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3228 (!(((c >= 'a') && (c <= 'z')) ||
3229 ((c >= 'A') && (c <= 'Z')) ||
3230 (c == '_') || (c == ':') ||
3231 ((c >= 0xC0) && (c <= 0xD6)) ||
3232 ((c >= 0xD8) && (c <= 0xF6)) ||
3233 ((c >= 0xF8) && (c <= 0x2FF)) ||
3234 ((c >= 0x370) && (c <= 0x37D)) ||
3235 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3236 ((c >= 0x200C) && (c <= 0x200D)) ||
3237 ((c >= 0x2070) && (c <= 0x218F)) ||
3238 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3239 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3240 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3241 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3242 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3243 return(NULL);
3244 }
3245 len += l;
3246 NEXTL(l);
3247 c = CUR_CHAR(l);
 /* following characters: the wider name-char set */
3248 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3249 (((c >= 'a') && (c <= 'z')) ||
3250 ((c >= 'A') && (c <= 'Z')) ||
3251 ((c >= '0') && (c <= '9')) || /* !start */
3252 (c == '_') || (c == ':') ||
3253 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3254 ((c >= 0xC0) && (c <= 0xD6)) ||
3255 ((c >= 0xD8) && (c <= 0xF6)) ||
3256 ((c >= 0xF8) && (c <= 0x2FF)) ||
3257 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3258 ((c >= 0x370) && (c <= 0x37D)) ||
3259 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3260 ((c >= 0x200C) && (c <= 0x200D)) ||
3261 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3262 ((c >= 0x2070) && (c <= 0x218F)) ||
3263 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3264 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3265 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3266 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3267 ((c >= 0x10000) && (c <= 0xEFFFF))
3268 )) {
 /* refill the input every XML_PARSER_CHUNK_SIZE characters */
3269 if (count++ > XML_PARSER_CHUNK_SIZE) {
3270 count = 0;
3271 GROW;
3272 if (ctxt->instate == XML_PARSER_EOF)
3273 return(NULL);
3274 }
 /* stop counting rather than overflow the int byte count */
3275 if (len <= INT_MAX - l)
3276 len += l;
3277 NEXTL(l);
3278 c = CUR_CHAR(l);
3279 }
3280 } else {
 /* XML_PARSE_OLD10: same scan using the character-class macros */
3281 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3282 (!IS_LETTER(c) && (c != '_') &&
3283 (c != ':'))) {
3284 return(NULL);
3285 }
3286 len += l;
3287 NEXTL(l);
3288 c = CUR_CHAR(l);
3289
3290 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3291 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3292 (c == '.') || (c == '-') ||
3293 (c == '_') || (c == ':') ||
3294 (IS_COMBINING(c)) ||
3295 (IS_EXTENDER(c)))) {
3296 if (count++ > XML_PARSER_CHUNK_SIZE) {
3297 count = 0;
3298 GROW;
3299 if (ctxt->instate == XML_PARSER_EOF)
3300 return(NULL);
3301 }
3302 if (len <= INT_MAX - l)
3303 len += l;
3304 NEXTL(l);
3305 c = CUR_CHAR(l);
3306 }
3307 }
3308 if (len > maxLength) {
3309 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3310 return(NULL);
3311 }
3312 if (ctxt->input->cur - ctxt->input->base < len) {
3313 /*
3314 * There were a couple of bugs where PERefs led to a change
3315 * of the buffer. Check the buffer size to avoid passing an invalid
3316 * pointer to xmlDictLookup.
3317 */
3319 "unexpected change of input buffer");
3320 return (NULL);
3321 }
 /*
  * NOTE(review): when the cursor sits on the LF of a CR LF pair the name is
  * taken one byte further back; presumably CUR_CHAR stepped over the CR
  * without that byte being counted in len. Confirm against CUR_CHAR.
  */
3322 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3323 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3324 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3325}
3326
/*
 * Parses an XML Name from the parser input.
 *
 * NOTE(review): the line with the function name and parameters and the two
 * alternatives of the maxLength initialiser are absent from this listing;
 * presumably this is xmlParseName(ctxt).
 *
 * A byte-wise fast path handles names made only of ASCII letters, digits,
 * '_', '-', ':' and '.'. It is taken only when the byte that ends the scan is
 * a non-zero ASCII byte; in that case the name is looked up in ctxt->dict and
 * both ctxt->input->cur and ctxt->input->col are advanced past it. In every
 * other case (non-ASCII byte, zero byte, or a first byte outside the ASCII
 * start set) the work is delegated to xmlParseNameComplex().
 *
 * Returns the dictionary string, or NULL on error (name longer than
 * maxLength, dictionary lookup failure, or failure of the slow path).
 */
3343const xmlChar *
3345 const xmlChar *in;
3346 const xmlChar *ret;
3347 size_t count = 0;
3348 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3351
3352 GROW;
3353
3354#ifdef DEBUG
3355 nbParseName++;
3356#endif
3357
3358 /*
3359 * Accelerator for simple ASCII names
3360 */
3361 in = ctxt->input->cur;
3362 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3363 ((*in >= 0x41) && (*in <= 0x5A)) ||
3364 (*in == '_') || (*in == ':')) {
3365 in++;
3366 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3367 ((*in >= 0x41) && (*in <= 0x5A)) ||
3368 ((*in >= 0x30) && (*in <= 0x39)) ||
3369 (*in == '_') || (*in == '-') ||
3370 (*in == ':') || (*in == '.'))
3371 in++;
 /* only trust the scan when it stopped on a non-zero ASCII byte */
3372 if ((*in > 0) && (*in < 0x80)) {
3373 count = in - ctxt->input->cur;
3374 if (count > maxLength) {
3375 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3376 return(NULL);
3377 }
3378 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3379 ctxt->input->cur = in;
3380 ctxt->input->col += count;
3381 if (ret == NULL)
3382 xmlErrMemory(ctxt, NULL);
3383 return(ret);
3384 }
3385 }
3386 /* not a plain ASCII name: hand over to the full decoder */
3387 return(xmlParseNameComplex(ctxt));
3388}
3389
/*
 * Slow-path NCName parser (a Name that may not contain ':').
 *
 * NOTE(review): the line with the function name and parameters and the two
 * alternatives of the maxLength initialiser are absent from this listing;
 * presumably this is xmlParseNCNameComplex(ctxt).
 *
 * The start of the name is remembered as an offset from BASE_PTR rather than
 * as a pointer, and the final lookup is done at BASE_PTR + startPosition.
 * Characters are classified with xmlIsNameStartChar()/xmlIsNameChar(), with
 * ':' explicitly excluded. `len` accumulates the bytes consumed.
 *
 * Returns the string from xmlDictLookup(ctxt->dict, ...), or NULL when the
 * first character is not acceptable, the parser reaches XML_PARSER_EOF, or
 * the name is longer than maxLength.
 */
3390static const xmlChar *
3392 int len = 0, l;
3393 int c;
3394 int count = 0;
3395 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3398 size_t startPosition = 0;
3399
3400#ifdef DEBUG
3401 nbParseNCNameComplex++;
3402#endif
3403
3404 /*
3405 * Handler for more complex cases
3406 */
3407 GROW;
3408 startPosition = CUR_PTR - BASE_PTR;
3409 c = CUR_CHAR(l);
3410 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3411 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3412 return(NULL);
3413 }
3414
3415 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3416 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
 /* refill the input every XML_PARSER_CHUNK_SIZE characters */
3417 if (count++ > XML_PARSER_CHUNK_SIZE) {
3418 count = 0;
3419 GROW;
3420 if (ctxt->instate == XML_PARSER_EOF)
3421 return(NULL);
3422 }
 /* stop counting rather than overflow the int byte count */
3423 if (len <= INT_MAX - l)
3424 len += l;
3425 NEXTL(l);
3426 c = CUR_CHAR(l);
 /* a zero character may only mean the buffer ran dry: refill and retry */
3427 if (c == 0) {
3428 count = 0;
3429 /*
3430 * when shrinking to extend the buffer we really need to preserve
3431 * the part of the name we already parsed. Hence rolling back
3432 * by current length.
3433 */
3434 ctxt->input->cur -= l;
3435 GROW;
3436 if (ctxt->instate == XML_PARSER_EOF)
3437 return(NULL);
3438 ctxt->input->cur += l;
3439 c = CUR_CHAR(l);
3440 }
3441 }
3442 if (len > maxLength) {
3443 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3444 return(NULL);
3445 }
3446 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3447}
3448
/*
 * Parses an NCName (a Name without ':') from the parser input.
 *
 * NOTE(review): the line with the function name and parameters and the two
 * alternatives of the maxLength initialiser are absent from this listing;
 * presumably this is xmlParseNCName(ctxt).
 *
 * A byte-wise fast path handles names made only of ASCII letters, digits,
 * '_', '-' and '.', never scanning past ctxt->input->end. It succeeds only
 * when the scan stops before the end of the buffer on a non-zero ASCII byte;
 * the name is then looked up in ctxt->dict and ctxt->input->cur / col are
 * advanced. Otherwise the work is delegated to xmlParseNCNameComplex().
 *
 * Returns the dictionary string, or NULL on error (name longer than
 * maxLength, dictionary lookup failure, or failure of the slow path).
 */
3464static const xmlChar *
3466 const xmlChar *in, *e;
3467 const xmlChar *ret;
3468 size_t count = 0;
3469 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3472
3473#ifdef DEBUG
3474 nbParseNCName++;
3475#endif
3476
3477 /*
3478 * Accelerator for simple ASCII names
3479 */
3480 in = ctxt->input->cur;
3481 e = ctxt->input->end;
3482 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3483 ((*in >= 0x41) && (*in <= 0x5A)) ||
3484 (*in == '_')) && (in < e)) {
3485 in++;
3486 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3487 ((*in >= 0x41) && (*in <= 0x5A)) ||
3488 ((*in >= 0x30) && (*in <= 0x39)) ||
3489 (*in == '_') || (*in == '-') ||
3490 (*in == '.')) && (in < e))
3491 in++;
 /* ran into the end of the buffer: the name may continue, use slow path */
3492 if (in >= e)
3493 goto complex;
3494 if ((*in > 0) && (*in < 0x80)) {
3495 count = in - ctxt->input->cur;
3496 if (count > maxLength) {
3497 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3498 return(NULL);
3499 }
3500 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3501 ctxt->input->cur = in;
3502 ctxt->input->col += count;
3503 if (ret == NULL) {
3504 xmlErrMemory(ctxt, NULL);
3505 }
3506 return(ret);
3507 }
3508 }
3509complex:
3510 return(xmlParseNCNameComplex(ctxt));
3511}
3512
/*
 * Parses a Name from the input and checks whether it equals `other`.
 *
 * NOTE(review): the line with the function name and parameters is absent
 * from this listing; presumably this is xmlParseNameAndCompare(ctxt, other).
 *
 * The input bytes are first compared directly with `other`. If `other` is
 * matched entirely and is followed by '>' or a blank, the input position and
 * column are advanced past it. Otherwise xmlParseName() is called and its
 * result is compared with `other` by pointer.
 *
 * Returns (const xmlChar *) 1 when the parsed name equals `other`, NULL when
 * xmlParseName() returns NULL, and otherwise the (different) name returned
 * by xmlParseName().
 */
3524static const xmlChar *
3526 register const xmlChar *cmp = other;
3527 register const xmlChar *in;
3528 const xmlChar *ret;
3529
3530 GROW;
3531 if (ctxt->instate == XML_PARSER_EOF)
3532 return(NULL);
3533
3534 in = ctxt->input->cur;
3535 while (*in != 0 && *in == *cmp) {
3536 ++in;
3537 ++cmp;
3538 }
3539 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3540 /* success */
3541 ctxt->input->col += in - ctxt->input->cur;
3542 ctxt->input->cur = in;
3543 return (const xmlChar*) 1;
3544 }
3545 /* failure (or end of input buffer), check with full function */
3546 ret = xmlParseName (ctxt);
3547 /* strings coming from the dictionary direct compare possible */
3548 if (ret == other) {
3549 return (const xmlChar*) 1;
3550 }
3551 return ret;
3552}
3553
/*
 * Parses an XML Name out of an in-memory string instead of the parser input.
 *
 * NOTE(review): several lines are absent from this listing: the function
 * name and parameters, the declaration of the local `buf`, the two
 * alternatives of the maxLength initialiser, and the statement in the inner
 * loop that stores the character into `buffer`. Presumably this is
 * xmlParseStringName(ctxt, str).
 *
 * str: in/out; *str is the position to parse from and, on success only, is
 *      updated to point just past the name.
 *
 * Characters are decoded with CUR_SCHAR and classified with
 * xmlIsNameStartChar()/xmlIsNameChar(). Short names are collected in `buf`;
 * once len reaches XML_MAX_NAMELEN a heap buffer is allocated and doubled as
 * needed.
 *
 * Returns a newly allocated string (the heap buffer, or an xmlStrndup() copy
 * of buf), or NULL when the first character cannot start a name, an
 * allocation fails, or the name is longer than maxLength.
 */
3572static xmlChar *
3575 const xmlChar *cur = *str;
3576 int len = 0, l;
3577 int c;
3578 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3581
3582#ifdef DEBUG
3583 nbParseStringName++;
3584#endif
3585
3586 c = CUR_SCHAR(cur, l);
3587 if (!xmlIsNameStartChar(ctxt, c)) {
3588 return(NULL);
3589 }
3590
3591 COPY_BUF(l,buf,len,c);
3592 cur += l;
3593 c = CUR_SCHAR(cur, l);
3594 while (xmlIsNameChar(ctxt, c)) {
3595 COPY_BUF(l,buf,len,c);
3596 cur += l;
3597 c = CUR_SCHAR(cur, l);
3598 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3599 /*
3600 * Okay someone managed to make a huge name, so he's ready to pay
3601 * for the processing speed.
3602 */
3603 xmlChar *buffer;
3604 int max = len * 2;
3605
3606 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3607 if (buffer == NULL) {
3608 xmlErrMemory(ctxt, NULL);
3609 return(NULL);
3610 }
3611 memcpy(buffer, buf, len);
3612 while (xmlIsNameChar(ctxt, c)) {
 /* keep at least 10 spare bytes before storing the next character */
3613 if (len + 10 > max) {
3614 xmlChar *tmp;
3615
3616 max *= 2;
3617 tmp = (xmlChar *) xmlRealloc(buffer,
3618 max * sizeof(xmlChar));
3619 if (tmp == NULL) {
3620 xmlErrMemory(ctxt, NULL);
3621 xmlFree(buffer);
3622 return(NULL);
3623 }
3624 buffer = tmp;
3625 }
3627 cur += l;
3628 c = CUR_SCHAR(cur, l);
 /* NOTE(review): message says "NCName" although ':' is accepted here */
3629 if (len > maxLength) {
3630 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3631 xmlFree(buffer);
3632 return(NULL);
3633 }
3634 }
3635 buffer[len] = 0;
3636 *str = cur;
3637 return(buffer);
3638 }
3639 }
3640 if (len > maxLength) {
3641 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3642 return(NULL);
3643 }
3644 *str = cur;
3645 return(xmlStrndup(buf, len));
3646}
3647
/*
 * Parses an Nmtoken (one or more name characters) from the parser input.
 *
 * NOTE(review): several lines are absent from this listing: the function
 * name and parameters, the declaration of the local `buf`, the two
 * alternatives of the maxLength initialiser, and the statement in the inner
 * loop that stores the character into `buffer`. Presumably this is
 * xmlParseNmtoken(ctxt).
 *
 * Every character accepted by xmlIsNameChar() is consumed; unlike a Name
 * there is no separate test for the first character. Short tokens are
 * collected in `buf`; once len reaches XML_MAX_NAMELEN a heap buffer is
 * allocated and doubled as needed.
 *
 * Returns a newly allocated string (the heap buffer, or an xmlStrndup() copy
 * of buf), or NULL when no name character is found, the parser reaches
 * XML_PARSER_EOF, an allocation fails, or the token is longer than maxLength.
 */
3661xmlChar *
3664 int len = 0, l;
3665 int c;
3666 int count = 0;
3667 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3670
3671#ifdef DEBUG
3672 nbParseNmToken++;
3673#endif
3674
3675 GROW;
3676 if (ctxt->instate == XML_PARSER_EOF)
3677 return(NULL);
3678 c = CUR_CHAR(l);
3679
3680 while (xmlIsNameChar(ctxt, c)) {
3681 if (count++ > XML_PARSER_CHUNK_SIZE) {
3682 count = 0;
3683 GROW;
3684 }
3685 COPY_BUF(l,buf,len,c);
3686 NEXTL(l);
3687 c = CUR_CHAR(l);
 /* a zero character may only mean the buffer ran dry: refill and retry */
3688 if (c == 0) {
3689 count = 0;
3690 GROW;
3691 if (ctxt->instate == XML_PARSER_EOF)
3692 return(NULL);
3693 c = CUR_CHAR(l);
3694 }
3695 if (len >= XML_MAX_NAMELEN) {
3696 /*
3697 * Okay someone managed to make a huge token, so he's ready to pay
3698 * for the processing speed.
3699 */
3700 xmlChar *buffer;
3701 int max = len * 2;
3702
3703 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3704 if (buffer == NULL) {
3705 xmlErrMemory(ctxt, NULL);
3706 return(NULL);
3707 }
3708 memcpy(buffer, buf, len);
3709 while (xmlIsNameChar(ctxt, c)) {
3710 if (count++ > XML_PARSER_CHUNK_SIZE) {
3711 count = 0;
3712 GROW;
3713 if (ctxt->instate == XML_PARSER_EOF) {
3714 xmlFree(buffer);
3715 return(NULL);
3716 }
3717 }
 /* keep at least 10 spare bytes before storing the next character */
3718 if (len + 10 > max) {
3719 xmlChar *tmp;
3720
3721 max *= 2;
3722 tmp = (xmlChar *) xmlRealloc(buffer,
3723 max * sizeof(xmlChar));
3724 if (tmp == NULL) {
3725 xmlErrMemory(ctxt, NULL);
3726 xmlFree(buffer);
3727 return(NULL);
3728 }
3729 buffer = tmp;
3730 }
3732 NEXTL(l);
3733 c = CUR_CHAR(l);
3734 if (len > maxLength) {
3735 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3736 xmlFree(buffer);
3737 return(NULL);
3738 }
3739 }
3740 buffer[len] = 0;
3741 return(buffer);
3742 }
3743 }
3744 if (len == 0)
3745 return(NULL);
3746 if (len > maxLength) {
3747 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3748 return(NULL);
3749 }
3750 return(xmlStrndup(buf, len));
3751}
3752
/*
 * Parses a quoted entity value from the parser input.
 *
 * NOTE(review): many lines are absent from this listing: the function name
 * and parameters, the declarations of `size` and `input`, the two
 * alternatives of the maxLength initialiser, the error calls of several
 * failure branches, and the first line of the call whose result is stored
 * in `ret` (only its trailing "0, 0, 0);" is visible). Presumably this is
 * xmlParseEntityValue(ctxt, orig).
 *
 * orig: optional output; when non-NULL and parsing succeeds, *orig receives
 *       the raw (unsubstituted) buffer and the caller becomes its owner.
 *
 * Steps visible here:
 *  1. the opening quote ('"' or '\'') selects the `stop` character;
 *  2. characters are copied into a growable buffer until `stop` is seen
 *     while ctxt->input is still the input the literal started in;
 *  3. the buffer is scanned and every '%' or '&' (other than "&#") must be
 *     followed by a name and ';', otherwise an error is raised;
 *  4. the substituted value is computed with ctxt->depth temporarily
 *     incremented.
 *
 * Returns `ret`, which is NULL on every error path. The raw buffer is freed
 * before returning unless it was handed over through *orig.
 */
3766xmlChar *
3768 xmlChar *buf = NULL;
3769 int len = 0;
3771 int c, l;
3772 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3775 xmlChar stop;
3776 xmlChar *ret = NULL;
3777 const xmlChar *cur = NULL;
3779
3780 if (RAW == '"') stop = '"';
3781 else if (RAW == '\'') stop = '\'';
3782 else {
3784 return(NULL);
3785 }
3786 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3787 if (buf == NULL) {
3788 xmlErrMemory(ctxt, NULL);
3789 return(NULL);
3790 }
3791
3792 /*
3793 * The content of the entity definition is copied in a buffer.
3794 */
3795
3797 input = ctxt->input;
3798 GROW;
3799 if (ctxt->instate == XML_PARSER_EOF)
3800 goto error;
3801 NEXT;
3802 c = CUR_CHAR(l);
3803 /*
3804 * NOTE: 4.4.5 Included in Literal
3805 * When a parameter entity reference appears in a literal entity
3806 * value, ... a single or double quote character in the replacement
3807 * text is always treated as a normal data character and will not
3808 * terminate the literal.
3809 * In practice it means we stop the loop only when back at parsing
3810 * the initial entity and the quote is found
3811 */
3812 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3813 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
 /* keep room for one encoded character plus the terminating zero */
3814 if (len + 5 >= size) {
3815 xmlChar *tmp;
3816
3817 size *= 2;
3818 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3819 if (tmp == NULL) {
3820 xmlErrMemory(ctxt, NULL);
3821 goto error;
3822 }
3823 buf = tmp;
3824 }
3825 COPY_BUF(l,buf,len,c);
3826 NEXTL(l);
3827
3828 GROW;
3829 c = CUR_CHAR(l);
 /* a zero character may only mean the buffer ran dry: refill and retry */
3830 if (c == 0) {
3831 GROW;
3832 c = CUR_CHAR(l);
3833 }
3834
3835 if (len > maxLength) {
3836 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3837 "entity value too long\n");
3838 goto error;
3839 }
3840 }
3841 buf[len] = 0;
3842 if (ctxt->instate == XML_PARSER_EOF)
3843 goto error;
3844 if (c != stop) {
3846 goto error;
3847 }
3848 NEXT;
3849
3850 /*
3851 * Raise problem w.r.t. '&' and '%' being used in non-entities
3852 * reference constructs. Note Charref will be handled in
3853 * xmlStringDecodeEntities()
3854 */
3855 cur = buf;
3856 while (*cur != 0) { /* non input consuming */
3857 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3858 xmlChar *name;
3859 xmlChar tmp = *cur;
3860 int nameOk = 0;
3861
3862 cur++;
 /* the name is only needed for validation; it is freed right away */
3863 name = xmlParseStringName(ctxt, &cur);
3864 if (name != NULL) {
3865 nameOk = 1;
3866 xmlFree(name);
3867 }
3868 if ((nameOk == 0) || (*cur != ';')) {
3869 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3870 "EntityValue: '%c' forbidden except for entities references\n",
3871 tmp);
3872 goto error;
3873 }
3874 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3875 (ctxt->inputNr == 1)) {
3877 goto error;
3878 }
3879 if (*cur == 0)
3880 break;
3881 }
3882 cur++;
3883 }
3884
3885 /*
3886 * Then PEReference entities are substituted.
3887 *
3888 * NOTE: 4.4.7 Bypassed
3889 * When a general entity reference appears in the EntityValue in
3890 * an entity declaration, it is bypassed and left as is.
3891 * so XML_SUBSTITUTE_REF is not set here.
3892 */
3893 ++ctxt->depth;
3895 0, 0, 0);
3896 --ctxt->depth;
 /* hand the raw text to the caller; clearing buf prevents the free below */
3897 if (orig != NULL) {
3898 *orig = buf;
3899 buf = NULL;
3900 }
3901
3902error:
3903 if (buf != NULL)
3904 xmlFree(buf);
3905 return(ret);
3906}
3907
/*
 * Parses a quoted attribute value, expanding or re-encoding references and
 * normalising white space.
 *
 * NOTE(review): many lines are absent from this listing: the function name
 * and parameters, the two alternatives of the maxLength initialiser, a
 * statement in each quote branch, the error call for a missing quote, parts
 * of the entity-handling conditions and the error call for '<' in the value.
 * Presumably this is xmlParseAttValueComplex(ctxt, attlen, normalize).
 *
 * attlen:    optional output; when non-NULL receives the byte length of the
 *            returned value.
 * normalize: when non-zero, leading blanks are dropped, runs of blanks are
 *            collapsed to one space and trailing spaces are trimmed.
 *
 * Handling visible in the loop:
 *  - "&#...;" is resolved with xmlParseCharRef(); a resulting '&' is stored
 *    as "&#38;" unless ctxt->replaceEntities is set;
 *  - other "&name;" references are resolved with xmlParseEntityRef() and are
 *    either replaced by (decoded) content or written back as "&name;";
 *  - 0x20, 0xD, 0xA and 0x9 are each stored as a single 0x20;
 *  - anything else is copied through.
 * The buffer is grown with growBuffer() so that at least 10 spare bytes
 * remain after each store.
 *
 * Returns the allocated value, or NULL after freeing the buffers when the
 * opening quote is missing, an allocation fails, the value is longer than
 * maxLength or the parser reaches XML_PARSER_EOF.
 */
3920static xmlChar *
3922 xmlChar limit = 0;
3923 xmlChar *buf = NULL;
3924 xmlChar *rep = NULL;
3925 size_t len = 0;
3926 size_t buf_size = 0;
3927 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3930 int c, l, in_space = 0;
3931 xmlChar *current = NULL;
3932 xmlEntityPtr ent;
3933
3934 if (NXT(0) == '"') {
3936 limit = '"';
3937 NEXT;
3938 } else if (NXT(0) == '\'') {
3939 limit = '\'';
3941 NEXT;
3942 } else {
3944 return(NULL);
3945 }
3946
3947 /*
3948 * allocate a translation buffer.
3949 */
3950 buf_size = XML_PARSER_BUFFER_SIZE;
3951 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3952 if (buf == NULL) goto mem_error;
3953
3954 /*
3955 * OK loop until we reach one of the ending char or a size limit.
3956 */
3957 c = CUR_CHAR(l);
3958 while (((NXT(0) != limit) && /* checked */
3959 (IS_CHAR(c)) && (c != '<')) &&
3960 (ctxt->instate != XML_PARSER_EOF)) {
3961 if (c == '&') {
3962 in_space = 0;
3963 if (NXT(1) == '#') {
 /* character reference */
3964 int val = xmlParseCharRef(ctxt);
3965
3966 if (val == '&') {
3967 if (ctxt->replaceEntities) {
3968 if (len + 10 > buf_size) {
3969 growBuffer(buf, 10);
3970 }
3971 buf[len++] = '&';
3972 } else {
3973 /*
3974 * The reparsing will be done in xmlStringGetNodeList()
3975 * called by the attribute() function in SAX.c
3976 */
3977 if (len + 10 > buf_size) {
3978 growBuffer(buf, 10);
3979 }
3980 buf[len++] = '&';
3981 buf[len++] = '#';
3982 buf[len++] = '3';
3983 buf[len++] = '8';
3984 buf[len++] = ';';
3985 }
3986 } else if (val != 0) {
3987 if (len + 10 > buf_size) {
3988 growBuffer(buf, 10);
3989 }
3990 len += xmlCopyChar(0, &buf[len], val);
3991 }
3992 } else {
 /* general entity reference */
3993 ent = xmlParseEntityRef(ctxt);
3994 ctxt->nbentities++;
3995 if (ent != NULL)
3996 ctxt->nbentities += ent->owner;
3997 if ((ent != NULL) &&
3999 if (len + 10 > buf_size) {
4000 growBuffer(buf, 10);
4001 }
4002 if ((ctxt->replaceEntities == 0) &&
4003 (ent->content[0] == '&')) {
4004 buf[len++] = '&';
4005 buf[len++] = '#';
4006 buf[len++] = '3';
4007 buf[len++] = '8';
4008 buf[len++] = ';';
4009 } else {
4010 buf[len++] = ent->content[0];
4011 }
4012 } else if ((ent != NULL) &&
4013 (ctxt->replaceEntities != 0)) {
4015 ++ctxt->depth;
4016 rep = xmlStringDecodeEntities(ctxt, ent->content,
4018 0, 0, 0);
4019 --ctxt->depth;
4020 if (rep != NULL) {
4021 current = rep;
 /* copy the replacement text, turning CR, LF and TAB into spaces */
4022 while (*current != 0) { /* non input consuming */
4023 if ((*current == 0xD) || (*current == 0xA) ||
4024 (*current == 0x9)) {
4025 buf[len++] = 0x20;
4026 current++;
4027 } else
4028 buf[len++] = *current++;
4029 if (len + 10 > buf_size) {
4030 growBuffer(buf, 10);
4031 }
4032 }
4033 xmlFree(rep);
4034 rep = NULL;
4035 }
4036 } else {
4037 if (len + 10 > buf_size) {
4038 growBuffer(buf, 10);
4039 }
4040 if (ent->content != NULL)
4041 buf[len++] = ent->content[0];
4042 }
4043 } else if (ent != NULL) {
4044 int i = xmlStrlen(ent->name);
4045 const xmlChar *cur = ent->name;
4046
4047 /*
4048 * This may look absurd but is needed to detect
4049 * entities problems
4050 */
4051 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4052 (ent->content != NULL) && (ent->checked == 0)) {
4053 unsigned long oldnbent = ctxt->nbentities, diff;
4054
4055 ++ctxt->depth;
4056 rep = xmlStringDecodeEntities(ctxt, ent->content,
4057 XML_SUBSTITUTE_REF, 0, 0, 0);
4058 --ctxt->depth;
4059
 /*
  * ent->checked is set to twice the (capped) number of entities
  * counted during the trial expansion; bit 0 is then set when the
  * expansion contains '<'.
  */
4060 diff = ctxt->nbentities - oldnbent + 1;
4061 if (diff > INT_MAX / 2)
4062 diff = INT_MAX / 2;
4063 ent->checked = diff * 2;
4064 if (rep != NULL) {
4065 if (xmlStrchr(rep, '<'))
4066 ent->checked |= 1;
4067 xmlFree(rep);
4068 rep = NULL;
4069 } else {
4070 ent->content[0] = 0;
4071 }
4072 }
4073
4074 /*
4075 * Just output the reference
4076 */
4077 buf[len++] = '&';
4078 while (len + i + 10 > buf_size) {
4079 growBuffer(buf, i + 10);
4080 }
4081 for (;i > 0;i--)
4082 buf[len++] = *cur++;
4083 buf[len++] = ';';
4084 }
4085 }
4086 } else {
4087 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
 /* blanks: skipped at the start and collapsed when normalising */
4088 if ((len != 0) || (!normalize)) {
4089 if ((!normalize) || (!in_space)) {
4090 COPY_BUF(l,buf,len,0x20);
4091 while (len + 10 > buf_size) {
4092 growBuffer(buf, 10);
4093 }
4094 }
4095 in_space = 1;
4096 }
4097 } else {
4098 in_space = 0;
4099 COPY_BUF(l,buf,len,c);
4100 if (len + 10 > buf_size) {
4101 growBuffer(buf, 10);
4102 }
4103 }
4104 NEXTL(l);
4105 }
4106 GROW;
4107 c = CUR_CHAR(l);
 /*
  * NOTE(review): this length-limit failure jumps to mem_error, so
  * xmlErrMemory() is reported in addition to the message below.
  */
4108 if (len > maxLength) {
4109 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4110 "AttValue length too long\n");
4111 goto mem_error;
4112 }
4113 }
4114 if (ctxt->instate == XML_PARSER_EOF)
4115 goto error;
4116
 /* trim the trailing spaces left by normalisation */
4117 if ((in_space) && (normalize)) {
4118 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4119 }
4120 buf[len] = 0;
4121 if (RAW == '<') {
4123 } else if (RAW != limit) {
4124 if ((c != 0) && (!IS_CHAR(c))) {
4125 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4126 "invalid character in attribute value\n");
4127 } else {
4128 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4129 "AttValue: ' expected\n");
4130 }
4131 } else
4132 NEXT;
4133
4134 if (attlen != NULL) *attlen = (int) len;
4135 return(buf);
4136
4137mem_error:
4138 xmlErrMemory(ctxt, NULL);
4139error:
4140 if (buf != NULL)
4141 xmlFree(buf);
4142 if (rep != NULL)
4143 xmlFree(rep);
4144 return(NULL);
4145}
4146
/*
 * Public entry point for parsing an attribute value.
 *
 * NOTE(review): the line with the function name and parameters is absent
 * from this listing; presumably this is xmlParseAttValue(ctxt).
 *
 * Returns NULL when ctxt or ctxt->input is NULL; otherwise forwards to
 * xmlParseAttValueInternal() with no length/alloc outputs and normalisation
 * turned off, and returns its result.
 */
4181xmlChar *
4183 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4184 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4185}
4186
/*
 * Parses a quoted system literal from the parser input.
 *
 * NOTE(review): many lines are absent from this listing: the function name
 * and parameters, the declaration of `size`, the two alternatives of the
 * maxLength initialiser, the error call for a missing quote, and several
 * single statements (before the first CUR_CHAR, in the failure branches and
 * after the loop). Presumably this is xmlParseSystemLiteral(ctxt).
 *
 * The opening quote ('"' or '\'') selects the `stop` character; every
 * following character accepted by IS_CHAR is copied into a growable buffer
 * until `stop` is met. The closing quote is consumed when the loop ended on
 * a valid character.
 *
 * Returns the allocated, zero-terminated literal, or NULL when the opening
 * quote is missing, an allocation fails, the parser reaches XML_PARSER_EOF
 * or the literal is longer than maxLength.
 */
4198xmlChar *
4200 xmlChar *buf = NULL;
4201 int len = 0;
4203 int cur, l;
4204 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4207 xmlChar stop;
4208 int state = ctxt->instate;
4209 int count = 0;
4210
4211 SHRINK;
4212 if (RAW == '"') {
4213 NEXT;
4214 stop = '"';
4215 } else if (RAW == '\'') {
4216 NEXT;
4217 stop = '\'';
4218 } else {
4220 return(NULL);
4221 }
4222
4223 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4224 if (buf == NULL) {
4225 xmlErrMemory(ctxt, NULL);
4226 return(NULL);
4227 }
4229 cur = CUR_CHAR(l);
4230 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
 /* keep room for one encoded character plus the terminating zero */
4231 if (len + 5 >= size) {
4232 xmlChar *tmp;
4233
4234 size *= 2;
4235 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4236 if (tmp == NULL) {
4237 xmlFree(buf);
4238 xmlErrMemory(ctxt, NULL);
4240 return(NULL);
4241 }
4242 buf = tmp;
4243 }
 /* recycle and refill the input buffer every 50 characters */
4244 count++;
4245 if (count > 50) {
4246 SHRINK;
4247 GROW;
4248 count = 0;
4249 if (ctxt->instate == XML_PARSER_EOF) {
4250 xmlFree(buf);
4251 return(NULL);
4252 }
4253 }
4254 COPY_BUF(l,buf,len,cur);
4255 NEXTL(l);
4256 cur = CUR_CHAR(l);
 /* a zero character may only mean the buffer ran dry: refill and retry */
4257 if (cur == 0) {
4258 GROW;
4259 SHRINK;
4260 cur = CUR_CHAR(l);
4261 }
4262 if (len > maxLength) {
4263 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4264 xmlFree(buf);
4266 return(NULL);
4267 }
4268 }
4269 buf[len] = 0;
4271 if (!IS_CHAR(cur)) {
4273 } else {
 /* consume the closing quote */
4274 NEXT;
4275 }
4276 return(buf);
4277}
4278
/*
 * Parses a quoted public identifier literal from the parser input.
 *
 * NOTE(review): several lines are absent from this listing: the function
 * name and parameters, the declaration of `size`, the two alternatives of
 * the maxLength initialiser, the error calls for a missing opening or
 * closing quote, and one statement just before the first read of CUR.
 * Presumably this is xmlParsePubidLiteral(ctxt).
 *
 * The opening quote ('"' or '\'') selects the `stop` character; every
 * following byte accepted by IS_PUBIDCHAR_CH is copied into a growable
 * buffer until `stop` is met. The closing quote is consumed when present.
 * ctxt->instate is restored to its value on entry before the normal return.
 *
 * Returns the allocated, zero-terminated literal (also when the closing
 * quote was not found), or NULL when the opening quote is missing, an
 * allocation fails, the parser reaches XML_PARSER_EOF or the literal is
 * longer than maxLength.
 */
4290xmlChar *
4292 xmlChar *buf = NULL;
4293 int len = 0;
4295 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4298 xmlChar cur;
4299 xmlChar stop;
4300 int count = 0;
4301 xmlParserInputState oldstate = ctxt->instate;
4302
4303 SHRINK;
4304 if (RAW == '"') {
4305 NEXT;
4306 stop = '"';
4307 } else if (RAW == '\'') {
4308 NEXT;
4309 stop = '\'';
4310 } else {
4312 return(NULL);
4313 }
4314 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4315 if (buf == NULL) {
4316 xmlErrMemory(ctxt, NULL);
4317 return(NULL);
4318 }
4320 cur = CUR;
4321 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
 /* keep room for this byte plus the terminating zero */
4322 if (len + 1 >= size) {
4323 xmlChar *tmp;
4324
4325 size *= 2;
4326 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4327 if (tmp == NULL) {
4328 xmlErrMemory(ctxt, NULL);
4329 xmlFree(buf);
4330 return(NULL);
4331 }
4332 buf = tmp;
4333 }
4334 buf[len++] = cur;
 /* recycle and refill the input buffer every 50 bytes */
4335 count++;
4336 if (count > 50) {
4337 SHRINK;
4338 GROW;
4339 count = 0;
4340 if (ctxt->instate == XML_PARSER_EOF) {
4341 xmlFree(buf);
4342 return(NULL);
4343 }
4344 }
4345 NEXT;
4346 cur = CUR;
 /* a zero byte may only mean the buffer ran dry: refill and retry */
4347 if (cur == 0) {
4348 GROW;
4349 SHRINK;
4350 cur = CUR;
4351 }
4352 if (len > maxLength) {
4353 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4354 xmlFree(buf);
4355 return(NULL);
4356 }
4357 }
4358 buf[len] = 0;
4359 if (cur != stop) {
4361 } else {
 /* consume the closing quote */
4362 NEXT;
4363 }
4364 ctxt->instate = oldstate;
4365 return(buf);
4366}
4367
4368static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4369
4370/*
4371 * used for the test in the inner loop of the char data testing
4372 *
 * The table is indexed by a byte value. A non-zero entry (the byte value
 * itself) means the fast character-data loop may step over that byte; a zero
 * entry makes the loop stop so the byte can be examined individually.
 * Zero entries are: 0x00-0x08 and 0x0A-0x1F (so TAB 0x09 is the only control
 * byte let through, and LF/CR always stop the loop), '&' (0x26), '<' (0x3C),
 * ']' (0x5D) and every byte from 0x80 to 0xFF.
 */
4373static const unsigned char test_char_data[256] = {
4374 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4375 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4376 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4377 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4378 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4379 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4380 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4381 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4382 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4383 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4384 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4385 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4386 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4387 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4388 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4389 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4390 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4391 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4392 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4393 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4394 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4395 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4396 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4397 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4398 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4399 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4400 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4401 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4402 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4403 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4404 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4405 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4406};
4407
/*
 * Parses a run of character data and reports it through the SAX callbacks.
 *
 * NOTE(review): the line with the function name and parameters and the
 * statement executed when "]]>" is found in content are absent from this
 * listing; presumably this is xmlParseCharData(ctxt, cdata).
 *
 * cdata: when zero, an accelerated byte-wise path is tried first; when
 *        non-zero the function goes straight to xmlParseCharDataComplex().
 *
 * The accelerated path scans the input buffer in place (no copy):
 *  - a leading run of spaces and line feeds that is directly followed by
 *    '<' is reported as ignorable white space or characters and the
 *    function returns;
 *  - otherwise bytes are skipped with test_char_data[] and the span is
 *    handed to the SAX handler;
 *  - the loop repeats while the next byte is printable ASCII, TAB or LF, and
 *    returns on '<' or '&'.
 * ctxt->input->line and ->col are updated while scanning. `line` and `col`
 * hold the position recorded after the last SAX delivery and are written
 * back to the input before the complex parser takes over.
 */
4424void
4426 const xmlChar *in;
4427 int nbchar = 0;
4428 int line = ctxt->input->line;
4429 int col = ctxt->input->col;
4430 int ccol;
4431
4432 SHRINK;
4433 GROW;
4434 /*
4435 * Accelerated common case where input doesn't need to be
4436 * modified before passing it to the handler.
4437 */
4438 if (!cdata) {
4439 in = ctxt->input->cur;
4440 do {
4441get_more_space:
 /* skip a leading run of spaces and line feeds */
4442 while (*in == 0x20) { in++; ctxt->input->col++; }
4443 if (*in == 0xA) {
4444 do {
4445 ctxt->input->line++; ctxt->input->col = 1;
4446 in++;
4447 } while (*in == 0xA);
4448 goto get_more_space;
4449 }
 /* only blanks before the next tag: deliver them and stop */
4450 if (*in == '<') {
4451 nbchar = in - ctxt->input->cur;
4452 if (nbchar > 0) {
4453 const xmlChar *tmp = ctxt->input->cur;
4454 ctxt->input->cur = in;
4455
4456 if ((ctxt->sax != NULL) &&
4457 (ctxt->sax->ignorableWhitespace !=
4458 ctxt->sax->characters)) {
4459 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4460 if (ctxt->sax->ignorableWhitespace != NULL)
4461 ctxt->sax->ignorableWhitespace(ctxt->userData,
4462 tmp, nbchar);
4463 } else {
4464 if (ctxt->sax->characters != NULL)
4465 ctxt->sax->characters(ctxt->userData,
4466 tmp, nbchar);
4467 if (*ctxt->space == -1)
4468 *ctxt->space = -2;
4469 }
4470 } else if ((ctxt->sax != NULL) &&
4471 (ctxt->sax->characters != NULL)) {
4472 ctxt->sax->characters(ctxt->userData,
4473 tmp, nbchar);
4474 }
4475 }
4476 return;
4477 }
4478
4479get_more:
 /* table-driven skip over bytes that need no special handling */
4480 ccol = ctxt->input->col;
4481 while (test_char_data[*in]) {
4482 in++;
4483 ccol++;
4484 }
4485 ctxt->input->col = ccol;
4486 if (*in == 0xA) {
4487 do {
4488 ctxt->input->line++; ctxt->input->col = 1;
4489 in++;
4490 } while (*in == 0xA);
4491 goto get_more;
4492 }
4493 if (*in == ']') {
4494 if ((in[1] == ']') && (in[2] == '>')) {
4496 ctxt->input->cur = in + 1;
4497 return;
4498 }
4499 in++;
4500 ctxt->input->col++;
4501 goto get_more;
4502 }
 /* deliver the span scanned so far */
4503 nbchar = in - ctxt->input->cur;
4504 if (nbchar > 0) {
4505 if ((ctxt->sax != NULL) &&
4506 (ctxt->sax->ignorableWhitespace !=
4507 ctxt->sax->characters) &&
4508 (IS_BLANK_CH(*ctxt->input->cur))) {
4509 const xmlChar *tmp = ctxt->input->cur;
4510 ctxt->input->cur = in;
4511
4512 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4513 if (ctxt->sax->ignorableWhitespace != NULL)
4514 ctxt->sax->ignorableWhitespace(ctxt->userData,
4515 tmp, nbchar);
4516 } else {
4517 if (ctxt->sax->characters != NULL)
4518 ctxt->sax->characters(ctxt->userData,
4519 tmp, nbchar);
4520 if (*ctxt->space == -1)
4521 *ctxt->space = -2;
4522 }
4523 line = ctxt->input->line;
4524 col = ctxt->input->col;
4525 } else if (ctxt->sax != NULL) {
4526 if (ctxt->sax->characters != NULL)
4527 ctxt->sax->characters(ctxt->userData,
4528 ctxt->input->cur, nbchar);
4529 line = ctxt->input->line;
4530 col = ctxt->input->col;
4531 }
4532 /* something really bad happened in the SAX callback */
4533 if (ctxt->instate != XML_PARSER_CONTENT)
4534 return;
4535 }
4536 ctxt->input->cur = in;
 /* CR LF: step over the CR and let the next iteration handle the LF */
4537 if (*in == 0xD) {
4538 in++;
4539 if (*in == 0xA) {
4540 ctxt->input->cur = in;
4541 in++;
4542 ctxt->input->line++; ctxt->input->col = 1;
4543 continue; /* while */
4544 }
4545 in--;
4546 }
4547 if (*in == '<') {
4548 return;
4549 }
4550 if (*in == '&') {
4551 return;
4552 }
4553 SHRINK;
4554 GROW;
4555 if (ctxt->instate == XML_PARSER_EOF)
4556 return;
4557 in = ctxt->input->cur;
4558 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4559 nbchar = 0;
4560 }
 /* rewind the reported position to the last delivery point */
4561 ctxt->input->line = line;
4562 ctxt->input->col = col;
4563 xmlParseCharDataComplex(ctxt, cdata);
4564}
4565
/*
 * Slow-path character-data parser.
 *
 * Reads characters one at a time with CUR_CHAR()/NEXTL(), accumulates them
 * in the local buffer `buf` through COPY_BUF(), and hands the accumulated
 * text to the SAX layer:
 *   - ctxt->sax->ignorableWhitespace() when areBlanks() reports the chunk
 *     as blank,
 *   - ctxt->sax->characters() otherwise.
 * The buffer is flushed whenever nbchar reaches XML_PARSER_BIG_BUFFER_SIZE
 * and once more after the loop if anything is left.
 *
 * The loop stops on '<', on '&', on a character rejected by IS_CHAR(), or
 * on "]]>" when `cdata` is non-zero.
 *
 * NOTE(review): this listing skips original lines 4576-4577, so the function
 * name, its parameter list and the declaration of `buf` are not visible
 * here. Judging from the call just above this block and from the names
 * used in the body, this is presumably xmlParseCharDataComplex(ctxt, cdata)
 * -- confirm against the real source file.
 */
4575static void
4578 int nbchar = 0;
4579 int cur, l;
4580 int count = 0;
4581
/* NOTE(review): SHRINK/GROW are opaque macros here; presumably they trim and
 * refill the input buffer -- confirm. */
4582 SHRINK;
4583 GROW;
4584 cur = CUR_CHAR(l);
4585 while ((cur != '<') && /* checked */
4586 (cur != '&') &&
4587 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
/* "]]>" ends the loop only when the caller passed a non-zero cdata. */
4588 if ((cur == ']') && (NXT(1) == ']') &&
4589 (NXT(2) == '>')) {
4590 if (cdata) break;
4591 else {
/* NOTE(review): original line 4592 is missing from this listing, so the
 * else branch appears empty; presumably an error is reported here for a
 * "]]>" sequence outside a CDATA section -- confirm. */
4593 }
4594 }
/* Append the current character to buf; nbchar is the running fill count. */
4595 COPY_BUF(l,buf,nbchar,cur);
4596 /* move current position before possible calling of ctxt->sax->characters */
4597 NEXTL(l);
4598 cur = CUR_CHAR(l);
/* Buffer threshold reached: terminate it and deliver the chunk to SAX. */
4599 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4600 buf[nbchar] = 0;
4601
4602 /*
4603 * OK the segment is to be consumed as chars.
4604 */
/* Callbacks fire only when a SAX handler exists and SAX is not disabled. */
4605 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4606 if (areBlanks(ctxt, buf, nbchar, 0)) {
4607 if (ctxt->sax->ignorableWhitespace != NULL)
4608 ctxt->sax->ignorableWhitespace(ctxt->userData,
4609 buf, nbchar);
4610 } else {
4611 if (ctxt->sax->characters != NULL)
4612 ctxt->sax->characters(ctxt->userData, buf, nbchar);
/* When the two callbacks are distinct and the top of the space stack is -1,
 * it is changed to -2 after non-blank text has been delivered. */
4613 if ((ctxt->sax->characters !=
4614 ctxt->sax->ignorableWhitespace) &&
4615 (*ctxt->space == -1))
4616 *ctxt->space = -2;
4617 }
4618 }
4619 nbchar = 0;
4620 /* something really bad happened in the SAX callback */
4621 if (ctxt->instate != XML_PARSER_CONTENT)
4622 return;
4623 }
/* Every 51st iteration, run SHRINK/GROW again and bail out if the parser
 * state has become XML_PARSER_EOF. */
4624 count++;
4625 if (count > 50) {
4626 SHRINK;
4627 GROW;
4628 count = 0;
4629 if (ctxt->instate == XML_PARSER_EOF)
4630 return;
4631 }
4632 }
/* Flush whatever is still buffered once the loop has ended; same dispatch
 * logic as the in-loop flush above. */
4633 if (nbchar != 0) {
4634 buf[nbchar] = 0;
4635 /*
4636 * OK the segment is to be consumed as chars.
4637 */
4638 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4639 if (areBlanks(ctxt, buf, nbchar, 0)) {
4640 if (ctxt->sax->ignorableWhitespace != NULL)
4641 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4642 } else {
4643 if (ctxt->sax->characters != NULL)
4644 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4645 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4646 (*ctxt->space == -1))
4647 *ctxt->space = -2;
4648 }
4649 }
4650 }
/* A non-zero character that fails IS_CHAR() is reported as
 * XML_ERR_INVALID_CHAR and then stepped over with NEXTL(). */
4651 if ((cur != 0) && (!IS_CHAR(cur))) {
4652 /* Generate the error and skip the offending character */
4653 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4654 "PCDATA invalid Char value %d\n",
4655 cur);
4656 NEXTL(l);
4657 }
4658}
4659
4682xmlChar *
4684 xmlChar *URI = NULL;
4685
4686 SHRINK;
4687
4688 *publicID = NULL;
4689 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4690 SKIP(6);
4691 if (SKIP_BLANKS == 0) {
4692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4693 "Space required after 'SYSTEM'\n");
4694 }
4695 URI = xmlParseSystemLiteral(ctxt);
4696 if (URI == NULL) {
4698 }
4699 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4700 SKIP(6);
4701 if