ReactOS  0.4.13-dev-259-g5ca9c9c
parser.c
Go to the documentation of this file.
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  * implemented on top of the SAX interfaces
4  *
5  * References:
6  * The XML specification:
7  * http://www.w3.org/TR/REC-xml
8  * Original 1.0 version:
9  * http://www.w3.org/TR/1998/REC-xml-19980210
10  * XML second edition working draft
11  * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of character are actually implanted either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86 
87 #include "buf.h"
88 #include "enc.h"
89 
90 static void
92 
93 static xmlParserCtxtPtr
95  const xmlChar *base, xmlParserCtxtPtr pctx);
96 
97 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98 
99 /************************************************************************
100  * *
101  * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102  * *
103  ************************************************************************/
104 
/*
 * NOTE(review): presumably thresholds used by the entity checks that the
 * banner above ties to XML_PARSE_HUGE; their uses are not visible in this
 * part of the file - confirm.
 */
105 #define XML_PARSER_BIG_ENTITY 1000
106 #define XML_PARSER_LOT_ENTITY 5000
107 
108 /*
109  * XML_PARSER_NON_LINEAR is the threshold at which the ratio of parsed
110  * entity replacement over the size in bytes of the input indicates
111  * exponential behaviour. A value of 10 corresponds to at least 3 entity
112  * replacements per byte of input.
113  */
114 #define XML_PARSER_NON_LINEAR 10
115 
116 /*
117  * xmlParserEntityCheck
118  *
119  * Function to check non-linear entity expansion behaviour
120  * This is here to detect and stop exponential linear entity expansion
121  * This is not a limitation of the parser but a safety
122  * boundary feature. It can be disabled with the XML_PARSE_HUGE
123  * parser option.
124  */
125 static int
127  xmlEntityPtr ent, size_t replacement)
128 {
129  size_t consumed = 0;
130 
131  if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132  return (0);
133  if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134  return (1);
135 
136  /*
137  * This may look absurd but is needed to detect
138  * entities problems
139  */
140  if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141  (ent->content != NULL) && (ent->checked == 0) &&
142  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
143  unsigned long oldnbent = ctxt->nbentities;
144  xmlChar *rep;
145 
146  ent->checked = 1;
147 
148  ++ctxt->depth;
149  rep = xmlStringDecodeEntities(ctxt, ent->content,
150  XML_SUBSTITUTE_REF, 0, 0, 0);
151  --ctxt->depth;
152  if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
153  ent->content[0] = 0;
154  }
155 
156  ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
157  if (rep != NULL) {
158  if (xmlStrchr(rep, '<'))
159  ent->checked |= 1;
160  xmlFree(rep);
161  rep = NULL;
162  }
163  }
164  if (replacement != 0) {
165  if (replacement < XML_MAX_TEXT_LENGTH)
166  return(0);
167 
168  /*
169  * If the volume of entity copy reaches 10 times the
170  * amount of parsed data and over the large text threshold
171  * then that's very likely to be an abuse.
172  */
173  if (ctxt->input != NULL) {
174  consumed = ctxt->input->consumed +
175  (ctxt->input->cur - ctxt->input->base);
176  }
177  consumed += ctxt->sizeentities;
178 
179  if (replacement < XML_PARSER_NON_LINEAR * consumed)
180  return(0);
181  } else if (size != 0) {
182  /*
183  * Do the check based on the replacement size of the entity
184  */
186  return(0);
187 
188  /*
189  * A limit on the amount of text data reasonably used
190  */
191  if (ctxt->input != NULL) {
192  consumed = ctxt->input->consumed +
193  (ctxt->input->cur - ctxt->input->base);
194  }
195  consumed += ctxt->sizeentities;
196 
197  if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198  (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
199  return (0);
200  } else if (ent != NULL) {
201  /*
202  * use the number of parsed entities in the replacement
203  */
204  size = ent->checked / 2;
205 
206  /*
207  * The amount of data parsed counting entities size only once
208  */
209  if (ctxt->input != NULL) {
210  consumed = ctxt->input->consumed +
211  (ctxt->input->cur - ctxt->input->base);
212  }
213  consumed += ctxt->sizeentities;
214 
215  /*
216  * Check the density of entities for the amount of data
217  * knowing an entity reference will take at least 3 bytes
218  */
219  if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
220  return (0);
221  } else {
222  /*
223  * strange we got no data for checking
224  */
225  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
227  (ctxt->nbentities <= 10000))
228  return (0);
229  }
231  return (1);
232 }
233 
/*
 * NOTE(review): presumably the maximum nesting depth the parser accepts;
 * its use is not visible in this part of the file - confirm.
 */
242 unsigned int xmlParserMaxDepth = 256;
243 
244 
245 
/* SAX2 gates the "#ifdef SAX2" sections further down in this file. */
246 #define SAX2 1
/* NOTE(review): buffer sizes, presumably in bytes - confirm at use sites. */
247 #define XML_PARSER_BIG_BUFFER_SIZE 300
248 #define XML_PARSER_BUFFER_SIZE 100
249 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
250 
/* NOTE(review): presumably the input chunk size of the push parser - confirm. */
260 #define XML_PARSER_CHUNK_SIZE 100
261 
262 /*
263  * List of XML-prefixed PI targets allowed by the W3C specs.
264  * The array is terminated by a NULL entry.
265  */
265 
266 static const char *xmlW3CPIs[] = {
267  "xml-stylesheet",
268  "xml-model",
269  NULL
270 };
271 
272 
273 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
275  const xmlChar **str);
276 
277 static xmlParserErrors
280  void *user_data, int depth, const xmlChar *URL,
281  const xmlChar *ID, xmlNodePtr *list);
282 
283 static int
285  const char *encoding);
286 #ifdef LIBXML_LEGACY_ENABLED
287 static void
288 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
289  xmlNodePtr lastNode);
290 #endif /* LIBXML_LEGACY_ENABLED */
291 
292 static xmlParserErrors
294  const xmlChar *string, void *user_data, xmlNodePtr *lst);
295 
296 static int
298 
299 /************************************************************************
300  * *
301  * Some factorized error routines *
302  * *
303  ************************************************************************/
304 
313 static void
315  const xmlChar * localname)
316 {
317  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318  (ctxt->instate == XML_PARSER_EOF))
319  return;
320  if (ctxt != NULL)
322 
323  if (prefix == NULL)
324  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
326  (const char *) localname, NULL, NULL, 0, 0,
327  "Attribute %s redefined\n", localname);
328  else
329  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
331  (const char *) prefix, (const char *) localname,
332  NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
333  localname);
334  if (ctxt != NULL) {
335  ctxt->wellFormed = 0;
336  if (ctxt->recovery == 0)
337  ctxt->disableSAX = 1;
338  }
339 }
340 
349 static void
351 {
352  const char *errmsg;
353 
354  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355  (ctxt->instate == XML_PARSER_EOF))
356  return;
357  switch (error) {
359  errmsg = "CharRef: invalid hexadecimal value";
360  break;
362  errmsg = "CharRef: invalid decimal value";
363  break;
365  errmsg = "CharRef: invalid value";
366  break;
368  errmsg = "internal error";
369  break;
371  errmsg = "PEReference at end of document";
372  break;
374  errmsg = "PEReference in prolog";
375  break;
377  errmsg = "PEReference in epilog";
378  break;
380  errmsg = "PEReference: no name";
381  break;
383  errmsg = "PEReference: expecting ';'";
384  break;
385  case XML_ERR_ENTITY_LOOP:
386  errmsg = "Detected an entity reference loop";
387  break;
389  errmsg = "EntityValue: \" or ' expected";
390  break;
392  errmsg = "PEReferences forbidden in internal subset";
393  break;
395  errmsg = "EntityValue: \" or ' expected";
396  break;
398  errmsg = "AttValue: \" or ' expected";
399  break;
401  errmsg = "Unescaped '<' not allowed in attributes values";
402  break;
404  errmsg = "SystemLiteral \" or ' expected";
405  break;
407  errmsg = "Unfinished System or Public ID \" or ' expected";
408  break;
410  errmsg = "Sequence ']]>' not allowed in content";
411  break;
413  errmsg = "SYSTEM or PUBLIC, the URI is missing";
414  break;
416  errmsg = "PUBLIC, the Public Identifier is missing";
417  break;
419  errmsg = "Comment must not contain '--' (double-hyphen)";
420  break;
422  errmsg = "xmlParsePI : no target name";
423  break;
425  errmsg = "Invalid PI name";
426  break;
428  errmsg = "NOTATION: Name expected here";
429  break;
431  errmsg = "'>' required to close NOTATION declaration";
432  break;
434  errmsg = "Entity value required";
435  break;
437  errmsg = "Fragment not allowed";
438  break;
440  errmsg = "'(' required to start ATTLIST enumeration";
441  break;
443  errmsg = "NmToken expected in ATTLIST enumeration";
444  break;
446  errmsg = "')' required to finish ATTLIST enumeration";
447  break;
449  errmsg = "MixedContentDecl : '|' or ')*' expected";
450  break;
452  errmsg = "MixedContentDecl : '#PCDATA' expected";
453  break;
455  errmsg = "ContentDecl : Name or '(' expected";
456  break;
458  errmsg = "ContentDecl : ',' '|' or ')' expected";
459  break;
461  errmsg =
462  "PEReference: forbidden within markup decl in internal subset";
463  break;
464  case XML_ERR_GT_REQUIRED:
465  errmsg = "expected '>'";
466  break;
468  errmsg = "XML conditional section '[' expected";
469  break;
471  errmsg = "Content error in the external subset";
472  break;
474  errmsg =
475  "conditional section INCLUDE or IGNORE keyword expected";
476  break;
478  errmsg = "XML conditional section not closed";
479  break;
481  errmsg = "Text declaration '<?xml' required";
482  break;
484  errmsg = "parsing XML declaration: '?>' expected";
485  break;
487  errmsg = "external parsed entities cannot be standalone";
488  break;
490  errmsg = "EntityRef: expecting ';'";
491  break;
493  errmsg = "DOCTYPE improperly terminated";
494  break;
496  errmsg = "EndTag: '</' not found";
497  break;
499  errmsg = "expected '='";
500  break;
502  errmsg = "String not closed expecting \" or '";
503  break;
505  errmsg = "String not started expecting ' or \"";
506  break;
508  errmsg = "Invalid XML encoding name";
509  break;
511  errmsg = "standalone accepts only 'yes' or 'no'";
512  break;
514  errmsg = "Document is empty";
515  break;
517  errmsg = "Extra content at the end of the document";
518  break;
520  errmsg = "chunk is not well balanced";
521  break;
523  errmsg = "extra content at the end of well balanced chunk";
524  break;
526  errmsg = "Malformed declaration expecting version";
527  break;
529  errmsg = "Name too long use XML_PARSE_HUGE option";
530  break;
531 #if 0
532  case:
533  errmsg = "";
534  break;
535 #endif
536  default:
537  errmsg = "Unregistered error message";
538  }
539  if (ctxt != NULL)
540  ctxt->errNo = error;
541  if (info == NULL) {
542  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543  XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
544  errmsg);
545  } else {
546  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547  XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
548  errmsg, info);
549  }
550  if (ctxt != NULL) {
551  ctxt->wellFormed = 0;
552  if (ctxt->recovery == 0)
553  ctxt->disableSAX = 1;
554  }
555 }
556 
565 static void LIBXML_ATTR_FORMAT(3,0)
566 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567  const char *msg)
568 {
569  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570  (ctxt->instate == XML_PARSER_EOF))
571  return;
572  if (ctxt != NULL)
573  ctxt->errNo = error;
574  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575  XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
576  if (ctxt != NULL) {
577  ctxt->wellFormed = 0;
578  if (ctxt->recovery == 0)
579  ctxt->disableSAX = 1;
580  }
581 }
582 
593 static void LIBXML_ATTR_FORMAT(3,0)
594 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595  const char *msg, const xmlChar *str1, const xmlChar *str2)
596 {
597  xmlStructuredErrorFunc schannel = NULL;
598 
599  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600  (ctxt->instate == XML_PARSER_EOF))
601  return;
602  if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603  (ctxt->sax->initialized == XML_SAX2_MAGIC))
604  schannel = ctxt->sax->serror;
605  if (ctxt != NULL) {
606  __xmlRaiseError(schannel,
607  (ctxt->sax) ? ctxt->sax->warning : NULL,
608  ctxt->userData,
609  ctxt, NULL, XML_FROM_PARSER, error,
610  XML_ERR_WARNING, NULL, 0,
611  (const char *) str1, (const char *) str2, NULL, 0, 0,
612  msg, (const char *) str1, (const char *) str2);
613  } else {
614  __xmlRaiseError(schannel, NULL, NULL,
615  ctxt, NULL, XML_FROM_PARSER, error,
616  XML_ERR_WARNING, NULL, 0,
617  (const char *) str1, (const char *) str2, NULL, 0, 0,
618  msg, (const char *) str1, (const char *) str2);
619  }
620 }
621 
631 static void LIBXML_ATTR_FORMAT(3,0)
632 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633  const char *msg, const xmlChar *str1, const xmlChar *str2)
634 {
635  xmlStructuredErrorFunc schannel = NULL;
636 
637  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638  (ctxt->instate == XML_PARSER_EOF))
639  return;
640  if (ctxt != NULL) {
641  ctxt->errNo = error;
642  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643  schannel = ctxt->sax->serror;
644  }
645  if (ctxt != NULL) {
646  __xmlRaiseError(schannel,
647  ctxt->vctxt.error, ctxt->vctxt.userData,
648  ctxt, NULL, XML_FROM_DTD, error,
649  XML_ERR_ERROR, NULL, 0, (const char *) str1,
650  (const char *) str2, NULL, 0, 0,
651  msg, (const char *) str1, (const char *) str2);
652  ctxt->valid = 0;
653  } else {
654  __xmlRaiseError(schannel, NULL, NULL,
655  ctxt, NULL, XML_FROM_DTD, error,
656  XML_ERR_ERROR, NULL, 0, (const char *) str1,
657  (const char *) str2, NULL, 0, 0,
658  msg, (const char *) str1, (const char *) str2);
659  }
660 }
661 
671 static void LIBXML_ATTR_FORMAT(3,0)
672 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
673  const char *msg, int val)
674 {
675  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676  (ctxt->instate == XML_PARSER_EOF))
677  return;
678  if (ctxt != NULL)
679  ctxt->errNo = error;
680  __xmlRaiseError(NULL, NULL, NULL,
682  NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
683  if (ctxt != NULL) {
684  ctxt->wellFormed = 0;
685  if (ctxt->recovery == 0)
686  ctxt->disableSAX = 1;
687  }
688 }
689 
701 static void LIBXML_ATTR_FORMAT(3,0)
702 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
703  const char *msg, const xmlChar *str1, int val,
704  const xmlChar *str2)
705 {
706  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707  (ctxt->instate == XML_PARSER_EOF))
708  return;
709  if (ctxt != NULL)
710  ctxt->errNo = error;
711  __xmlRaiseError(NULL, NULL, NULL,
713  NULL, 0, (const char *) str1, (const char *) str2,
714  NULL, val, 0, msg, str1, val, str2);
715  if (ctxt != NULL) {
716  ctxt->wellFormed = 0;
717  if (ctxt->recovery == 0)
718  ctxt->disableSAX = 1;
719  }
720 }
721 
731 static void LIBXML_ATTR_FORMAT(3,0)
732 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
733  const char *msg, const xmlChar * val)
734 {
735  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736  (ctxt->instate == XML_PARSER_EOF))
737  return;
738  if (ctxt != NULL)
739  ctxt->errNo = error;
740  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
742  NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
743  val);
744  if (ctxt != NULL) {
745  ctxt->wellFormed = 0;
746  if (ctxt->recovery == 0)
747  ctxt->disableSAX = 1;
748  }
749 }
750 
760 static void LIBXML_ATTR_FORMAT(3,0)
761 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762  const char *msg, const xmlChar * val)
763 {
764  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765  (ctxt->instate == XML_PARSER_EOF))
766  return;
767  if (ctxt != NULL)
768  ctxt->errNo = error;
769  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
771  NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772  val);
773 }
774 
785 static void LIBXML_ATTR_FORMAT(3,0)
786 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
787  const char *msg,
788  const xmlChar * info1, const xmlChar * info2,
789  const xmlChar * info3)
790 {
791  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792  (ctxt->instate == XML_PARSER_EOF))
793  return;
794  if (ctxt != NULL)
795  ctxt->errNo = error;
796  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
797  XML_ERR_ERROR, NULL, 0, (const char *) info1,
798  (const char *) info2, (const char *) info3, 0, 0, msg,
799  info1, info2, info3);
800  if (ctxt != NULL)
801  ctxt->nsWellFormed = 0;
802 }
803 
814 static void LIBXML_ATTR_FORMAT(3,0)
815 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816  const char *msg,
817  const xmlChar * info1, const xmlChar * info2,
818  const xmlChar * info3)
819 {
820  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821  (ctxt->instate == XML_PARSER_EOF))
822  return;
823  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824  XML_ERR_WARNING, NULL, 0, (const char *) info1,
825  (const char *) info2, (const char *) info3, 0, 0, msg,
826  info1, info2, info3);
827 }
828 
829 /************************************************************************
830  * *
831  * Library wide options *
832  * *
833  ************************************************************************/
834 
845 int
847 {
848  switch (feature) {
849  case XML_WITH_THREAD:
850 #ifdef LIBXML_THREAD_ENABLED
851  return(1);
852 #else
853  return(0);
854 #endif
855  case XML_WITH_TREE:
856 #ifdef LIBXML_TREE_ENABLED
857  return(1);
858 #else
859  return(0);
860 #endif
861  case XML_WITH_OUTPUT:
862 #ifdef LIBXML_OUTPUT_ENABLED
863  return(1);
864 #else
865  return(0);
866 #endif
867  case XML_WITH_PUSH:
868 #ifdef LIBXML_PUSH_ENABLED
869  return(1);
870 #else
871  return(0);
872 #endif
873  case XML_WITH_READER:
874 #ifdef LIBXML_READER_ENABLED
875  return(1);
876 #else
877  return(0);
878 #endif
879  case XML_WITH_PATTERN:
880 #ifdef LIBXML_PATTERN_ENABLED
881  return(1);
882 #else
883  return(0);
884 #endif
885  case XML_WITH_WRITER:
886 #ifdef LIBXML_WRITER_ENABLED
887  return(1);
888 #else
889  return(0);
890 #endif
891  case XML_WITH_SAX1:
892 #ifdef LIBXML_SAX1_ENABLED
893  return(1);
894 #else
895  return(0);
896 #endif
897  case XML_WITH_FTP:
898 #ifdef LIBXML_FTP_ENABLED
899  return(1);
900 #else
901  return(0);
902 #endif
903  case XML_WITH_HTTP:
904 #ifdef LIBXML_HTTP_ENABLED
905  return(1);
906 #else
907  return(0);
908 #endif
909  case XML_WITH_VALID:
910 #ifdef LIBXML_VALID_ENABLED
911  return(1);
912 #else
913  return(0);
914 #endif
915  case XML_WITH_HTML:
916 #ifdef LIBXML_HTML_ENABLED
917  return(1);
918 #else
919  return(0);
920 #endif
921  case XML_WITH_LEGACY:
922 #ifdef LIBXML_LEGACY_ENABLED
923  return(1);
924 #else
925  return(0);
926 #endif
927  case XML_WITH_C14N:
928 #ifdef LIBXML_C14N_ENABLED
929  return(1);
930 #else
931  return(0);
932 #endif
933  case XML_WITH_CATALOG:
934 #ifdef LIBXML_CATALOG_ENABLED
935  return(1);
936 #else
937  return(0);
938 #endif
939  case XML_WITH_XPATH:
940 #ifdef LIBXML_XPATH_ENABLED
941  return(1);
942 #else
943  return(0);
944 #endif
945  case XML_WITH_XPTR:
946 #ifdef LIBXML_XPTR_ENABLED
947  return(1);
948 #else
949  return(0);
950 #endif
951  case XML_WITH_XINCLUDE:
952 #ifdef LIBXML_XINCLUDE_ENABLED
953  return(1);
954 #else
955  return(0);
956 #endif
957  case XML_WITH_ICONV:
958 #ifdef LIBXML_ICONV_ENABLED
959  return(1);
960 #else
961  return(0);
962 #endif
963  case XML_WITH_ISO8859X:
964 #ifdef LIBXML_ISO8859X_ENABLED
965  return(1);
966 #else
967  return(0);
968 #endif
969  case XML_WITH_UNICODE:
970 #ifdef LIBXML_UNICODE_ENABLED
971  return(1);
972 #else
973  return(0);
974 #endif
975  case XML_WITH_REGEXP:
976 #ifdef LIBXML_REGEXP_ENABLED
977  return(1);
978 #else
979  return(0);
980 #endif
981  case XML_WITH_AUTOMATA:
982 #ifdef LIBXML_AUTOMATA_ENABLED
983  return(1);
984 #else
985  return(0);
986 #endif
987  case XML_WITH_EXPR:
988 #ifdef LIBXML_EXPR_ENABLED
989  return(1);
990 #else
991  return(0);
992 #endif
993  case XML_WITH_SCHEMAS:
994 #ifdef LIBXML_SCHEMAS_ENABLED
995  return(1);
996 #else
997  return(0);
998 #endif
999  case XML_WITH_SCHEMATRON:
1000 #ifdef LIBXML_SCHEMATRON_ENABLED
1001  return(1);
1002 #else
1003  return(0);
1004 #endif
1005  case XML_WITH_MODULES:
1006 #ifdef LIBXML_MODULES_ENABLED
1007  return(1);
1008 #else
1009  return(0);
1010 #endif
1011  case XML_WITH_DEBUG:
1012 #ifdef LIBXML_DEBUG_ENABLED
1013  return(1);
1014 #else
1015  return(0);
1016 #endif
1017  case XML_WITH_DEBUG_MEM:
1018 #ifdef DEBUG_MEMORY_LOCATION
1019  return(1);
1020 #else
1021  return(0);
1022 #endif
1023  case XML_WITH_DEBUG_RUN:
1024 #ifdef LIBXML_DEBUG_RUNTIME
1025  return(1);
1026 #else
1027  return(0);
1028 #endif
1029  case XML_WITH_ZLIB:
1030 #ifdef LIBXML_ZLIB_ENABLED
1031  return(1);
1032 #else
1033  return(0);
1034 #endif
1035  case XML_WITH_LZMA:
1036 #ifdef LIBXML_LZMA_ENABLED
1037  return(1);
1038 #else
1039  return(0);
1040 #endif
1041  case XML_WITH_ICU:
1042 #ifdef LIBXML_ICU_ENABLED
1043  return(1);
1044 #else
1045  return(0);
1046 #endif
1047  default:
1048  break;
1049  }
1050  return(0);
1051 }
1052 
1053 /************************************************************************
1054  * *
1055  * SAX2 defaulted attributes handling *
1056  * *
1057  ************************************************************************/
1058 
1065 static void
1067  if (ctxt == NULL) return;
1068 #ifdef LIBXML_SAX1_ENABLED
1069  if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070  ((ctxt->sax->startElementNs != NULL) ||
1071  (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1072 #else
1073  ctxt->sax2 = 1;
1074 #endif /* LIBXML_SAX1_ENABLED */
1075 
1076  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1079  if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080  (ctxt->str_xml_ns == NULL)) {
1081  xmlErrMemory(ctxt, NULL);
1082  }
1083 }
1084 
1085 typedef struct _xmlDefAttrs xmlDefAttrs;
1088  int nbAttrs; /* number of defaulted attributes on that element */
1089  int maxAttrs; /* the size of the array */
1090 #if __STDC_VERSION__ >= 199901L
1091  /* Using a C99 flexible array member avoids UBSan errors. */
1092  const xmlChar *values[]; /* array of localname/prefix/values/external */
1093 #else
1094  const xmlChar *values[5];
1095 #endif
1096 };
1097 
1115 static xmlChar *
1117 {
1118  if ((src == NULL) || (dst == NULL))
1119  return(NULL);
1120 
1121  while (*src == 0x20) src++;
1122  while (*src != 0) {
1123  if (*src == 0x20) {
1124  while (*src == 0x20) src++;
1125  if (*src != 0)
1126  *dst++ = 0x20;
1127  } else {
1128  *dst++ = *src++;
1129  }
1130  }
1131  *dst = 0;
1132  if (dst == src)
1133  return(NULL);
1134  return(dst);
1135 }
1136 
1148 static const xmlChar *
1150 {
1151  int i;
1152  int remove_head = 0;
1153  int need_realloc = 0;
1154  const xmlChar *cur;
1155 
1156  if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1157  return(NULL);
1158  i = *len;
1159  if (i <= 0)
1160  return(NULL);
1161 
1162  cur = src;
1163  while (*cur == 0x20) {
1164  cur++;
1165  remove_head++;
1166  }
1167  while (*cur != 0) {
1168  if (*cur == 0x20) {
1169  cur++;
1170  if ((*cur == 0x20) || (*cur == 0)) {
1171  need_realloc = 1;
1172  break;
1173  }
1174  } else
1175  cur++;
1176  }
1177  if (need_realloc) {
1178  xmlChar *ret;
1179 
1180  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1181  if (ret == NULL) {
1182  xmlErrMemory(ctxt, NULL);
1183  return(NULL);
1184  }
1186  *len = (int) strlen((const char *)ret);
1187  return(ret);
1188  } else if (remove_head) {
1189  *len -= remove_head;
1190  memmove(src, src + remove_head, 1 + *len);
1191  return(src);
1192  }
1193  return(NULL);
1194 }
1195 
1205 static void
1207  const xmlChar *fullname,
1208  const xmlChar *fullattr,
1209  const xmlChar *value) {
1211  int len;
1212  const xmlChar *name;
1213  const xmlChar *prefix;
1214 
1215  /*
1216  * Allows to detect attribute redefinitions
1217  */
1218  if (ctxt->attsSpecial != NULL) {
1219  if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1220  return;
1221  }
1222 
1223  if (ctxt->attsDefault == NULL) {
1224  ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1225  if (ctxt->attsDefault == NULL)
1226  goto mem_error;
1227  }
1228 
1229  /*
1230  * split the element name into prefix:localname , the string found
1231  * are within the DTD and then not associated to namespace names.
1232  */
1233  name = xmlSplitQName3(fullname, &len);
1234  if (name == NULL) {
1235  name = xmlDictLookup(ctxt->dict, fullname, -1);
1236  prefix = NULL;
1237  } else {
1238  name = xmlDictLookup(ctxt->dict, name, -1);
1239  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1240  }
1241 
1242  /*
1243  * make sure there is some storage
1244  */
1245  defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1246  if (defaults == NULL) {
1248  (4 * 5) * sizeof(const xmlChar *));
1249  if (defaults == NULL)
1250  goto mem_error;
1251  defaults->nbAttrs = 0;
1252  defaults->maxAttrs = 4;
1253  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1254  defaults, NULL) < 0) {
1255  xmlFree(defaults);
1256  goto mem_error;
1257  }
1258  } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1260 
1262  (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1263  if (temp == NULL)
1264  goto mem_error;
1265  defaults = temp;
1266  defaults->maxAttrs *= 2;
1267  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1268  defaults, NULL) < 0) {
1269  xmlFree(defaults);
1270  goto mem_error;
1271  }
1272  }
1273 
1274  /*
1275  * Split the element name into prefix:localname , the string found
1276  * are within the DTD and hen not associated to namespace names.
1277  */
1278  name = xmlSplitQName3(fullattr, &len);
1279  if (name == NULL) {
1280  name = xmlDictLookup(ctxt->dict, fullattr, -1);
1281  prefix = NULL;
1282  } else {
1283  name = xmlDictLookup(ctxt->dict, name, -1);
1284  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1285  }
1286 
1287  defaults->values[5 * defaults->nbAttrs] = name;
1288  defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1289  /* intern the string and precompute the end */
1290  len = xmlStrlen(value);
1291  value = xmlDictLookup(ctxt->dict, value, len);
1292  defaults->values[5 * defaults->nbAttrs + 2] = value;
1293  defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1294  if (ctxt->external)
1295  defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1296  else
1297  defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1298  defaults->nbAttrs++;
1299 
1300  return;
1301 
1302 mem_error:
1303  xmlErrMemory(ctxt, NULL);
1304  return;
1305 }
1306 
1316 static void
1318  const xmlChar *fullname,
1319  const xmlChar *fullattr,
1320  int type)
1321 {
1322  if (ctxt->attsSpecial == NULL) {
1323  ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1324  if (ctxt->attsSpecial == NULL)
1325  goto mem_error;
1326  }
1327 
1328  if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1329  return;
1330 
1331  xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1332  (void *) (ptrdiff_t) type);
1333  return;
1334 
1335 mem_error:
1336  xmlErrMemory(ctxt, NULL);
1337  return;
1338 }
1339 
1345 static void
1346 xmlCleanSpecialAttrCallback(void *payload, void *data,
1347  const xmlChar *fullname, const xmlChar *fullattr,
1348  const xmlChar *unused ATTRIBUTE_UNUSED) {
1350 
1351  if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1352  xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1353  }
1354 }
1355 
1364 static void
1366 {
1367  if (ctxt->attsSpecial == NULL)
1368  return;
1369 
1371 
1372  if (xmlHashSize(ctxt->attsSpecial) == 0) {
1373  xmlHashFree(ctxt->attsSpecial, NULL);
1374  ctxt->attsSpecial = NULL;
1375  }
1376  return;
1377 }
1378 
1437 int
1439 {
1440  const xmlChar *cur = lang, *nxt;
1441 
1442  if (cur == NULL)
1443  return (0);
1444  if (((cur[0] == 'i') && (cur[1] == '-')) ||
1445  ((cur[0] == 'I') && (cur[1] == '-')) ||
1446  ((cur[0] == 'x') && (cur[1] == '-')) ||
1447  ((cur[0] == 'X') && (cur[1] == '-'))) {
1448  /*
1449  * Still allow IANA code and user code which were coming
1450  * from the previous version of the XML-1.0 specification
1451  * it's deprecated but we should not fail
1452  */
1453  cur += 2;
1454  while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1455  ((cur[0] >= 'a') && (cur[0] <= 'z')))
1456  cur++;
1457  return(cur[0] == 0);
1458  }
1459  nxt = cur;
1460  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1461  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1462  nxt++;
1463  if (nxt - cur >= 4) {
1464  /*
1465  * Reserved
1466  */
1467  if ((nxt - cur > 8) || (nxt[0] != 0))
1468  return(0);
1469  return(1);
1470  }
1471  if (nxt - cur < 2)
1472  return(0);
1473  /* we got an ISO 639 code */
1474  if (nxt[0] == 0)
1475  return(1);
1476  if (nxt[0] != '-')
1477  return(0);
1478 
1479  nxt++;
1480  cur = nxt;
1481  /* now we can have extlang or script or region or variant */
1482  if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1483  goto region_m49;
1484 
1485  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1486  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1487  nxt++;
1488  if (nxt - cur == 4)
1489  goto script;
1490  if (nxt - cur == 2)
1491  goto region;
1492  if ((nxt - cur >= 5) && (nxt - cur <= 8))
1493  goto variant;
1494  if (nxt - cur != 3)
1495  return(0);
1496  /* we parsed an extlang */
1497  if (nxt[0] == 0)
1498  return(1);
1499  if (nxt[0] != '-')
1500  return(0);
1501 
1502  nxt++;
1503  cur = nxt;
1504  /* now we can have script or region or variant */
1505  if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1506  goto region_m49;
1507 
1508  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510  nxt++;
1511  if (nxt - cur == 2)
1512  goto region;
1513  if ((nxt - cur >= 5) && (nxt - cur <= 8))
1514  goto variant;
1515  if (nxt - cur != 4)
1516  return(0);
1517  /* we parsed a script */
1518 script:
1519  if (nxt[0] == 0)
1520  return(1);
1521  if (nxt[0] != '-')
1522  return(0);
1523 
1524  nxt++;
1525  cur = nxt;
1526  /* now we can have region or variant */
1527  if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1528  goto region_m49;
1529 
1530  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1531  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1532  nxt++;
1533 
1534  if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535  goto variant;
1536  if (nxt - cur != 2)
1537  return(0);
1538  /* we parsed a region */
1539 region:
1540  if (nxt[0] == 0)
1541  return(1);
1542  if (nxt[0] != '-')
1543  return(0);
1544 
1545  nxt++;
1546  cur = nxt;
1547  /* now we can just have a variant */
1548  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1549  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1550  nxt++;
1551 
1552  if ((nxt - cur < 5) || (nxt - cur > 8))
1553  return(0);
1554 
1555  /* we parsed a variant */
1556 variant:
1557  if (nxt[0] == 0)
1558  return(1);
1559  if (nxt[0] != '-')
1560  return(0);
1561  /* extensions and private use subtags not checked */
1562  return (1);
1563 
1564 region_m49:
1565  if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1566  ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1567  nxt += 3;
1568  goto region;
1569  }
1570  return(0);
1571 }
1572 
1573 /************************************************************************
1574  * *
1575  * Parser stacks related functions and macros *
1576  * *
1577  ************************************************************************/
1578 
1580  const xmlChar ** str);
1581 
1582 #ifdef SAX2
1583 
1594 static int
1595 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1596 {
1597  if (ctxt->options & XML_PARSE_NSCLEAN) {
1598  int i;
1599  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1600  if (ctxt->nsTab[i] == prefix) {
1601  /* in scope */
1602  if (ctxt->nsTab[i + 1] == URL)
1603  return(-2);
1604  /* out of scope keep it */
1605  break;
1606  }
1607  }
1608  }
1609  if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1610  ctxt->nsMax = 10;
1611  ctxt->nsNr = 0;
1612  ctxt->nsTab = (const xmlChar **)
1613  xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1614  if (ctxt->nsTab == NULL) {
1615  xmlErrMemory(ctxt, NULL);
1616  ctxt->nsMax = 0;
1617  return (-1);
1618  }
1619  } else if (ctxt->nsNr >= ctxt->nsMax) {
1620  const xmlChar ** tmp;
1621  ctxt->nsMax *= 2;
1622  tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1623  ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1624  if (tmp == NULL) {
1625  xmlErrMemory(ctxt, NULL);
1626  ctxt->nsMax /= 2;
1627  return (-1);
1628  }
1629  ctxt->nsTab = tmp;
1630  }
1631  ctxt->nsTab[ctxt->nsNr++] = prefix;
1632  ctxt->nsTab[ctxt->nsNr++] = URL;
1633  return (ctxt->nsNr);
1634 }
1644 static int
1646 {
1647  int i;
1648 
1649  if (ctxt->nsTab == NULL) return(0);
1650  if (ctxt->nsNr < nr) {
1651  xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1652  nr = ctxt->nsNr;
1653  }
1654  if (ctxt->nsNr <= 0)
1655  return (0);
1656 
1657  for (i = 0;i < nr;i++) {
1658  ctxt->nsNr--;
1659  ctxt->nsTab[ctxt->nsNr] = NULL;
1660  }
1661  return(nr);
1662 }
1663 #endif
1664 
1665 static int
1667  const xmlChar **atts;
1668  int *attallocs;
1669  int maxatts;
1670 
1671  if (ctxt->atts == NULL) {
1672  maxatts = 55; /* allow for 10 attrs by default */
1673  atts = (const xmlChar **)
1674  xmlMalloc(maxatts * sizeof(xmlChar *));
1675  if (atts == NULL) goto mem_error;
1676  ctxt->atts = atts;
1677  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1678  if (attallocs == NULL) goto mem_error;
1679  ctxt->attallocs = attallocs;
1680  ctxt->maxatts = maxatts;
1681  } else if (nr + 5 > ctxt->maxatts) {
1682  maxatts = (nr + 5) * 2;
1683  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1684  maxatts * sizeof(const xmlChar *));
1685  if (atts == NULL) goto mem_error;
1686  ctxt->atts = atts;
1687  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1688  (maxatts / 5) * sizeof(int));
1689  if (attallocs == NULL) goto mem_error;
1690  ctxt->attallocs = attallocs;
1691  ctxt->maxatts = maxatts;
1692  }
1693  return(ctxt->maxatts);
1694 mem_error:
1695  xmlErrMemory(ctxt, NULL);
1696  return(-1);
1697 }
1698 
1708 int
1710 {
1711  if ((ctxt == NULL) || (value == NULL))
1712  return(-1);
1713  if (ctxt->inputNr >= ctxt->inputMax) {
1714  ctxt->inputMax *= 2;
1715  ctxt->inputTab =
1717  ctxt->inputMax *
1718  sizeof(ctxt->inputTab[0]));
1719  if (ctxt->inputTab == NULL) {
1720  xmlErrMemory(ctxt, NULL);
1722  ctxt->inputMax /= 2;
1723  value = NULL;
1724  return (-1);
1725  }
1726  }
1727  ctxt->inputTab[ctxt->inputNr] = value;
1728  ctxt->input = value;
1729  return (ctxt->inputNr++);
1730 }
1741 {
1743 
1744  if (ctxt == NULL)
1745  return(NULL);
1746  if (ctxt->inputNr <= 0)
1747  return (NULL);
1748  ctxt->inputNr--;
1749  if (ctxt->inputNr > 0)
1750  ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1751  else
1752  ctxt->input = NULL;
1753  ret = ctxt->inputTab[ctxt->inputNr];
1754  ctxt->inputTab[ctxt->inputNr] = NULL;
1755  return (ret);
1756 }
1766 int
1768 {
1769  if (ctxt == NULL) return(0);
1770  if (ctxt->nodeNr >= ctxt->nodeMax) {
1771  xmlNodePtr *tmp;
1772 
1773  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1774  ctxt->nodeMax * 2 *
1775  sizeof(ctxt->nodeTab[0]));
1776  if (tmp == NULL) {
1777  xmlErrMemory(ctxt, NULL);
1778  return (-1);
1779  }
1780  ctxt->nodeTab = tmp;
1781  ctxt->nodeMax *= 2;
1782  }
1783  if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1784  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1785  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1786  "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1788  xmlHaltParser(ctxt);
1789  return(-1);
1790  }
1791  ctxt->nodeTab[ctxt->nodeNr] = value;
1792  ctxt->node = value;
1793  return (ctxt->nodeNr++);
1794 }
1795 
1804 xmlNodePtr
1806 {
1807  xmlNodePtr ret;
1808 
1809  if (ctxt == NULL) return(NULL);
1810  if (ctxt->nodeNr <= 0)
1811  return (NULL);
1812  ctxt->nodeNr--;
1813  if (ctxt->nodeNr > 0)
1814  ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1815  else
1816  ctxt->node = NULL;
1817  ret = ctxt->nodeTab[ctxt->nodeNr];
1818  ctxt->nodeTab[ctxt->nodeNr] = NULL;
1819  return (ret);
1820 }
1821 
1822 #ifdef LIBXML_PUSH_ENABLED
1823 
1834 static int
1835 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1836  const xmlChar *prefix, const xmlChar *URI, int nsNr)
1837 {
1838  if (ctxt->nameNr >= ctxt->nameMax) {
1839  const xmlChar * *tmp;
1840  void **tmp2;
1841  ctxt->nameMax *= 2;
1842  tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1843  ctxt->nameMax *
1844  sizeof(ctxt->nameTab[0]));
1845  if (tmp == NULL) {
1846  ctxt->nameMax /= 2;
1847  goto mem_error;
1848  }
1849  ctxt->nameTab = tmp;
1850  tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1851  ctxt->nameMax * 3 *
1852  sizeof(ctxt->pushTab[0]));
1853  if (tmp2 == NULL) {
1854  ctxt->nameMax /= 2;
1855  goto mem_error;
1856  }
1857  ctxt->pushTab = tmp2;
1858  }
1859  ctxt->nameTab[ctxt->nameNr] = value;
1860  ctxt->name = value;
1861  ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1862  ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1863  ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
1864  return (ctxt->nameNr++);
1865 mem_error:
1866  xmlErrMemory(ctxt, NULL);
1867  return (-1);
1868 }
1877 static const xmlChar *
1878 nameNsPop(xmlParserCtxtPtr ctxt)
1879 {
1880  const xmlChar *ret;
1881 
1882  if (ctxt->nameNr <= 0)
1883  return (NULL);
1884  ctxt->nameNr--;
1885  if (ctxt->nameNr > 0)
1886  ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1887  else
1888  ctxt->name = NULL;
1889  ret = ctxt->nameTab[ctxt->nameNr];
1890  ctxt->nameTab[ctxt->nameNr] = NULL;
1891  return (ret);
1892 }
1893 #endif /* LIBXML_PUSH_ENABLED */
1894 
1904 int
1906 {
1907  if (ctxt == NULL) return (-1);
1908 
1909  if (ctxt->nameNr >= ctxt->nameMax) {
1910  const xmlChar * *tmp;
1911  tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1912  ctxt->nameMax * 2 *
1913  sizeof(ctxt->nameTab[0]));
1914  if (tmp == NULL) {
1915  goto mem_error;
1916  }
1917  ctxt->nameTab = tmp;
1918  ctxt->nameMax *= 2;
1919  }
1920  ctxt->nameTab[ctxt->nameNr] = value;
1921  ctxt->name = value;
1922  return (ctxt->nameNr++);
1923 mem_error:
1924  xmlErrMemory(ctxt, NULL);
1925  return (-1);
1926 }
1935 const xmlChar *
1937 {
1938  const xmlChar *ret;
1939 
1940  if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1941  return (NULL);
1942  ctxt->nameNr--;
1943  if (ctxt->nameNr > 0)
1944  ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1945  else
1946  ctxt->name = NULL;
1947  ret = ctxt->nameTab[ctxt->nameNr];
1948  ctxt->nameTab[ctxt->nameNr] = NULL;
1949  return (ret);
1950 }
1951 
1952 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1953  if (ctxt->spaceNr >= ctxt->spaceMax) {
1954  int *tmp;
1955 
1956  ctxt->spaceMax *= 2;
1957  tmp = (int *) xmlRealloc(ctxt->spaceTab,
1958  ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1959  if (tmp == NULL) {
1960  xmlErrMemory(ctxt, NULL);
1961  ctxt->spaceMax /=2;
1962  return(-1);
1963  }
1964  ctxt->spaceTab = tmp;
1965  }
1966  ctxt->spaceTab[ctxt->spaceNr] = val;
1967  ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1968  return(ctxt->spaceNr++);
1969 }
1970 
1971 static int spacePop(xmlParserCtxtPtr ctxt) {
1972  int ret;
1973  if (ctxt->spaceNr <= 0) return(0);
1974  ctxt->spaceNr--;
1975  if (ctxt->spaceNr > 0)
1976  ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1977  else
1978  ctxt->space = &ctxt->spaceTab[0];
1979  ret = ctxt->spaceTab[ctxt->spaceNr];
1980  ctxt->spaceTab[ctxt->spaceNr] = -1;
1981  return(ret);
1982 }
1983 
1984 /*
1985  * Macros for accessing the content. Those should be used only by the parser,
1986  * and not exported.
1987  *
1988  * Dirty macros, i.e. one often need to make assumption on the context to
1989  * use them
1990  *
1991  * CUR_PTR return the current pointer to the xmlChar to be parsed.
1992  * To be used with extreme caution since operations consuming
1993  * characters may move the input buffer to a different location !
1994  * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1995  * This should be used internally by the parser
1996  * only to compare to ASCII values otherwise it would break when
1997  * running with UTF-8 encoding.
1998  * RAW same as CUR but in the input buffer, bypass any token
1999  * extraction that may have been done
2000  * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
2001  * to compare on ASCII based substring.
2002  * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2003  * strings without newlines within the parser.
2004  * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2005  * defined char within the parser.
2006  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2007  *
2008  * NEXT Skip to the next character, this does the proper decoding
2009  * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2010  * NEXTL(l) Skip the current unicode character of l xmlChars long.
2011  * CUR_CHAR(l) returns the current unicode character (int), set l
2012  * to the number of xmlChars used for the encoding [0-5].
2013  * CUR_SCHAR same but operate on a string instead of the context
2014  * COPY_BUF copy the current unicode char to the target buffer, increment
2015  * the index
2016  * GROW, SHRINK handling of input buffers
2017  */
2018 
2019 #define RAW (*ctxt->input->cur)
2020 #define CUR (*ctxt->input->cur)
2021 #define NXT(val) ctxt->input->cur[(val)]
2022 #define CUR_PTR ctxt->input->cur
2023 #define BASE_PTR ctxt->input->base
2024 
2025 #define CMP4( s, c1, c2, c3, c4 ) \
2026  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2027  ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2028 #define CMP5( s, c1, c2, c3, c4, c5 ) \
2029  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2030 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2031  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2032 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2033  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2034 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2035  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2036 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2037  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2038  ((unsigned char *) s)[ 8 ] == c9 )
2039 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2040  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2041  ((unsigned char *) s)[ 9 ] == c10 )
2042 
2043 #define SKIP(val) do { \
2044  ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
2045  if (*ctxt->input->cur == 0) \
2046  xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2047  } while (0)
2048 
2049 #define SKIPL(val) do { \
2050  int skipl; \
2051  for(skipl=0; skipl<val; skipl++) { \
2052  if (*(ctxt->input->cur) == '\n') { \
2053  ctxt->input->line++; ctxt->input->col = 1; \
2054  } else ctxt->input->col++; \
2055  ctxt->nbChars++; \
2056  ctxt->input->cur++; \
2057  } \
2058  if (*ctxt->input->cur == 0) \
2059  xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2060  } while (0)
2061 
2062 #define SHRINK if ((ctxt->progressive == 0) && \
2063  (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2064  (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2065  xmlSHRINK (ctxt);
2066 
2067 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2068  xmlParserInputShrink(ctxt->input);
2069  if (*ctxt->input->cur == 0)
2071 }
2072 
2073 #define GROW if ((ctxt->progressive == 0) && \
2074  (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2075  xmlGROW (ctxt);
2076 
2077 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2078  unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2079  unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2080 
2081  if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2082  (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2083  ((ctxt->input->buf) &&
2085  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2086  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2087  xmlHaltParser(ctxt);
2088  return;
2089  }
2091  if ((ctxt->input->cur > ctxt->input->end) ||
2092  (ctxt->input->cur < ctxt->input->base)) {
2093  xmlHaltParser(ctxt);
2094  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2095  return;
2096  }
2097  if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2099 }
2100 
2101 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2102 
2103 #define NEXT xmlNextChar(ctxt)
2104 
2105 #define NEXT1 { \
2106  ctxt->input->col++; \
2107  ctxt->input->cur++; \
2108  ctxt->nbChars++; \
2109  if (*ctxt->input->cur == 0) \
2110  xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2111  }
2112 
2113 #define NEXTL(l) do { \
2114  if (*(ctxt->input->cur) == '\n') { \
2115  ctxt->input->line++; ctxt->input->col = 1; \
2116  } else ctxt->input->col++; \
2117  ctxt->input->cur += l; \
2118  } while (0)
2119 
2120 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2121 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2122 
2123 #define COPY_BUF(l,b,i,v) \
2124  if (l == 1) b[i++] = (xmlChar) v; \
2125  else i += xmlCopyCharMultiByte(&b[i],v)
2126 
2137 int
2139  int res = 0;
2140 
2141  /*
2142  * It's Okay to use CUR/NEXT here since all the blanks are on
2143  * the ASCII range.
2144  */
2145  if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2146  const xmlChar *cur;
2147  /*
2148  * if we are in the document content, go really fast
2149  */
2150  cur = ctxt->input->cur;
2151  while (IS_BLANK_CH(*cur)) {
2152  if (*cur == '\n') {
2153  ctxt->input->line++; ctxt->input->col = 1;
2154  } else {
2155  ctxt->input->col++;
2156  }
2157  cur++;
2158  res++;
2159  if (*cur == 0) {
2160  ctxt->input->cur = cur;
2162  cur = ctxt->input->cur;
2163  }
2164  }
2165  ctxt->input->cur = cur;
2166  } else {
2167  int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2168 
2169  while (1) {
2170  if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2171  NEXT;
2172  } else if (CUR == '%') {
2173  /*
2174  * Need to handle support of entities branching here
2175  */
2176  if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2177  break;
2178  xmlParsePEReference(ctxt);
2179  } else if (CUR == 0) {
2180  if (ctxt->inputNr <= 1)
2181  break;
2182  xmlPopInput(ctxt);
2183  } else {
2184  break;
2185  }
2186 
2187  /*
2188  * Also increase the counter when entering or exiting a PERef.
2189  * The spec says: "When a parameter-entity reference is recognized
2190  * in the DTD and included, its replacement text MUST be enlarged
2191  * by the attachment of one leading and one following space (#x20)
2192  * character."
2193  */
2194  res++;
2195  }
2196  }
2197  return(res);
2198 }
2199 
2200 /************************************************************************
2201  * *
2202  * Commodity functions to handle entities *
2203  * *
2204  ************************************************************************/
2205 
2215 xmlChar
2217  if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2220  "Popping input %d\n", ctxt->inputNr);
2221  if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2222  (ctxt->instate != XML_PARSER_EOF))
2224  "Unfinished entity outside the DTD");
2226  if (*ctxt->input->cur == 0)
2228  return(CUR);
2229 }
2230 
2240 int
2242  int ret;
2243  if (input == NULL) return(-1);
2244 
2245  if (xmlParserDebugEntities) {
2246  if ((ctxt->input != NULL) && (ctxt->input->filename))
2248  "%s(%d): ", ctxt->input->filename,
2249  ctxt->input->line);
2251  "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2252  }
2253  if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2254  (ctxt->inputNr > 1024)) {
2256  while (ctxt->inputNr > 1)
2258  return(-1);
2259  }
2260  ret = inputPush(ctxt, input);
2261  if (ctxt->instate == XML_PARSER_EOF)
2262  return(-1);
2263  GROW;
2264  return(ret);
2265 }
2266 
2282 int
2284  unsigned int val = 0;
2285  int count = 0;
2286  unsigned int outofrange = 0;
2287 
2288  /*
2289  * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2290  */
2291  if ((RAW == '&') && (NXT(1) == '#') &&
2292  (NXT(2) == 'x')) {
2293  SKIP(3);
2294  GROW;
2295  while (RAW != ';') { /* loop blocked by count */
2296  if (count++ > 20) {
2297  count = 0;
2298  GROW;
2299  if (ctxt->instate == XML_PARSER_EOF)
2300  return(0);
2301  }
2302  if ((RAW >= '0') && (RAW <= '9'))
2303  val = val * 16 + (CUR - '0');
2304  else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2305  val = val * 16 + (CUR - 'a') + 10;
2306  else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2307  val = val * 16 + (CUR - 'A') + 10;
2308  else {
2310  val = 0;
2311  break;
2312  }
2313  if (val > 0x10FFFF)
2314  outofrange = val;
2315 
2316  NEXT;
2317  count++;
2318  }
2319  if (RAW == ';') {
2320  /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2321  ctxt->input->col++;
2322  ctxt->nbChars ++;
2323  ctxt->input->cur++;
2324  }
2325  } else if ((RAW == '&') && (NXT(1) == '#')) {
2326  SKIP(2);
2327  GROW;
2328  while (RAW != ';') { /* loop blocked by count */
2329  if (count++ > 20) {
2330  count = 0;
2331  GROW;
2332  if (ctxt->instate == XML_PARSER_EOF)
2333  return(0);
2334  }
2335  if ((RAW >= '0') && (RAW <= '9'))
2336  val = val * 10 + (CUR - '0');
2337  else {
2339  val = 0;
2340  break;
2341  }
2342  if (val > 0x10FFFF)
2343  outofrange = val;
2344 
2345  NEXT;
2346  count++;
2347  }
2348  if (RAW == ';') {
2349  /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2350  ctxt->input->col++;
2351  ctxt->nbChars ++;
2352  ctxt->input->cur++;
2353  }
2354  } else {
2356  }
2357 
2358  /*
2359  * [ WFC: Legal Character ]
2360  * Characters referred to using character references must match the
2361  * production for Char.
2362  */
2363  if ((IS_CHAR(val) && (outofrange == 0))) {
2364  return(val);
2365  } else {
2366  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2367  "xmlParseCharRef: invalid xmlChar value %d\n",
2368  val);
2369  }
2370  return(0);
2371 }
2372 
2391 static int
2393  const xmlChar *ptr;
2394  xmlChar cur;
2395  unsigned int val = 0;
2396  unsigned int outofrange = 0;
2397 
2398  if ((str == NULL) || (*str == NULL)) return(0);
2399  ptr = *str;
2400  cur = *ptr;
2401  if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2402  ptr += 3;
2403  cur = *ptr;
2404  while (cur != ';') { /* Non input consuming loop */
2405  if ((cur >= '0') && (cur <= '9'))
2406  val = val * 16 + (cur - '0');
2407  else if ((cur >= 'a') && (cur <= 'f'))
2408  val = val * 16 + (cur - 'a') + 10;
2409  else if ((cur >= 'A') && (cur <= 'F'))
2410  val = val * 16 + (cur - 'A') + 10;
2411  else {
2413  val = 0;
2414  break;
2415  }
2416  if (val > 0x10FFFF)
2417  outofrange = val;
2418 
2419  ptr++;
2420  cur = *ptr;
2421  }
2422  if (cur == ';')
2423  ptr++;
2424  } else if ((cur == '&') && (ptr[1] == '#')){
2425  ptr += 2;
2426  cur = *ptr;
2427  while (cur != ';') { /* Non input consuming loops */
2428  if ((cur >= '0') && (cur <= '9'))
2429  val = val * 10 + (cur - '0');
2430  else {
2432  val = 0;
2433  break;
2434  }
2435  if (val > 0x10FFFF)
2436  outofrange = val;
2437 
2438  ptr++;
2439  cur = *ptr;
2440  }
2441  if (cur == ';')
2442  ptr++;
2443  } else {
2445  return(0);
2446  }
2447  *str = ptr;
2448 
2449  /*
2450  * [ WFC: Legal Character ]
2451  * Characters referred to using character references must match the
2452  * production for Char.
2453  */
2454  if ((IS_CHAR(val) && (outofrange == 0))) {
2455  return(val);
2456  } else {
2457  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2458  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2459  val);
2460  }
2461  return(0);
2462 }
2463 
2496 void
2498  switch(ctxt->instate) {
2500  return;
2501  case XML_PARSER_COMMENT:
2502  return;
2503  case XML_PARSER_START_TAG:
2504  return;
2505  case XML_PARSER_END_TAG:
2506  return;
2507  case XML_PARSER_EOF:
2509  return;
2510  case XML_PARSER_PROLOG:
2511  case XML_PARSER_START:
2512  case XML_PARSER_MISC:
2514  return;
2516  case XML_PARSER_CONTENT:
2518  case XML_PARSER_PI:
2521  /* we just ignore it there */
2522  return;
2523  case XML_PARSER_EPILOG:
2525  return;
2527  /*
2528  * NOTE: in the case of entity values, we don't do the
2529  * substitution here since we need the literal
2530  * entity value to be able to save the internal
2531  * subset of the document.
2532  * This will be handled by xmlStringDecodeEntities
2533  */
2534  return;
2535  case XML_PARSER_DTD:
2536  /*
2537  * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2538  * In the internal DTD subset, parameter-entity references
2539  * can occur only where markup declarations can occur, not
2540  * within markup declarations.
2541  * In that case this is handled in xmlParseMarkupDecl
2542  */
2543  if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2544  return;
2545  if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2546  return;
2547  break;
2548  case XML_PARSER_IGNORE:
2549  return;
2550  }
2551 
2552  xmlParsePEReference(ctxt);
2553 }
2554 
2555 /*
2556  * Macro used to grow the current buffer.
2557  * buffer##_size is expected to be a size_t
2558  * mem_error: is expected to handle memory allocation failures
2559  */
2560 #define growBuffer(buffer, n) { \
2561  xmlChar *tmp; \
2562  size_t new_size = buffer##_size * 2 + n; \
2563  if (new_size < buffer##_size) goto mem_error; \
2564  tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
2565  if (tmp == NULL) goto mem_error; \
2566  buffer = tmp; \
2567  buffer##_size = new_size; \
2568 }
2569 
2589 xmlChar *
2591  int what, xmlChar end, xmlChar end2, xmlChar end3) {
2592  xmlChar *buffer = NULL;
2593  size_t buffer_size = 0;
2594  size_t nbchars = 0;
2595 
2596  xmlChar *current = NULL;
2597  xmlChar *rep = NULL;
2598  const xmlChar *last;
2599  xmlEntityPtr ent;
2600  int c,l;
2601 
2602  if ((ctxt == NULL) || (str == NULL) || (len < 0))
2603  return(NULL);
2604  last = str + len;
2605 
2606  if (((ctxt->depth > 40) &&
2607  ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2608  (ctxt->depth > 1024)) {
2610  return(NULL);
2611  }
2612 
2613  /*
2614  * allocate a translation buffer.
2615  */
2618  if (buffer == NULL) goto mem_error;
2619 
2620  /*
2621  * OK loop until we reach one of the ending char or a size limit.
2622  * we are operating on already parsed values.
2623  */
2624  if (str < last)
2625  c = CUR_SCHAR(str, l);
2626  else
2627  c = 0;
2628  while ((c != 0) && (c != end) && /* non input consuming loop */
2629  (c != end2) && (c != end3)) {
2630 
2631  if (c == 0) break;
2632  if ((c == '&') && (str[1] == '#')) {
2633  int val = xmlParseStringCharRef(ctxt, &str);
2634  if (val == 0)
2635  goto int_error;
2636  COPY_BUF(0,buffer,nbchars,val);
2637  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2639  }
2640  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2643  "String decoding Entity Reference: %.30s\n",
2644  str);
2645  ent = xmlParseStringEntityRef(ctxt, &str);
2646  xmlParserEntityCheck(ctxt, 0, ent, 0);
2647  if (ent != NULL)
2648  ctxt->nbentities += ent->checked / 2;
2649  if ((ent != NULL) &&
2651  if (ent->content != NULL) {
2652  COPY_BUF(0,buffer,nbchars,ent->content[0]);
2653  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2655  }
2656  } else {
2657  xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2658  "predefined entity has no content\n");
2659  goto int_error;
2660  }
2661  } else if ((ent != NULL) && (ent->content != NULL)) {
2662  ctxt->depth++;
2663  rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2664  0, 0, 0);
2665  ctxt->depth--;
2666  if (rep == NULL)
2667  goto int_error;
2668 
2669  current = rep;
2670  while (*current != 0) { /* non input consuming loop */
2671  buffer[nbchars++] = *current++;
2672  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2673  if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2674  goto int_error;
2676  }
2677  }
2678  xmlFree(rep);
2679  rep = NULL;
2680  } else if (ent != NULL) {
2681  int i = xmlStrlen(ent->name);
2682  const xmlChar *cur = ent->name;
2683 
2684  buffer[nbchars++] = '&';
2685  if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2687  }
2688  for (;i > 0;i--)
2689  buffer[nbchars++] = *cur++;
2690  buffer[nbchars++] = ';';
2691  }
2692  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2695  "String decoding PE Reference: %.30s\n", str);
2696  ent = xmlParseStringPEReference(ctxt, &str);
2697  xmlParserEntityCheck(ctxt, 0, ent, 0);
2698  if (ent != NULL)
2699  ctxt->nbentities += ent->checked / 2;
2700  if (ent != NULL) {
2701  if (ent->content == NULL) {
2702  /*
2703  * Note: external parsed entities will not be loaded,
2704  * it is not required for a non-validating parser to
2705  * complete external PEreferences coming from the
2706  * internal subset
2707  */
2708  if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2709  ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2710  (ctxt->validate != 0)) {
2711  xmlLoadEntityContent(ctxt, ent);
2712  } else {
2713  xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2714  "not validating will not read content for PE entity %s\n",
2715  ent->name, NULL);
2716  }
2717  }
2718  ctxt->depth++;
2719  rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2720  0, 0, 0);
2721  ctxt->depth--;
2722  if (rep == NULL)
2723  goto int_error;
2724  current = rep;
2725  while (*current != 0) { /* non input consuming loop */
2726  buffer[nbchars++] = *current++;
2727  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2728  if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2729  goto int_error;
2731  }
2732  }
2733  xmlFree(rep);
2734  rep = NULL;
2735  }
2736  } else {
2737  COPY_BUF(l,buffer,nbchars,c);
2738  str += l;
2739  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2741  }
2742  }
2743  if (str < last)
2744  c = CUR_SCHAR(str, l);
2745  else
2746  c = 0;
2747  }
2748  buffer[nbchars] = 0;
2749  return(buffer);
2750 
2751 mem_error:
2752  xmlErrMemory(ctxt, NULL);
2753 int_error:
2754  if (rep != NULL)
2755  xmlFree(rep);
2756  if (buffer != NULL)
2757  xmlFree(buffer);
2758  return(NULL);
2759 }
2760 
2779 xmlChar *
2781  xmlChar end, xmlChar end2, xmlChar end3) {
2782  if ((ctxt == NULL) || (str == NULL)) return(NULL);
2783  return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2784  end, end2, end3));
2785 }
2786 
2787 /************************************************************************
2788  * *
2789  * Commodity functions, cleanup needed ? *
2790  * *
2791  ************************************************************************/
2792 
2805 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2806  int blank_chars) {
2807  int i, ret;
2808  xmlNodePtr lastChild;
2809 
2810  /*
2811  * Don't spend time trying to differentiate them, the same callback is
2812  * used !
2813  */
2814  if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2815  return(0);
2816 
2817  /*
2818  * Check for xml:space value.
2819  */
2820  if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2821  (*(ctxt->space) == -2))
2822  return(0);
2823 
2824  /*
2825  * Check that the string is made of blanks
2826  */
2827  if (blank_chars == 0) {
2828  for (i = 0;i < len;i++)
2829  if (!(IS_BLANK_CH(str[i]))) return(0);
2830  }
2831 
2832  /*
2833  * Look if the element is mixed content in the DTD if available
2834  */
2835  if (ctxt->node == NULL) return(0);
2836  if (ctxt->myDoc != NULL) {
2837  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2838  if (ret == 0) return(1);
2839  if (ret == 1) return(0);
2840  }
2841 
2842  /*
2843  * Otherwise, heuristic :-\
2844  */
2845  if ((RAW != '<') && (RAW != 0xD)) return(0);
2846  if ((ctxt->node->children == NULL) &&
2847  (RAW == '<') && (NXT(1) == '/')) return(0);
2848 
2849  lastChild = xmlGetLastChild(ctxt->node);
2850  if (lastChild == NULL) {
2851  if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2852  (ctxt->node->content != NULL)) return(0);
2853  } else if (xmlNodeIsText(lastChild))
2854  return(0);
2855  else if ((ctxt->node->children != NULL) &&
2856  (xmlNodeIsText(ctxt->node->children)))
2857  return(0);
2858  return(1);
2859 }
2860 
2861 /************************************************************************
2862  * *
2863  * Extra stuff for namespace support *
2864  * Relates to http://www.w3.org/TR/WD-xml-names *
2865  * *
2866  ************************************************************************/
2867 
/*
 * Splits the qualified name `name` at its first ':' into a prefix and a
 * local part.
 * NOTE(review): listing lines 2887-2888 (the declarator and the declaration
 * of the scratch array `buf`) are absent from this extract; presumably this
 * is xmlSplitQName(ctxt, name, prefix) -- confirm against the full file.
 *
 * Returns NULL when prefix or name is NULL, or when an allocation fails.
 * *prefix is reset to NULL before anything else.  For a name of the form
 * "p:l" with both parts non-empty, *prefix receives a separately allocated
 * copy of "p" and a separately allocated copy of "l" is returned.  A name
 * that starts with ':', a name with nothing after its first ':', and
 * (unless XML_XML_NAMESPACE is defined) a name starting with "xml:" are
 * returned whole through xmlStrdup with *prefix left NULL.  A name without
 * any ':' is returned as a copy with *prefix left NULL.
 */
2886 xmlChar *
2889  xmlChar *buffer = NULL;
2890  int len = 0;
2891  int max = XML_MAX_NAMELEN;
2892  xmlChar *ret = NULL;
2893  const xmlChar *cur = name;
2894  int c;
2895 
2896  if (prefix == NULL) return(NULL);
2897  *prefix = NULL;
2898 
2899  if (cur == NULL) return(NULL);
2900 
2901 #ifndef XML_XML_NAMESPACE
2902  /* xml: prefix is not really a namespace */
2903  if ((cur[0] == 'x') && (cur[1] == 'm') &&
2904  (cur[2] == 'l') && (cur[3] == ':'))
2905  return(xmlStrdup(name));
2906 #endif
2907 
2908  /* nasty but well-formed */
2909  if (cur[0] == ':')
2910  return(xmlStrdup(name));
2911 
 /* First pass: copy bytes up to the first ':' (or the end) into buf. */
2912  c = *cur++;
2913  while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2914  buf[len++] = c;
2915  c = *cur++;
2916  }
 /* buf is full: continue in a heap buffer that doubles as needed. */
2917  if (len >= max) {
2918  /*
2919  * Okay someone managed to make a huge name, so he's ready to pay
2920  * for the processing speed.
2921  */
2922  max = len * 2;
2923 
2924  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2925  if (buffer == NULL) {
2926  xmlErrMemory(ctxt, NULL);
2927  return(NULL);
2928  }
2929  memcpy(buffer, buf, len);
2930  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2931  if (len + 10 > max) {
2932  xmlChar *tmp;
2933 
2934  max *= 2;
2935  tmp = (xmlChar *) xmlRealloc(buffer,
2936  max * sizeof(xmlChar));
2937  if (tmp == NULL) {
2938  xmlFree(buffer);
2939  xmlErrMemory(ctxt, NULL);
2940  return(NULL);
2941  }
2942  buffer = tmp;
2943  }
2944  buffer[len++] = c;
2945  c = *cur++;
2946  }
2947  buffer[len] = 0;
2948  }
2949 
 /* cur already points past the ':', so *cur == 0 means "prefix:" with no local part. */
2950  if ((c == ':') && (*cur == 0)) {
2951  if (buffer != NULL)
2952  xmlFree(buffer);
2953  *prefix = NULL;
2954  return(xmlStrdup(name));
2955  }
2956 
 /* ret now holds the text scanned so far; ownership of buffer moves to ret. */
2957  if (buffer == NULL)
2958  ret = xmlStrndup(buf, len);
2959  else {
2960  ret = buffer;
2961  buffer = NULL;
2962  max = XML_MAX_NAMELEN;
2963  }
2964 
2965 
 /* A ':' was found: what was scanned is the prefix, the rest is the local part. */
2966  if (c == ':') {
2967  c = *cur;
2968  *prefix = ret;
 /* NOTE(review): the "prefix:" case already returned at listing line 2954, so c == 0 looks unreachable here. */
2969  if (c == 0) {
2970  return(xmlStrndup(BAD_CAST "", 0));
2971  }
2972  len = 0;
2973 
2974  /*
2975  * Check that the first character is proper to start
2976  * a new name
2977  */
2978  if (!(((c >= 0x61) && (c <= 0x7A)) ||
2979  ((c >= 0x41) && (c <= 0x5A)) ||
2980  (c == '_') || (c == ':'))) {
2981  int l;
2982  int first = CUR_SCHAR(cur, l);
2983 
2984  if (!IS_LETTER(first) && (first != '_')) {
2985  xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2986  "Name %s is not XML Namespace compliant\n",
2987  name);
2988  }
2989  }
 /* c already holds *cur, so step past it before the copy loop. */
2990  cur++;
2991 
2992  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2993  buf[len++] = c;
2994  c = *cur++;
2995  }
2996  if (len >= max) {
2997  /*
2998  * Okay someone managed to make a huge name, so he's ready to pay
2999  * for the processing speed.
3000  */
3001  max = len * 2;
3002 
3003  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3004  if (buffer == NULL) {
3005  xmlErrMemory(ctxt, NULL);
3006  return(NULL);
3007  }
3008  memcpy(buffer, buf, len);
3009  while (c != 0) { /* tested bigname2.xml */
3010  if (len + 10 > max) {
3011  xmlChar *tmp;
3012 
3013  max *= 2;
3014  tmp = (xmlChar *) xmlRealloc(buffer,
3015  max * sizeof(xmlChar));
3016  if (tmp == NULL) {
3017  xmlErrMemory(ctxt, NULL);
3018  xmlFree(buffer);
3019  return(NULL);
3020  }
3021  buffer = tmp;
3022  }
3023  buffer[len++] = c;
3024  c = *cur++;
3025  }
3026  buffer[len] = 0;
3027  }
3028 
3029  if (buffer == NULL)
3030  ret = xmlStrndup(buf, len);
3031  else {
3032  ret = buffer;
3033  }
3034  }
3035 
3036  return(ret);
3037 }
3038 
3039 /************************************************************************
3040  * *
3041  * The parser itself *
3042  * Relates to http://www.w3.org/TR/REC-xml *
3043  * *
3044  ************************************************************************/
3045 
3046 /************************************************************************
3047  * *
3048  * Routines to parse Name, NCName and NmToken *
3049  * *
3050  ************************************************************************/
/*
 * Call counters, compiled only when DEBUG is defined.  Each one is
 * incremented inside an "#ifdef DEBUG" section of a name-parsing routine
 * in this file.
 */
3051 #ifdef DEBUG
3052 static unsigned long nbParseName = 0;
3053 static unsigned long nbParseNmToken = 0;
3054 static unsigned long nbParseNCName = 0;
3055 static unsigned long nbParseNCNameComplex = 0;
3056 static unsigned long nbParseNameComplex = 0;
3057 static unsigned long nbParseStringName = 0;
3058 #endif
3059 
3060 /*
3061  * The two following functions are related to the change of accepted
3062  * characters for Name and NmToken in the Revision 5 of XML-1.0
3063  * They correspond to the modified production [4] and the new production [4a]
3064  * changes in that revision. Also note that the macros used for the
3065  * productions Letter, Digit, CombiningChar and Extender are not needed
3066  * anymore.
3067  * We still keep compatibility to pre-revision5 parsing semantics if the
3068  * new XML_PARSE_OLD10 option is given to the parser.
3069  */
/*
 * Tells whether code point c may start a Name.
 * NOTE(review): the declarator (listing line 3071) is missing from this
 * extract; callers in this file invoke it as xmlIsNameStartChar(ctxt, c).
 *
 * When XML_PARSE_OLD10 is not set in ctxt->options the explicit ranges
 * below are used; otherwise the test is IS_LETTER(c), '_' or ':'.
 * Returns 1 if c is accepted, 0 otherwise.
 */
3070 static int
3072  if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3073  /*
3074  * Use the new checks of production [4] [4a] and [5] of the
3075  * Update 5 of XML-1.0
3076  */
3077  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3078  (((c >= 'a') && (c <= 'z')) ||
3079  ((c >= 'A') && (c <= 'Z')) ||
3080  (c == '_') || (c == ':') ||
3081  ((c >= 0xC0) && (c <= 0xD6)) ||
3082  ((c >= 0xD8) && (c <= 0xF6)) ||
3083  ((c >= 0xF8) && (c <= 0x2FF)) ||
3084  ((c >= 0x370) && (c <= 0x37D)) ||
3085  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3086  ((c >= 0x200C) && (c <= 0x200D)) ||
3087  ((c >= 0x2070) && (c <= 0x218F)) ||
3088  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3089  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3090  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3091  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3092  ((c >= 0x10000) && (c <= 0xEFFFF))))
3093  return(1);
3094  } else {
3095  if (IS_LETTER(c) || (c == '_') || (c == ':'))
3096  return(1);
3097  }
3098  return(0);
3099 }
3100 
/*
 * Tells whether code point c may appear inside a Name (after its first
 * character).
 * NOTE(review): the declarator (listing line 3102) is missing from this
 * extract; callers in this file invoke it as xmlIsNameChar(ctxt, c).
 *
 * When XML_PARSE_OLD10 is not set in ctxt->options the explicit ranges
 * below are used (the entries tagged "!start" are the ones not accepted as
 * a first character); otherwise the test combines IS_LETTER, IS_DIGIT,
 * IS_COMBINING, IS_EXTENDER and the characters '.', '-', '_' and ':'.
 * Returns 1 if c is accepted, 0 otherwise.
 */
3101 static int
3103  if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3104  /*
3105  * Use the new checks of production [4] [4a] and [5] of the
3106  * Update 5 of XML-1.0
3107  */
3108  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3109  (((c >= 'a') && (c <= 'z')) ||
3110  ((c >= 'A') && (c <= 'Z')) ||
3111  ((c >= '0') && (c <= '9')) || /* !start */
3112  (c == '_') || (c == ':') ||
3113  (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3114  ((c >= 0xC0) && (c <= 0xD6)) ||
3115  ((c >= 0xD8) && (c <= 0xF6)) ||
3116  ((c >= 0xF8) && (c <= 0x2FF)) ||
3117  ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3118  ((c >= 0x370) && (c <= 0x37D)) ||
3119  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3120  ((c >= 0x200C) && (c <= 0x200D)) ||
3121  ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3122  ((c >= 0x2070) && (c <= 0x218F)) ||
3123  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3124  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3125  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3126  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3127  ((c >= 0x10000) && (c <= 0xEFFFF))))
3128  return(1);
3129  } else {
3130  if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3131  (c == '.') || (c == '-') ||
3132  (c == '_') || (c == ':') ||
3133  (IS_COMBINING(c)) ||
3134  (IS_EXTENDER(c)))
3135  return(1);
3136  }
3137  return(0);
3138 }
3139 
3141  int *len, int *alloc, int normalize);
3142 
/*
 * General (non-accelerated) Name parser working on the parser input.
 * NOTE(review): the declarator (listing line 3144) is missing from this
 * extract; the ASCII fast path in this file calls it as
 * xmlParseNameComplex(ctxt).
 *
 * Reads characters with CUR_CHAR/NEXTL, accumulating the consumed byte
 * count in len.  The accepted character sets depend on XML_PARSE_OLD10 in
 * ctxt->options.  Every XML_PARSER_CHUNK_SIZE iterations the input is
 * grown and the EOF state re-checked.
 *
 * Returns the dictionary entry (xmlDictLookup) for the len bytes that end
 * at the current input position, or NULL when: the parser is in
 * XML_PARSER_EOF state, the first character cannot start a name, the name
 * is longer than XML_MAX_NAME_LENGTH without XML_PARSE_HUGE, or the input
 * buffer no longer holds len bytes before the current position.
 */
3143 static const xmlChar *
3145  int len = 0, l;
3146  int c;
3147  int count = 0;
3148 
3149 #ifdef DEBUG
3150  nbParseNameComplex++;
3151 #endif
3152 
3153  /*
3154  * Handler for more complex cases
3155  */
3156  GROW;
3157  if (ctxt->instate == XML_PARSER_EOF)
3158  return(NULL);
3159  c = CUR_CHAR(l);
3160  if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3161  /*
3162  * Use the new checks of production [4] [4a] and [5] of the
3163  * Update 5 of XML-1.0
3164  */
3165  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3166  (!(((c >= 'a') && (c <= 'z')) ||
3167  ((c >= 'A') && (c <= 'Z')) ||
3168  (c == '_') || (c == ':') ||
3169  ((c >= 0xC0) && (c <= 0xD6)) ||
3170  ((c >= 0xD8) && (c <= 0xF6)) ||
3171  ((c >= 0xF8) && (c <= 0x2FF)) ||
3172  ((c >= 0x370) && (c <= 0x37D)) ||
3173  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3174  ((c >= 0x200C) && (c <= 0x200D)) ||
3175  ((c >= 0x2070) && (c <= 0x218F)) ||
3176  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3177  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3178  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3179  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3180  ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3181  return(NULL);
3182  }
3183  len += l;
3184  NEXTL(l);
3185  c = CUR_CHAR(l);
3186  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3187  (((c >= 'a') && (c <= 'z')) ||
3188  ((c >= 'A') && (c <= 'Z')) ||
3189  ((c >= '0') && (c <= '9')) || /* !start */
3190  (c == '_') || (c == ':') ||
3191  (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3192  ((c >= 0xC0) && (c <= 0xD6)) ||
3193  ((c >= 0xD8) && (c <= 0xF6)) ||
3194  ((c >= 0xF8) && (c <= 0x2FF)) ||
3195  ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3196  ((c >= 0x370) && (c <= 0x37D)) ||
3197  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3198  ((c >= 0x200C) && (c <= 0x200D)) ||
3199  ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3200  ((c >= 0x2070) && (c <= 0x218F)) ||
3201  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3202  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3203  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3204  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3205  ((c >= 0x10000) && (c <= 0xEFFFF))
3206  )) {
3207  if (count++ > XML_PARSER_CHUNK_SIZE) {
3208  count = 0;
3209  GROW;
3210  if (ctxt->instate == XML_PARSER_EOF)
3211  return(NULL);
3212  }
3213  len += l;
3214  NEXTL(l);
3215  c = CUR_CHAR(l);
3216  }
3217  } else {
3218  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3219  (!IS_LETTER(c) && (c != '_') &&
3220  (c != ':'))) {
3221  return(NULL);
3222  }
3223  len += l;
3224  NEXTL(l);
3225  c = CUR_CHAR(l);
3226 
3227  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3228  ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3229  (c == '.') || (c == '-') ||
3230  (c == '_') || (c == ':') ||
3231  (IS_COMBINING(c)) ||
3232  (IS_EXTENDER(c)))) {
3233  if (count++ > XML_PARSER_CHUNK_SIZE) {
3234  count = 0;
3235  GROW;
3236  if (ctxt->instate == XML_PARSER_EOF)
3237  return(NULL);
3238  }
3239  len += l;
3240  NEXTL(l);
3241  c = CUR_CHAR(l);
3242  }
3243  }
3244  if ((len > XML_MAX_NAME_LENGTH) &&
3245  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3246  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3247  return(NULL);
3248  }
3249  if (ctxt->input->cur - ctxt->input->base < len) {
3250  /*
3251  * There were a couple of bugs where PERefs led to a change
3252  * of the buffer. Check the buffer size to avoid passing an invalid
3253  * pointer to xmlDictLookup.
3254  */
 /* NOTE(review): listing line 3255 (start of the error-reporting call) is missing from this extract. */
3256  "unexpected change of input buffer");
3257  return (NULL);
3258  }
 /* When positioned on the '\n' of a "\r\n" pair the name start is one byte further back. */
3259  if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3260  return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3261  return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3262 }
3263 
/*
 * Parses a Name at the current input position.
 * NOTE(review): the declarator (listing line 3281) is missing from this
 * extract; another routine in this file calls it as xmlParseName(ctxt).
 *
 * Fast path: scans a run of ASCII name characters directly in the input
 * buffer.  If the byte that ends the run is a non-zero ASCII byte, the run
 * is the whole name: it is looked up in ctxt->dict, and the input cursor,
 * ctxt->nbChars and the column counter are advanced by its length.  A name
 * longer than XML_MAX_NAME_LENGTH without XML_PARSE_HUGE raises
 * XML_ERR_NAME_TOO_LONG and yields NULL.
 * Otherwise (first byte not an ASCII name start, or run ended by a zero or
 * non-ASCII byte) the work is handed to xmlParseNameComplex without having
 * moved the cursor.
 *
 * Returns the dictionary string for the name, or NULL.
 */
3280 const xmlChar *
3282  const xmlChar *in;
3283  const xmlChar *ret;
3284  int count = 0;
3285 
3286  GROW;
3287 
3288 #ifdef DEBUG
3289  nbParseName++;
3290 #endif
3291 
3292  /*
3293  * Accelerator for simple ASCII names
3294  */
3295  in = ctxt->input->cur;
3296  if (((*in >= 0x61) && (*in <= 0x7A)) ||
3297  ((*in >= 0x41) && (*in <= 0x5A)) ||
3298  (*in == '_') || (*in == ':')) {
3299  in++;
3300  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3301  ((*in >= 0x41) && (*in <= 0x5A)) ||
3302  ((*in >= 0x30) && (*in <= 0x39)) ||
3303  (*in == '_') || (*in == '-') ||
3304  (*in == ':') || (*in == '.'))
3305  in++;
 /* Only trust the fast path when the terminator is plain, non-zero ASCII. */
3306  if ((*in > 0) && (*in < 0x80)) {
3307  count = in - ctxt->input->cur;
3308  if ((count > XML_MAX_NAME_LENGTH) &&
3309  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3310  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3311  return(NULL);
3312  }
3313  ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3314  ctxt->input->cur = in;
3315  ctxt->nbChars += count;
3316  ctxt->input->col += count;
3317  if (ret == NULL)
3318  xmlErrMemory(ctxt, NULL);
3319  return(ret);
3320  }
3321  }
3322  /* accelerator for special cases */
3323  return(xmlParseNameComplex(ctxt));
3324 }
3325 
/*
 * General (non-accelerated) NCName parser: a name that contains no ':'.
 * NOTE(review): the declarator (listing line 3327) is missing from this
 * extract; the ASCII fast path in this file calls it as
 * xmlParseNCNameComplex(ctxt).
 *
 * The start of the name is remembered as an offset from BASE_PTR rather
 * than as a pointer, and the final lookup is done at BASE_PTR plus that
 * offset.  Character classes come from xmlIsNameStartChar/xmlIsNameChar,
 * with ':' additionally rejected.
 *
 * Returns the dictionary string for the name, or NULL when the first
 * character is not acceptable, the parser reaches XML_PARSER_EOF state
 * during a refill, or the name exceeds XML_MAX_NAME_LENGTH without
 * XML_PARSE_HUGE.
 * NOTE(review): unlike the refills inside the loop, the first GROW is not
 * followed by an XML_PARSER_EOF check.
 */
3326 static const xmlChar *
3328  int len = 0, l;
3329  int c;
3330  int count = 0;
3331  size_t startPosition = 0;
3332 
3333 #ifdef DEBUG
3334  nbParseNCNameComplex++;
3335 #endif
3336 
3337  /*
3338  * Handler for more complex cases
3339  */
3340  GROW;
3341  startPosition = CUR_PTR - BASE_PTR;
3342  c = CUR_CHAR(l);
3343  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3344  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3345  return(NULL);
3346  }
3347 
3348  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3349  (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3350  if (count++ > XML_PARSER_CHUNK_SIZE) {
3351  if ((len > XML_MAX_NAME_LENGTH) &&
3352  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3353  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3354  return(NULL);
3355  }
3356  count = 0;
3357  GROW;
3358  if (ctxt->instate == XML_PARSER_EOF)
3359  return(NULL);
3360  }
3361  len += l;
3362  NEXTL(l);
3363  c = CUR_CHAR(l);
3364  if (c == 0) {
3365  count = 0;
3366  /*
3367  * when shrinking to extend the buffer we really need to preserve
3368  * the part of the name we already parsed. Hence rolling back
3369  * by current length.
3370  */
3371  ctxt->input->cur -= l;
3372  GROW;
3373  if (ctxt->instate == XML_PARSER_EOF)
3374  return(NULL);
3375  ctxt->input->cur += l;
3376  c = CUR_CHAR(l);
3377  }
3378  }
3379  if ((len > XML_MAX_NAME_LENGTH) &&
3380  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3381  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3382  return(NULL);
3383  }
3384  return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3385 }
3386 
/*
 * Parses an NCName (a name without ':') at the current input position.
 * NOTE(review): the declarator (listing line 3403) is missing from this
 * extract; presumably xmlParseNCName(ctxt), matching the nbParseNCName
 * counter -- confirm against the full file.
 *
 * Fast path: scans ASCII name characters in place, bounded by
 * ctxt->input->end.  If the scan stops before the end on a non-zero ASCII
 * byte, the run is looked up in ctxt->dict and the cursor, ctxt->nbChars
 * and the column counter are advanced by its length.  A run longer than
 * XML_MAX_NAME_LENGTH without XML_PARSE_HUGE raises XML_ERR_NAME_TOO_LONG
 * and yields NULL.
 * In every other case, including reaching the end of the buffer, the work
 * is handed to xmlParseNCNameComplex without having moved the cursor.
 *
 * NOTE(review): in both conditions below *in is read before "in < e" is
 * tested.
 */
3402 static const xmlChar *
3404  const xmlChar *in, *e;
3405  const xmlChar *ret;
3406  int count = 0;
3407 
3408 #ifdef DEBUG
3409  nbParseNCName++;
3410 #endif
3411 
3412  /*
3413  * Accelerator for simple ASCII names
3414  */
3415  in = ctxt->input->cur;
3416  e = ctxt->input->end;
3417  if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3418  ((*in >= 0x41) && (*in <= 0x5A)) ||
3419  (*in == '_')) && (in < e)) {
3420  in++;
3421  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3422  ((*in >= 0x41) && (*in <= 0x5A)) ||
3423  ((*in >= 0x30) && (*in <= 0x39)) ||
3424  (*in == '_') || (*in == '-') ||
3425  (*in == '.')) && (in < e))
3426  in++;
 /* Ran into the end of the buffer: the name may continue, use the slow path. */
3427  if (in >= e)
3428  goto complex;
3429  if ((*in > 0) && (*in < 0x80)) {
3430  count = in - ctxt->input->cur;
3431  if ((count > XML_MAX_NAME_LENGTH) &&
3432  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3433  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3434  return(NULL);
3435  }
3436  ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3437  ctxt->input->cur = in;
3438  ctxt->nbChars += count;
3439  ctxt->input->col += count;
3440  if (ret == NULL) {
3441  xmlErrMemory(ctxt, NULL);
3442  }
3443  return(ret);
3444  }
3445  }
3446 complex:
3447  return(xmlParseNCNameComplex(ctxt));
3448 }
3449 
/*
 * Parses a Name at the current input position and compares it with the
 * expected string `other`.
 * NOTE(review): the declarator (listing line 3462) is missing from this
 * extract; presumably xmlParseNameAndCompare(ctxt, other) -- confirm.
 *
 * Fast path: compares the input bytes with `other` in place; if all of
 * `other` matches and the next byte is '>' or a blank, the cursor is moved
 * past the match.  Otherwise xmlParseName is run and its result is compared
 * with `other` by pointer.
 *
 * Returns (const xmlChar *) 1 on a match, NULL when the parser is in
 * XML_PARSER_EOF state or no name could be parsed, and otherwise the
 * (different) name returned by xmlParseName.
 * NOTE(review): input->col is incremented for every byte matched in the
 * fast path, even when that path then fails and falls back to
 * xmlParseName.
 */
3461 static const xmlChar *
3463  register const xmlChar *cmp = other;
3464  register const xmlChar *in;
3465  const xmlChar *ret;
3466 
3467  GROW;
3468  if (ctxt->instate == XML_PARSER_EOF)
3469  return(NULL);
3470 
3471  in = ctxt->input->cur;
3472  while (*in != 0 && *in == *cmp) {
3473  ++in;
3474  ++cmp;
3475  ctxt->input->col++;
3476  }
3477  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3478  /* success */
3479  ctxt->input->cur = in;
3480  return (const xmlChar*) 1;
3481  }
3482  /* failure (or end of input buffer), check with full function */
3483  ret = xmlParseName (ctxt);
3484  /* strings coming from the dictionary direct compare possible */
3485  if (ret == other) {
3486  return (const xmlChar*) 1;
3487  }
3488  return ret;
3489 }
3490 
/*
 * Parses a Name out of an in-memory string rather than the parser input.
 * NOTE(review): listing lines 3510-3511 (the declarator and the declaration
 * of the scratch array `buf`) are missing from this extract; a caller in
 * this file invokes it as xmlParseStringName(ctxt, &cur).
 *
 * *str is the position to parse from.  On success *str is advanced past
 * the name and a separately allocated copy of the name is returned (either
 * xmlStrndup of buf, or the heap buffer used once the name reaches
 * XML_MAX_NAMELEN bytes).
 * Returns NULL, leaving *str untouched, when the first character cannot
 * start a name, when an allocation fails, or when the name exceeds
 * XML_MAX_NAME_LENGTH without XML_PARSE_HUGE.
 */
3509 static xmlChar *
3512  const xmlChar *cur = *str;
3513  int len = 0, l;
3514  int c;
3515 
3516 #ifdef DEBUG
3517  nbParseStringName++;
3518 #endif
3519 
3520  c = CUR_SCHAR(cur, l);
3521  if (!xmlIsNameStartChar(ctxt, c)) {
3522  return(NULL);
3523  }
3524 
3525  COPY_BUF(l,buf,len,c);
3526  cur += l;
3527  c = CUR_SCHAR(cur, l);
3528  while (xmlIsNameChar(ctxt, c)) {
3529  COPY_BUF(l,buf,len,c);
3530  cur += l;
3531  c = CUR_SCHAR(cur, l);
3532  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3533  /*
3534  * Okay someone managed to make a huge name, so he's ready to pay
3535  * for the processing speed.
3536  */
3537  xmlChar *buffer;
3538  int max = len * 2;
3539 
3540  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3541  if (buffer == NULL) {
3542  xmlErrMemory(ctxt, NULL);
3543  return(NULL);
3544  }
3545  memcpy(buffer, buf, len);
3546  while (xmlIsNameChar(ctxt, c)) {
3547  if (len + 10 > max) {
3548  xmlChar *tmp;
3549 
 /* The length limit is only evaluated when the buffer is about to grow. */
3550  if ((len > XML_MAX_NAME_LENGTH) &&
3551  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553  xmlFree(buffer);
3554  return(NULL);
3555  }
3556  max *= 2;
3557  tmp = (xmlChar *) xmlRealloc(buffer,
3558  max * sizeof(xmlChar));
3559  if (tmp == NULL) {
3560  xmlErrMemory(ctxt, NULL);
3561  xmlFree(buffer);
3562  return(NULL);
3563  }
3564  buffer = tmp;
3565  }
3566  COPY_BUF(l,buffer,len,c);
3567  cur += l;
3568  c = CUR_SCHAR(cur, l);
3569  }
3570  buffer[len] = 0;
3571  *str = cur;
3572  return(buffer);
3573  }
3574  }
3575  if ((len > XML_MAX_NAME_LENGTH) &&
3576  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3577  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3578  return(NULL);
3579  }
3580  *str = cur;
3581  return(xmlStrndup(buf, len));
3582 }
3583 
/*
 * Parses an Nmtoken (a run of name characters, with no constraint on the
 * first one) from the parser input.
 * NOTE(review): listing lines 3598-3599 (the declarator and the declaration
 * of the scratch array `buf`) are missing from this extract; presumably
 * xmlParseNmToken(ctxt), matching the nbParseNmToken counter -- confirm.
 *
 * Characters are copied into buf; once XML_MAX_NAMELEN bytes have been
 * collected the token continues in a heap buffer that doubles as needed.
 *
 * Returns a separately allocated copy of the token, or NULL when the parser
 * is in XML_PARSER_EOF state, no name character was found, an allocation
 * fails, or the token is too long without XML_PARSE_HUGE
 * (XML_ERR_NAME_TOO_LONG).
 */
3597 xmlChar *
3600  int len = 0, l;
3601  int c;
3602  int count = 0;
3603 
3604 #ifdef DEBUG
3605  nbParseNmToken++;
3606 #endif
3607 
3608  GROW;
3609  if (ctxt->instate == XML_PARSER_EOF)
3610  return(NULL);
3611  c = CUR_CHAR(l);
3612 
3613  while (xmlIsNameChar(ctxt, c)) {
 /* NOTE(review): unlike the other refills in this routine, this GROW is not followed by an XML_PARSER_EOF check. */
3614  if (count++ > XML_PARSER_CHUNK_SIZE) {
3615  count = 0;
3616  GROW;
3617  }
3618  COPY_BUF(l,buf,len,c);
3619  NEXTL(l);
3620  c = CUR_CHAR(l);
3621  if (c == 0) {
3622  count = 0;
3623  GROW;
3624  if (ctxt->instate == XML_PARSER_EOF)
3625  return(NULL);
3626  c = CUR_CHAR(l);
3627  }
3628  if (len >= XML_MAX_NAMELEN) {
3629  /*
3630  * Okay someone managed to make a huge token, so he's ready to pay
3631  * for the processing speed.
3632  */
3633  xmlChar *buffer;
3634  int max = len * 2;
3635 
3636  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3637  if (buffer == NULL) {
3638  xmlErrMemory(ctxt, NULL);
3639  return(NULL);
3640  }
3641  memcpy(buffer, buf, len);
3642  while (xmlIsNameChar(ctxt, c)) {
3643  if (count++ > XML_PARSER_CHUNK_SIZE) {
3644  count = 0;
3645  GROW;
3646  if (ctxt->instate == XML_PARSER_EOF) {
3647  xmlFree(buffer);
3648  return(NULL);
3649  }
3650  }
3651  if (len + 10 > max) {
3652  xmlChar *tmp;
3653 
 /* Here the limit is tested against the buffer capacity (max), not len. */
3654  if ((max > XML_MAX_NAME_LENGTH) &&
3655  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3656  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3657  xmlFree(buffer);
3658  return(NULL);
3659  }
3660  max *= 2;
3661  tmp = (xmlChar *) xmlRealloc(buffer,
3662  max * sizeof(xmlChar));
3663  if (tmp == NULL) {
3664  xmlErrMemory(ctxt, NULL);
3665  xmlFree(buffer);
3666  return(NULL);
3667  }
3668  buffer = tmp;
3669  }
3670  COPY_BUF(l,buffer,len,c);
3671  NEXTL(l);
3672  c = CUR_CHAR(l);
3673  }
3674  buffer[len] = 0;
3675  return(buffer);
3676  }
3677  }
3678  if (len == 0)
3679  return(NULL);
3680  if ((len > XML_MAX_NAME_LENGTH) &&
3681  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3682  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3683  return(NULL);
3684  }
3685  return(xmlStrndup(buf, len));
3686 }
3687 
/*
 * Parses a quoted entity value literal from the parser input.
 * NOTE(review): several listing lines are missing from this extract -- the
 * declarator (3702), the declarations of `size` (3705) and `input` (3710),
 * three error-reporting statements (3715, 3771, 3802), the statement at
 * 3728, and the first line of the call whose arguments end at 3821
 * (presumably the entity-decoding call that assigns `ret`).  Presumably
 * this is xmlParseEntityValue(ctxt, orig) -- confirm against the full
 * file.
 *
 * Steps visible here:
 *  1. The opening quote (' or ") selects the terminator `stop`.
 *  2. The literal is copied verbatim into a growing heap buffer until the
 *     matching quote is met in the same input that opened the literal.
 *  3. The copy is scanned: every '%' and every '&' not followed by '#' must
 *     be followed by a Name and ';'; a '%' reference is additionally
 *     rejected when ctxt->inSubset == 1 and ctxt->inputNr == 1.
 *  4. The decoded result is produced into `ret` with ctxt->depth raised
 *     around the call.
 *
 * If orig is not NULL and no error occurred, *orig takes ownership of the
 * raw copy; otherwise the raw copy is freed.  Returns `ret`, which is NULL
 * on every error path.
 */
3701 xmlChar *
3703  xmlChar *buf = NULL;
3704  int len = 0;
3706  int c, l;
3707  xmlChar stop;
3708  xmlChar *ret = NULL;
3709  const xmlChar *cur = NULL;
3711 
3712  if (RAW == '"') stop = '"';
3713  else if (RAW == '\'') stop = '\'';
3714  else {
3716  return(NULL);
3717  }
3718  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3719  if (buf == NULL) {
3720  xmlErrMemory(ctxt, NULL);
3721  return(NULL);
3722  }
3723 
3724  /*
3725  * The content of the entity definition is copied in a buffer.
3726  */
3727 
3729  input = ctxt->input;
3730  GROW;
3731  if (ctxt->instate == XML_PARSER_EOF)
3732  goto error;
3733  NEXT;
3734  c = CUR_CHAR(l);
3735  /*
3736  * NOTE: 4.4.5 Included in Literal
3737  * When a parameter entity reference appears in a literal entity
3738  * value, ... a single or double quote character in the replacement
3739  * text is always treated as a normal data character and will not
3740  * terminate the literal.
3741  * In practice it means we stop the loop only when back at parsing
3742  * the initial entity and the quote is found
3743  */
3744  while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3745  (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3746  if (len + 5 >= size) {
3747  xmlChar *tmp;
3748 
3749  size *= 2;
3750  tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3751  if (tmp == NULL) {
3752  xmlErrMemory(ctxt, NULL);
3753  goto error;
3754  }
3755  buf = tmp;
3756  }
3757  COPY_BUF(l,buf,len,c);
3758  NEXTL(l);
3759 
3760  GROW;
3761  c = CUR_CHAR(l);
3762  if (c == 0) {
3763  GROW;
3764  c = CUR_CHAR(l);
3765  }
3766  }
3767  buf[len] = 0;
3768  if (ctxt->instate == XML_PARSER_EOF)
3769  goto error;
3770  if (c != stop) {
3772  goto error;
3773  }
3774  NEXT;
3775 
3776  /*
3777  * Raise problem w.r.t. '&' and '%' being used in non-entities
3778  * reference constructs. Note Charref will be handled in
3779  * xmlStringDecodeEntities()
3780  */
3781  cur = buf;
3782  while (*cur != 0) { /* non input consuming */
3783  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3784  xmlChar *name;
3785  xmlChar tmp = *cur;
3786  int nameOk = 0;
3787 
3788  cur++;
 /* The name is parsed only to validate the reference syntax; it is freed at once. */
3789  name = xmlParseStringName(ctxt, &cur);
3790  if (name != NULL) {
3791  nameOk = 1;
3792  xmlFree(name);
3793  }
3794  if ((nameOk == 0) || (*cur != ';')) {
3795  xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3796  "EntityValue: '%c' forbidden except for entities references\n",
3797  tmp);
3798  goto error;
3799  }
3800  if ((tmp == '%') && (ctxt->inSubset == 1) &&
3801  (ctxt->inputNr == 1)) {
3803  goto error;
3804  }
3805  if (*cur == 0)
3806  break;
3807  }
3808  cur++;
3809  }
3810 
3811  /*
3812  * Then PEReference entities are substituted.
3813  *
3814  * NOTE: 4.4.7 Bypassed
3815  * When a general entity reference appears in the EntityValue in
3816  * an entity declaration, it is bypassed and left as is.
3817  * so XML_SUBSTITUTE_REF is not set here.
3818  */
3819  ++ctxt->depth;
3821  0, 0, 0);
3822  --ctxt->depth;
3823  if (orig != NULL) {
3824  *orig = buf;
3825  buf = NULL;
3826  }
3827 
 /* Shared exit: buf is still non-NULL here unless ownership went to *orig. */
3828 error:
3829  if (buf != NULL)
3830  xmlFree(buf);
3831  return(ret);
3832 }
3833 
/*
 * Parses a quoted attribute value from the parser input into a newly
 * allocated buffer, handling character and entity references and
 * whitespace.
 * NOTE(review): several listing lines are missing from this extract -- the
 * declarator (3847), statements at 3858, 3863, 3866 and 4048 (the last two
 * presumably error reports), the second half of the condition started at
 * 3931 (listing line 3932) and one argument line of a decoding call
 * (3951).  A caller in this file invokes it as
 * xmlParseAttValueInternal(ctxt, NULL, NULL, 0); the body below uses the
 * names ctxt, attlen and normalize.
 *
 * Behaviour visible here:
 *  - The opening quote selects `limit`; the loop stops at that quote, at
 *    '<', at a character rejected by IS_CHAR, or on XML_PARSER_EOF state.
 *  - "&#...;" is resolved through xmlParseCharRef; a resulting '&' is
 *    stored as "&#38;" when ctxt->replaceEntities is 0.
 *  - "&name;" is resolved through xmlParseEntityRef and then either
 *    expanded (with CR, LF and TAB in the replacement turned into spaces)
 *    or written back as a reference, depending on the entity and on
 *    ctxt->replaceEntities.
 *  - Literal space, CR, LF and TAB are each stored as 0x20; when normalize
 *    is non-zero, leading blanks are dropped, runs of blanks are collapsed
 *    to one, and trailing blanks are removed.
 *  - Without XML_PARSE_HUGE a value longer than XML_MAX_TEXT_LENGTH is
 *    rejected.
 *
 * Returns the buffer and, if attlen is not NULL, stores its length there.
 * Returns NULL after freeing the buffer on XML_PARSER_EOF state, on
 * allocation failure and on the length limits.  A '<' or a missing closing
 * quote is handled in the branch after the loop and does not by itself
 * discard the buffer.
 */
3846 static xmlChar *
3848  xmlChar limit = 0;
3849  xmlChar *buf = NULL;
3850  xmlChar *rep = NULL;
3851  size_t len = 0;
3852  size_t buf_size = 0;
3853  int c, l, in_space = 0;
3854  xmlChar *current = NULL;
3855  xmlEntityPtr ent;
3856 
3857  if (NXT(0) == '"') {
3859  limit = '"';
3860  NEXT;
3861  } else if (NXT(0) == '\'') {
3862  limit = '\'';
3864  NEXT;
3865  } else {
3867  return(NULL);
3868  }
3869 
3870  /*
3871  * allocate a translation buffer.
3872  */
3873  buf_size = XML_PARSER_BUFFER_SIZE;
3874  buf = (xmlChar *) xmlMallocAtomic(buf_size);
3875  if (buf == NULL) goto mem_error;
3876 
3877  /*
3878  * OK loop until we reach one of the ending char or a size limit.
3879  */
3880  c = CUR_CHAR(l);
3881  while (((NXT(0) != limit) && /* checked */
3882  (IS_CHAR(c)) && (c != '<')) &&
3883  (ctxt->instate != XML_PARSER_EOF)) {
3884  /*
3885  * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3886  * special option is given
3887  */
 /* NOTE(review): this jumps to mem_error, so xmlErrMemory is also called for an over-long value. */
3888  if ((len > XML_MAX_TEXT_LENGTH) &&
3889  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3890  xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3891  "AttValue length too long\n");
3892  goto mem_error;
3893  }
3894  if (c == 0) break;
3895  if (c == '&') {
3896  in_space = 0;
3897  if (NXT(1) == '#') {
3898  int val = xmlParseCharRef(ctxt);
3899 
3900  if (val == '&') {
3901  if (ctxt->replaceEntities) {
3902  if (len + 10 > buf_size) {
3903  growBuffer(buf, 10);
3904  }
3905  buf[len++] = '&';
3906  } else {
3907  /*
3908  * The reparsing will be done in xmlStringGetNodeList()
3909  * called by the attribute() function in SAX.c
3910  */
3911  if (len + 10 > buf_size) {
3912  growBuffer(buf, 10);
3913  }
3914  buf[len++] = '&';
3915  buf[len++] = '#';
3916  buf[len++] = '3';
3917  buf[len++] = '8';
3918  buf[len++] = ';';
3919  }
3920  } else if (val != 0) {
3921  if (len + 10 > buf_size) {
3922  growBuffer(buf, 10);
3923  }
3924  len += xmlCopyChar(0, &buf[len], val);
3925  }
3926  } else {
3927  ent = xmlParseEntityRef(ctxt);
3928  ctxt->nbentities++;
3929  if (ent != NULL)
3930  ctxt->nbentities += ent->owner;
3931  if ((ent != NULL) &&
3933  if (len + 10 > buf_size) {
3934  growBuffer(buf, 10);
3935  }
3936  if ((ctxt->replaceEntities == 0) &&
3937  (ent->content[0] == '&')) {
3938  buf[len++] = '&';
3939  buf[len++] = '#';
3940  buf[len++] = '3';
3941  buf[len++] = '8';
3942  buf[len++] = ';';
3943  } else {
3944  buf[len++] = ent->content[0];
3945  }
3946  } else if ((ent != NULL) &&
3947  (ctxt->replaceEntities != 0)) {
3948  if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3949  ++ctxt->depth;
3950  rep = xmlStringDecodeEntities(ctxt, ent->content,
3952  0, 0, 0);
3953  --ctxt->depth;
3954  if (rep != NULL) {
3955  current = rep;
3956  while (*current != 0) { /* non input consuming */
3957  if ((*current == 0xD) || (*current == 0xA) ||
3958  (*current == 0x9)) {
3959  buf[len++] = 0x20;
3960  current++;
3961  } else
3962  buf[len++] = *current++;
3963  if (len + 10 > buf_size) {
3964  growBuffer(buf, 10);
3965  }
3966  }
3967  xmlFree(rep);
3968  rep = NULL;
3969  }
3970  } else {
3971  if (len + 10 > buf_size) {
3972  growBuffer(buf, 10);
3973  }
3974  if (ent->content != NULL)
3975  buf[len++] = ent->content[0];
3976  }
3977  } else if (ent != NULL) {
3978  int i = xmlStrlen(ent->name);
3979  const xmlChar *cur = ent->name;
3980 
3981  /*
3982  * This may look absurd but is needed to detect
3983  * entities problems
3984  */
3985  if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3986  (ent->content != NULL) && (ent->checked == 0)) {
3987  unsigned long oldnbent = ctxt->nbentities;
3988 
3989  ++ctxt->depth;
3990  rep = xmlStringDecodeEntities(ctxt, ent->content,
3991  XML_SUBSTITUTE_REF, 0, 0, 0);
3992  --ctxt->depth;
3993 
 /* checked: twice the number of entities met while decoding (plus one); bit 0 is set below if the result contains '<'. */
3994  ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
3995  if (rep != NULL) {
3996  if (xmlStrchr(rep, '<'))
3997  ent->checked |= 1;
3998  xmlFree(rep);
3999  rep = NULL;
4000  } else {
4001  ent->content[0] = 0;
4002  }
4003  }
4004 
4005  /*
4006  * Just output the reference
4007  */
 /* NOTE(review): this byte is stored before the capacity check on the next line. */
4008  buf[len++] = '&';
4009  while (len + i + 10 > buf_size) {
4010  growBuffer(buf, i + 10);
4011  }
4012  for (;i > 0;i--)
4013  buf[len++] = *cur++;
4014  buf[len++] = ';';
4015  }
4016  }
4017  } else {
4018  if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4019  if ((len != 0) || (!normalize)) {
4020  if ((!normalize) || (!in_space)) {
4021  COPY_BUF(l,buf,len,0x20);
4022  while (len + 10 > buf_size) {
4023  growBuffer(buf, 10);
4024  }
4025  }
4026  in_space = 1;
4027  }
4028  } else {
4029  in_space = 0;
4030  COPY_BUF(l,buf,len,c);
4031  if (len + 10 > buf_size) {
4032  growBuffer(buf, 10);
4033  }
4034  }
4035  NEXTL(l);
4036  }
4037  GROW;
4038  c = CUR_CHAR(l);
4039  }
4040  if (ctxt->instate == XML_PARSER_EOF)
4041  goto error;
4042 
 /* Normalizing and the value ended in blanks: drop the trailing 0x20 bytes. */
4043  if ((in_space) && (normalize)) {
4044  while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4045  }
4046  buf[len] = 0;
4047  if (RAW == '<') {
4049  } else if (RAW != limit) {
4050  if ((c != 0) && (!IS_CHAR(c))) {
4051  xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4052  "invalid character in attribute value\n");
4053  } else {
4054  xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4055  "AttValue: ' expected\n");
4056  }
4057  } else
4058  NEXT;
4059 
4060  /*
4061  * There we potentially risk an overflow, don't allow attribute value of
4062  * length more than INT_MAX it is a very reasonable assumption !
4063  */
4064  if (len >= INT_MAX) {
4065  xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4066  "AttValue length too long\n");
4067  goto mem_error;
4068  }
4069 
4070  if (attlen != NULL) *attlen = (int) len;
4071  return(buf);
4072 
 /* mem_error reports an allocation failure, then falls through to the common cleanup. */
4073 mem_error:
4074  xmlErrMemory(ctxt, NULL);
4075 error:
4076  if (buf != NULL)
4077  xmlFree(buf);
4078  if (rep != NULL)
4079  xmlFree(rep);
4080  return(NULL);
4081 }
4082 
/*
 * Public wrapper around xmlParseAttValueInternal.
 * NOTE(review): the declarator (listing line 4118) is missing from this
 * extract; presumably xmlParseAttValue(ctxt) -- confirm.
 *
 * Returns NULL when ctxt or ctxt->input is NULL; otherwise returns the
 * result of xmlParseAttValueInternal called with NULL for its second and
 * third arguments and 0 for the last one.
 */
4117 xmlChar *
4119  if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4120  return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4121 }
4122 
/*
 * Parses a quoted system literal from the parser input.
 * NOTE(review): listing lines 4135 (declarator), 4138 (declaration of
 * `size`), 4152, 4161 and 4205 are missing from this extract; presumably
 * this is xmlParseSystemLiteral(ctxt), matching the "SystemLiteral" error
 * text -- confirm against the full file.
 *
 * The opening quote (' or ") selects the terminator.  Characters accepted
 * by IS_CHAR are copied into a heap buffer that doubles when nearly full;
 * growth is refused with XML_ERR_NAME_TOO_LONG once size exceeds
 * XML_MAX_NAME_LENGTH unless XML_PARSE_HUGE is set.  The input is refilled
 * every 50 characters and whenever a zero character is read.
 *
 * ctxt->instate is saved on entry and written back on the normal exit and
 * on the length-limit and realloc-failure exits.
 * Returns the NUL-terminated buffer.  It is also returned when the loop
 * stopped on a character rejected by IS_CHAR instead of the closing quote.
 * Returns NULL when no opening quote is present, on allocation failure, on
 * the length limit, or on XML_PARSER_EOF state.
 */
4134 xmlChar *
4136  xmlChar *buf = NULL;
4137  int len = 0;
4139  int cur, l;
4140  xmlChar stop;
4141  int state = ctxt->instate;
4142  int count = 0;
4143 
4144  SHRINK;
4145  if (RAW == '"') {
4146  NEXT;
4147  stop = '"';
4148  } else if (RAW == '\'') {
4149  NEXT;
4150  stop = '\'';
4151  } else {
4153  return(NULL);
4154  }
4155 
4156  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4157  if (buf == NULL) {
4158  xmlErrMemory(ctxt, NULL);
4159  return(NULL);
4160  }
4162  cur = CUR_CHAR(l);
4163  while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4164  if (len + 5 >= size) {
4165  xmlChar *tmp;
4166 
4167  if ((size > XML_MAX_NAME_LENGTH) &&
4168  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4169  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4170  xmlFree(buf);
4171  ctxt->instate = (xmlParserInputState) state;
4172  return(NULL);
4173  }
4174  size *= 2;
4175  tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4176  if (tmp == NULL) {
4177  xmlFree(buf);
4178  xmlErrMemory(ctxt, NULL);
4179  ctxt->instate = (xmlParserInputState) state;
4180  return(NULL);
4181  }
4182  buf = tmp;
4183  }
4184  count++;
4185  if (count > 50) {
4186  GROW;
4187  count = 0;
 /* The saved state is deliberately not written back here: the parser is in EOF state. */
4188  if (ctxt->instate == XML_PARSER_EOF) {
4189  xmlFree(buf);
4190  return(NULL);
4191  }
4192  }
4193  COPY_BUF(l,buf,len,cur);
4194  NEXTL(l);
4195  cur = CUR_CHAR(l);
4196  if (cur == 0) {
4197  GROW;
4198  SHRINK;
4199  cur = CUR_CHAR(l);
4200  }
4201  }
4202  buf[len] = 0;
4203  ctxt->instate = (xmlParserInputState) state;
4204  if (!IS_CHAR(cur)) {
4206  } else {
4207  NEXT;
4208  }
4209  return(buf);
4210 }
4211 
/*
 * Parses a quoted public identifier literal from the parser input.
 * NOTE(review): listing lines 4224 (declarator), 4227 (declaration of
 * `size`), 4241, 4249 and 4290 are missing from this extract; presumably
 * this is xmlParsePubidLiteral(ctxt), matching the "Public ID" error text
 * -- confirm against the full file.
 *
 * The opening quote (' or ") selects the terminator.  Bytes accepted by
 * IS_PUBIDCHAR_CH are copied one at a time into a heap buffer that doubles
 * when full; growth is refused with XML_ERR_NAME_TOO_LONG once size exceeds
 * XML_MAX_NAME_LENGTH unless XML_PARSE_HUGE is set.  The input is refilled
 * every 50 bytes and whenever a zero byte is read.
 *
 * ctxt->instate is saved on entry and written back only on the normal
 * exit.
 * Returns the NUL-terminated buffer.  It is also returned when the loop
 * stopped on something other than the closing quote.  Returns NULL when no
 * opening quote is present, on allocation failure, on the length limit, or
 * on XML_PARSER_EOF state.
 */
4223 xmlChar *
4225  xmlChar *buf = NULL;
4226  int len = 0;
4228  xmlChar cur;
4229  xmlChar stop;
4230  int count = 0;
4231  xmlParserInputState oldstate = ctxt->instate;
4232 
4233  SHRINK;
4234  if (RAW == '"') {
4235  NEXT;
4236  stop = '"';
4237  } else if (RAW == '\'') {
4238  NEXT;
4239  stop = '\'';
4240  } else {
4242  return(NULL);
4243  }
4244  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4245  if (buf == NULL) {
4246  xmlErrMemory(ctxt, NULL);
4247  return(NULL);
4248  }
4250  cur = CUR;
4251  while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4252  if (len + 1 >= size) {
4253  xmlChar *tmp;
4254 
4255  if ((size > XML_MAX_NAME_LENGTH) &&
4256  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4257  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4258  xmlFree(buf);
4259  return(NULL);
4260  }
4261  size *= 2;
4262  tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4263  if (tmp == NULL) {
4264  xmlErrMemory(ctxt, NULL);
4265  xmlFree(buf);
4266  return(NULL);
4267  }
4268  buf = tmp;
4269  }
4270  buf[len++] = cur;
4271  count++;
4272  if (count > 50) {
4273  GROW;
4274  count = 0;
4275  if (ctxt->instate == XML_PARSER_EOF) {
4276  xmlFree(buf);
4277  return(NULL);
4278  }
4279  }
4280  NEXT;
4281  cur = CUR;
4282  if (cur == 0) {
4283  GROW;
4284  SHRINK;
4285  cur = CUR;
4286  }
4287  }
4288  buf[len] = 0;
4289  if (cur != stop) {
4291  } else {
4292  NEXT;
4293  }
4294  ctxt->instate = oldstate;
4295  return(buf);
4296 }
4297 
/* Forward declaration: the definition is not within this part of the file. */
4298 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4299 
4300 /*
4301  * used for the test in the inner loop of the char data testing
 *
 * 256-entry lookup table indexed by byte value.  Entry i holds i itself
 * for 0x09 and for 0x20..0x7F, except '&' (0x26), '<' (0x3C) and ']'
 * (0x5D), which hold 0.  Every other entry -- the remaining control
 * bytes, including 0x0A and 0x0D, and all bytes >= 0x80 -- holds 0.
4302  */
4303 static const unsigned char test_char_data[256] = {
4304  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4305  0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4306  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4307  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4308  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4309  0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4310  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4311  0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4312  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4313  0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4314  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4315  0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4316  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4317  0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4318  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4319  0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4320  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4321  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4322  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4323  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4324  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4325  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4326  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4327  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4328  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4329  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4330  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4331  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4332  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4333  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4334  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4335  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4336 };
4337 
/*
 * Accelerated parser for character data (the text found between markup).
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 4355) is missing from this view. The body uses `ctxt` and
 * `cdata`; the name is presumably xmlParseCharData -- confirm against the
 * complete file.
 *
 * When `cdata` is zero the input buffer is scanned in place and each run of
 * bytes accepted by test_char_data (plus line feeds) is passed to the SAX
 * callbacks straight from the buffer, without copying. The function returns
 * as soon as it reaches '<' or '&', or the "]]>" sequence. Any other byte
 * that ends the loop (non-ASCII, other control characters, end of buffer)
 * makes it fall through to xmlParseCharDataComplex(), after restoring the
 * line/column recorded at the last delivery. When `cdata` is non-zero the
 * fast path is skipped entirely and the complex parser is called directly.
 */
4354 void
4356  const xmlChar *in;
4357  int nbchar = 0;
      /* Position snapshot, refreshed after each SAX delivery and written back
       * to the input before handing over to the complex parser. */
4358  int line = ctxt->input->line;
4359  int col = ctxt->input->col;
4360  int ccol;
4361 
4362  SHRINK;
4363  GROW;
4364  /*
4365  * Accelerated common case where the input doesn't need to be
4366  * modified before passing it to the handler.
4367  */
4368  if (!cdata) {
4369  in = ctxt->input->cur;
4370  do {
      /* Step over leading spaces and line feeds, keeping line/col in sync. */
4371 get_more_space:
4372  while (*in == 0x20) { in++; ctxt->input->col++; }
4373  if (*in == 0xA) {
4374  do {
4375  ctxt->input->line++; ctxt->input->col = 1;
4376  in++;
4377  } while (*in == 0xA);
4378  goto get_more_space;
4379  }
      /* Only spaces/line feeds were seen and markup follows: deliver that
       * run (areBlanks is called with last argument 1 here) and return. */
4380  if (*in == '<') {
4381  nbchar = in - ctxt->input->cur;
4382  if (nbchar > 0) {
4383  const xmlChar *tmp = ctxt->input->cur;
4384  ctxt->input->cur = in;
4385 
      /* areBlanks() is only consulted when the two callbacks differ;
       * otherwise everything goes to the characters callback. */
4386  if ((ctxt->sax != NULL) &&
4387  (ctxt->sax->ignorableWhitespace !=
4388  ctxt->sax->characters)) {
4389  if (areBlanks(ctxt, tmp, nbchar, 1)) {
4390  if (ctxt->sax->ignorableWhitespace != NULL)
4391  ctxt->sax->ignorableWhitespace(ctxt->userData,
4392  tmp, nbchar);
4393  } else {
4394  if (ctxt->sax->characters != NULL)
4395  ctxt->sax->characters(ctxt->userData,
4396  tmp, nbchar);
4397  if (*ctxt->space == -1)
4398  *ctxt->space = -2;
4399  }
4400  } else if ((ctxt->sax != NULL) &&
4401  (ctxt->sax->characters != NULL)) {
4402  ctxt->sax->characters(ctxt->userData,
4403  tmp, nbchar);
4404  }
4405  }
4406  return;
4407  }
4408 
      /* Main scan: step over bytes flagged in test_char_data. The column is
       * counted in a local and stored back once the inner loop stops. */
4409 get_more:
4410  ccol = ctxt->input->col;
4411  while (test_char_data[*in]) {
4412  in++;
4413  ccol++;
4414  }
4415  ctxt->input->col = ccol;
4416  if (*in == 0xA) {
4417  do {
4418  ctxt->input->line++; ctxt->input->col = 1;
4419  in++;
4420  } while (*in == 0xA);
4421  goto get_more;
4422  }
      /* A lone ']' is ordinary text; "]]>" stops the scan and returns with
       * input->cur set to the second ']'.
       * NOTE(review): listing line 4425 is absent from this view; it
       * presumably reports the misplaced CDATA end -- confirm. */
4423  if (*in == ']') {
4424  if ((in[1] == ']') && (in[2] == '>')) {
4426  ctxt->input->cur = in + 1;
4427  return;
4428  }
4429  in++;
4430  ctxt->input->col++;
4431  goto get_more;
4432  }
      /* Deliver the run [input->cur, in). areBlanks() is only consulted when
       * the callbacks differ and the run starts with a blank character. */
4433  nbchar = in - ctxt->input->cur;
4434  if (nbchar > 0) {
4435  if ((ctxt->sax != NULL) &&
4436  (ctxt->sax->ignorableWhitespace !=
4437  ctxt->sax->characters) &&
4438  (IS_BLANK_CH(*ctxt->input->cur))) {
4439  const xmlChar *tmp = ctxt->input->cur;
4440  ctxt->input->cur = in;
4441 
4442  if (areBlanks(ctxt, tmp, nbchar, 0)) {
4443  if (ctxt->sax->ignorableWhitespace != NULL)
4444  ctxt->sax->ignorableWhitespace(ctxt->userData,
4445  tmp, nbchar);
4446  } else {
4447  if (ctxt->sax->characters != NULL)
4448  ctxt->sax->characters(ctxt->userData,
4449  tmp, nbchar);
4450  if (*ctxt->space == -1)
4451  *ctxt->space = -2;
4452  }
4453  line = ctxt->input->line;
4454  col = ctxt->input->col;
4455  } else if (ctxt->sax != NULL) {
4456  if (ctxt->sax->characters != NULL)
4457  ctxt->sax->characters(ctxt->userData,
4458  ctxt->input->cur, nbchar);
4459  line = ctxt->input->line;
4460  col = ctxt->input->col;
4461  }
4462  /* something really bad happened in the SAX callback */
4463  if (ctxt->instate != XML_PARSER_CONTENT)
4464  return;
4465  }
4466  ctxt->input->cur = in;
      /* CR LF: move input->cur onto the LF so the CR is left out of the next
       * delivered run, then re-test the loop condition. A CR not followed
       * by LF is left in place (in is stepped back). */
4467  if (*in == 0xD) {
4468  in++;
4469  if (*in == 0xA) {
4470  ctxt->input->cur = in;
4471  in++;
4472  ctxt->input->line++; ctxt->input->col = 1;
4473  continue; /* while */
4474  }
4475  in--;
4476  }
4477  if (*in == '<') {
4478  return;
4479  }
4480  if (*in == '&') {
4481  return;
4482  }
      /* Refill/compact the input; `in` must be re-read from input->cur
       * afterwards, as done below. */
4483  SHRINK;
4484  GROW;
4485  if (ctxt->instate == XML_PARSER_EOF)
4486  return;
4487  in = ctxt->input->cur;
4488  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4489  nbchar = 0;
4490  }
      /* Fallback: restore the position recorded at the last delivery and let
       * the general parser handle the remaining data. */
4491  ctxt->input->line = line;
4492  ctxt->input->col = col;
4493  xmlParseCharDataComplex(ctxt, cdata);
4494 }
4495 
/*
 * General (non-accelerated) character-data parser.
 *
 * NOTE(review): listing lines 4506-4507 are missing from this view. They
 * presumably hold the name/parameter list (declared earlier in the file as
 * xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata)) and the
 * declaration of the local buffer `buf` -- confirm against the complete file.
 *
 * Characters are read one at a time with CUR_CHAR and accumulated in `buf`
 * through COPY_BUF. The loop stops at '<', at '&', or at the first character
 * rejected by IS_CHAR. Whenever `nbchar` reaches XML_PARSER_BIG_BUFFER_SIZE
 * the buffer is flushed to the SAX layer: areBlanks() selects between the
 * ignorableWhitespace and characters callbacks. Whatever is left is flushed
 * after the loop. If the loop ended on a non-zero character that is not a
 * valid XML character, an XML_ERR_INVALID_CHAR error is raised and that
 * character is skipped.
 */
4505 static void
4508  int nbchar = 0;
4509  int cur, l;
4510  int count = 0;
4511 
4512  SHRINK;
4513  GROW;
4514  cur = CUR_CHAR(l);
4515  while ((cur != '<') && /* checked */
4516  (cur != '&') &&
4517  (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
      /* "]]>" seen: with cdata set the loop stops here.
       * NOTE(review): listing line 4522 (the body of the else branch) is
       * absent from this view; it presumably reports the misplaced CDATA
       * end -- confirm. */
4518  if ((cur == ']') && (NXT(1) == ']') &&
4519  (NXT(2) == '>')) {
4520  if (cdata) break;
4521  else {
4523  }
4524  }
4525  COPY_BUF(l,buf,nbchar,cur);
4526  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4527  buf[nbchar] = 0;
4528 
4529  /*
4530  * OK the segment is to be consumed as chars.
4531  */
4532  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4533  if (areBlanks(ctxt, buf, nbchar, 0)) {
4534  if (ctxt->sax->ignorableWhitespace != NULL)
4535  ctxt->sax->ignorableWhitespace(ctxt->userData,
4536  buf, nbchar);
4537  } else {
4538  if (ctxt->sax->characters != NULL)
4539  ctxt->sax->characters(ctxt->userData, buf, nbchar);
4540  if ((ctxt->sax->characters !=
4541  ctxt->sax->ignorableWhitespace) &&
4542  (*ctxt->space == -1))
4543  *ctxt->space = -2;
4544  }
4545  }
4546  nbchar = 0;
4547  /* something really bad happened in the SAX callback */
4548  if (ctxt->instate != XML_PARSER_CONTENT)
4549  return;
4550  }
      /* Every 51st character, ask for more input and stop if the parser
       * has been switched to the EOF state. */
4551  count++;
4552  if (count > 50) {
4553  GROW;
4554  count = 0;
4555  if (ctxt->instate == XML_PARSER_EOF)
4556  return;
4557  }
4558  NEXTL(l);
4559  cur = CUR_CHAR(l);
4560  }
      /* Flush whatever is still buffered once the loop has ended. */
4561  if (nbchar != 0) {
4562  buf[nbchar] = 0;
4563  /*
4564  * OK the segment is to be consumed as chars.
4565  */
4566  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4567  if (areBlanks(ctxt, buf, nbchar, 0)) {
4568  if (ctxt->sax->ignorableWhitespace != NULL)
4569  ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4570  } else {
4571  if (ctxt->sax->characters != NULL)
4572  ctxt->sax->characters(ctxt->userData, buf, nbchar);
4573  if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4574  (*ctxt->space == -1))
4575  *ctxt->space = -2;
4576  }
4577  }
4578  }
4579  if ((cur != 0) && (!IS_CHAR(cur))) {
4580  /* Generate the error and skip the offending character */
4581  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4582  "PCDATA invalid Char value %d\n",
4583  cur);
4584  NEXTL(l);
4585  }
4586 }
4587 
/*
 * Parse an external identifier: either
 *     SYSTEM <system literal>
 * or
 *     PUBLIC <public id literal> [<system literal>]
 *
 * NOTE(review): the line with the function name and parameter list (listing
 * line 4611) is missing from this view; the body uses `ctxt`, `publicID`
 * and `strict`, and the name is presumably xmlParseExternalID. Listing lines
 * 4625, 4635 and 4657 (the bodies of the three `== NULL` checks) are also
 * absent and presumably report the missing literal -- confirm against the
 * complete file.
 *
 * *publicID is always reset to NULL first and receives the public
 * identifier literal when the PUBLIC form is parsed.
 *
 * `strict` selects how the PUBLIC form is finished. When non-zero, white
 * space and a system literal are expected after the public identifier.
 * When zero, the function returns NULL without error if no blank follows
 * or if the next character is not a quote.
 *
 * Returns the system literal, or NULL if none was parsed. Presumably both
 * the return value and *publicID are heap strings owned by the caller --
 * verify against xmlParseSystemLiteral / xmlParsePubidLiteral.
 */
4610 xmlChar *
4612  xmlChar *URI = NULL;
4613 
4614  SHRINK;
4615 
4616  *publicID = NULL;
4617  if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4618  SKIP(6);
      /* A missing blank is reported, but parsing continues regardless. */
4619  if (SKIP_BLANKS == 0) {
4620  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4621  "Space required after 'SYSTEM'\n");
4622  }
4623  URI = xmlParseSystemLiteral(ctxt);
4624  if (URI == NULL) {
4626  }
4627  } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4628  SKIP(6);
4629  if (SKIP_BLANKS == 0) {
4630  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4631  "Space required after 'PUBLIC'\n");
4632  }
4633  *publicID = xmlParsePubidLiteral(ctxt);
4634  if (*publicID == NULL) {
4636  }
4637  if (strict) {
4638  /*
4639  * We don't handle [83] so "S SystemLiteral" is required.
4640  */
4641  if (SKIP_BLANKS == 0) {
4642  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4643  "Space required after the Public Identifier\n");
4644  }
4645  } else {
4646  /*
4647  * We handle [83] so we return immediately, if
4648  * "S SystemLiteral" is not detected. We skip blanks if no
4649  * system literal was found, but this is harmless since we must
4650  * be at the end of a NotationDecl.
4651  */
4652  if (SKIP_BLANKS == 0) return(NULL);
4653  if ((CUR != '\'') && (CUR != '"')) return(NULL);
4654  }
4655  URI = xmlParseSystemLiteral(ctxt);
4656  if (URI == NULL) {
4658  }
4659  }
      /* Neither keyword matched: URI is still NULL here. */
4660  return(URI);
4661 }
4662 
/*
 * General (non-accelerated) comment parser: consumes the remainder of a
 * comment up to and including the closing "-->" and passes the text to the
 * SAX `comment` callback.
 *
 * NOTE(review): the first line of the parameter list (listing line 4678) is
 * missing from this view; the body uses `ctxt` and `buf` in addition to the
 * visible `len` and `size`, and the name is presumably
 * xmlParseCommentComplex. Listing lines 4690 and 4727 are absent as well
 * (see the notes at those places) -- confirm against the complete file.
 *
 * buf:  comment text collected so far, or NULL to start with a new buffer.
 *       The buffer is released with xmlFree() on every return path below,
 *       so the caller must not use it afterwards.
 * len:  number of bytes already stored in buf (reset to 0 if buf is NULL).
 * size: allocated size of buf.
 *
 * Three consecutive characters are tracked in q, r and cur. On each
 * iteration q is appended to buf and the window slides by one. The loop
 * ends when the window reads "-->", so the closing "--" is never copied.
 */
4677 static void
4679  size_t len, size_t size) {
4680  int q, ql;
4681  int r, rl;
4682  int cur, l;
4683  size_t count = 0;
4684  int inputid;
4685 
4686  inputid = ctxt->input->id;
4687 
      /* NOTE(review): listing line 4690 is absent from this view; it
       * presumably assigns the initial `size` used by the allocation
       * below -- confirm. */
4688  if (buf == NULL) {
4689  len = 0;
4691  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4692  if (buf == NULL) {
4693  xmlErrMemory(ctxt, NULL);
4694  return;
4695  }
4696  }
4697  GROW; /* Assure there's enough input data */
      /* Prime the window with the first two characters (q, then r). */
4698  q = CUR_CHAR(ql);
4699  if (q == 0)
4700  goto not_terminated;
4701  if (!IS_CHAR(q)) {
4702  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4703  "xmlParseComment: invalid xmlChar value %d\n",
4704  q);
4705  xmlFree (buf);
4706  return;
4707  }
4708  NEXTL(ql);
4709  r = CUR_CHAR(rl);
4710  if (r == 0)
4711  goto not_terminated;
      /* Report r: it is the character that failed the check, not q. */
4712  if (!IS_CHAR(r)) {
4713  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4714  "xmlParseComment: invalid xmlChar value %d\n",
4715  r);
4716  xmlFree (buf);
4717  return;
4718  }
4719  NEXTL(rl);
4720  cur = CUR_CHAR(l);
4721  if (cur == 0)
4722  goto not_terminated;
4723  while (IS_CHAR(cur) && /* checked */
4724  ((cur != '>') ||
4725  (r != '-') || (q != '-'))) {
      /* "--" not followed by '>' inside the comment.
       * NOTE(review): listing line 4727 (the body of this test) is absent
       * from this view; it presumably reports the double hyphen. */
4726  if ((r == '-') && (q == '-')) {
4728  }
4729  if ((len > XML_MAX_TEXT_LENGTH) &&
4730  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4731  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4732  "Comment too big found", NULL);
4733  xmlFree (buf);
4734  return;
4735  }
      /* Double the buffer before it fills up; the margin of 5 presumably
       * covers one multi-byte character plus the terminator. */
4736  if (len + 5 >= size) {
4737  xmlChar *new_buf;
4738  size_t new_size;
4739 
4740  new_size = size * 2;
4741  new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4742  if (new_buf == NULL) {
4743  xmlFree (buf);
4744  xmlErrMemory(ctxt, NULL);
4745  return;
4746  }
4747  buf = new_buf;
4748  size = new_size;
4749  }
      /* Emit the oldest character and slide the window by one. */
4750  COPY_BUF(ql,buf,len,q);
4751  q = r;
4752  ql = rl;
4753  r = cur;
4754  rl = l;
4755 
4756  count++;
4757  if (count > 50) {
4758  GROW;
4759  count = 0;
4760  if (ctxt->instate == XML_PARSER_EOF) {
4761  xmlFree(buf);
4762  return;
4763  }
4764  }
4765  NEXTL(l);
4766  cur = CUR_CHAR(l);
      /* A zero may only mean the buffer ran dry: refill and read again. */
4767  if (cur == 0) {
4768  SHRINK;
4769  GROW;
4770  cur = CUR_CHAR(l);
4771  }
4772  }
4773  buf[len] = 0;
4774  if (cur == 0) {
4775  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4776  "Comment not terminated \n<!--%.50s\n", buf);
4777  } else if (!IS_CHAR(cur)) {
4778  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4779  "xmlParseComment: invalid xmlChar value %d\n",
4780  cur);
4781  } else {
      /* cur is the '>' of "-->": check the entity boundary, consume it and
       * deliver the comment text. */
4782  if (inputid != ctxt->input->id) {
4783  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4784  "Comment doesn't start and stop in the same"
4785  " entity\n");
4786  }
4787  NEXT;
4788  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4789  (!ctxt->disableSAX))
4790  ctxt->sax->comment(ctxt->userData, buf);
4791  }
4792  xmlFree(buf);
4793  return;
4794 not_terminated:
4795  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4796  "Comment not terminated\n", NULL);
4797  xmlFree(buf);
4798  return;
4799 }
4800 
/*
 * Accelerated comment parser. Does nothing unless the input is positioned on
 * "<!--". Otherwise it switches the parser state to XML_PARSER_COMMENT,
 * skips the opening delimiter and scans the body in place.
 *
 * NOTE(review): the line with the function name and parameter list (listing
 * line 4812) is missing from this view; the body uses `ctxt`, and the name
 * is presumably xmlParseComment. Listing line 4816 (presumably the
 * declaration of `state`) and line 4966 (see the note at the end) are also
 * absent -- confirm against the complete file.
 *
 * The fast loop steps over TAB, LF and the bytes 0x20-0x7F except '-'. Each
 * scanned run is appended to a heap buffer, but only when a SAX `comment`
 * callback is installed. On "-->" the callback receives the collected text
 * (or an empty string when nothing was collected), the buffer is freed and
 * the saved state is restored. A "--" not followed by '>' is reported as
 * XML_ERR_HYPHEN_IN_COMMENT and scanning continues.
 */
4811 void
4813  xmlChar *buf = NULL;
4814  size_t size = XML_PARSER_BUFFER_SIZE;
4815  size_t len = 0;
4817  const xmlChar *in;
4818  size_t nbchar = 0;
4819  int ccol;
4820  int inputid;
4821 
4822  /*
4823  * Check that there is a comment right here.
4824  */
4825  if ((RAW != '<') || (NXT(1) != '!') ||
4826  (NXT(2) != '-') || (NXT(3) != '-')) return;
      /* Remember the caller's state and the input the comment started in. */
4827  state = ctxt->instate;
4828  ctxt->instate = XML_PARSER_COMMENT;
4829  inputid = ctxt->input->id;
4830  SKIP(4);
4831  SHRINK;
4832  GROW;
4833 
4834  /*
4835  * Accelerated common case where the input doesn't need to be
4836  * modified before passing it to the handler.
4837  */
4838  in = ctxt->input->cur;
4839  do {
4840  if (*in == 0xA) {
4841  do {
4842  ctxt->input->line++; ctxt->input->col = 1;
4843  in++;
4844  } while (*in == 0xA);
4845  }
      /* Step over every byte in 0x20-0x7F other than '-', plus TAB. */
4846 get_more:
4847  ccol = ctxt->input->col;
4848  while (((*in > '-') && (*in <= 0x7F)) ||
4849  ((*in >= 0x20) && (*in < '-')) ||
4850  (*in == 0x09)) {
4851  in++;
4852  ccol++;
4853  }
4854  ctxt->input->col = ccol;
4855  if (*in == 0xA) {
4856  do {
4857  ctxt->input->line++; ctxt->input->col = 1;
4858  in++;
4859  } while (*in == 0xA);
4860  goto get_more;
4861  }
4862  nbchar = in - ctxt->input->cur;
4863  /*
4864  * save current set of data
4865  */
4866  if (nbchar > 0) {
4867  if ((ctxt->sax != NULL) &&
4868  (ctxt->sax->comment != NULL)) {
      /* First run: if "--" follows immediately, allocate an exact fit
       * (run + terminator); otherwise leave XML_PARSER_BUFFER_SIZE bytes
       * of headroom for later runs. */
4869  if (buf == NULL) {
4870  if ((*in == '-') && (in[1] == '-'))
4871  size = nbchar + 1;
4872  else
4873  size = XML_PARSER_BUFFER_SIZE + nbchar;
4874  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4875  if (buf == NULL) {
4876  xmlErrMemory(ctxt, NULL);
4877  ctxt->instate = state;
4878  return;
4879  }
4880  len = 0;
4881  } else if (len + nbchar + 1 >= size) {
4882  xmlChar *new_buf;
4883  size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4884  new_buf = (xmlChar *) xmlRealloc(buf,
4885  size * sizeof(xmlChar));
4886  if (new_buf == NULL) {
4887  xmlFree (buf);
4888  xmlErrMemory(ctxt, NULL);
4889  ctxt->instate = state;
4890  return;
4891  }
4892  buf = new_buf;
4893  }
4894  memcpy(&buf[len], ctxt->input->cur, nbchar);
4895  len += nbchar;
4896  buf[len] = 0;
4897  }
4898  }
      /* NOTE(review): unlike the allocation-failure returns above, this
       * return leaves ctxt->instate at XML_PARSER_COMMENT -- confirm that
       * this is intended. */
4899  if ((len > XML_MAX_TEXT_LENGTH) &&
4900  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4901  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902  "Comment too big found", NULL);
4903  xmlFree (buf);
4904  return;
4905  }
4906  ctxt->input->cur = in;
4907  if (*in == 0xA) {
4908  in++;
4909  ctxt->input->line++; ctxt->input->col = 1;
4910  }
      /* CR LF: move input->cur onto the LF so the CR is left out of the
       * next copied run, then re-test the loop condition. A CR not
       * followed by LF is left in place (in is stepped back). */
4911  if (*in == 0xD) {
4912  in++;
4913  if (*in == 0xA) {
4914  ctxt->input->cur = in;
4915  in++;
4916  ctxt->input->line++; ctxt->input->col = 1;
4917  continue; /* while */
4918  }
4919  in--;
4920  }
      /* Refill/compact the input; `in` is re-read from input->cur below. */
4921  SHRINK;
4922  GROW;
4923  if (ctxt->instate == XML_PARSER_EOF) {
4924  xmlFree(buf);
4925  return;
4926  }
4927  in = ctxt->input->cur;
4928  if (*in == '-') {
4929  if (in[1] == '-') {
      /* "-->": end of comment. Deliver the text and restore the state. */
4930  if (in[2] == '>') {
4931  if (ctxt->input->id != inputid) {
4932  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4933  "comment doesn't start and stop in the"
4934  " same entity\n");
4935  }
4936  SKIP(3);
4937  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4938  (!ctxt->disableSAX)) {
4939  if (buf != NULL)
4940  ctxt->sax->comment(ctxt->userData, buf);
4941  else
4942  ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4943  }
4944  if (buf != NULL)
4945  xmlFree(buf);
4946  if (ctxt->instate != XML_PARSER_EOF)
4947  ctxt->instate = state;
4948  return;
4949  }
      /* "--" without '>': report it, step over both hyphens (one here,
       * one below) and keep scanning. */
4950  if (buf != NULL) {
4951  xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4952  "Double hyphen within comment: "
4953  "<!--%.50s\n",
4954  buf);
4955  } else
4956  xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4957  "Double hyphen within comment\n", NULL);
4958  in++;
4959  ctxt->input->col++;
4960  }
4961  in++;
4962  ctxt->input->col++;
4963  goto get_more;
4964  }
4965  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
      /* NOTE(review): listing line 4966 is absent from this view. `buf` is
       * not freed on this path, so the missing line presumably hands
       * buf/len/size to the general comment parser -- confirm. */
4967  ctxt->instate = state;
4968  return;
4969 }
4970 
4971 
/*
 * Parse the target name of a processing instruction and check it against
 * the reserved "xml" prefix.
 *
 * NOTE(review): the line with the function name and parameter list (listing
 * line 4984) is missing from this view; the name xmlParsePITarget is taken
 * from the warning text below and the body uses `ctxt`. Listing line 4999
 * is also absent (see the note there) -- confirm against the complete file.
 *
 * For a name whose first three letters are "xml" in any letter case:
 *   - exactly "xml" raises XML_ERR_RESERVED_XML_NAME and returns the name;
 *   - any other three-letter spelling returns the name after the (missing)
 *     line 4999;
 *   - a name found in the NULL-terminated xmlW3CPIs list is returned as is;
 *   - anything else only triggers a warning.
 * A name containing ':' is additionally reported through xmlNsErr.
 *
 * Returns whatever xmlParseName() returned, which may be NULL.
 */
4983 const xmlChar *
4985  const xmlChar *name;
4986 
4987  name = xmlParseName(ctxt);
4988  if ((name != NULL) &&
4989  ((name[0] == 'x') || (name[0] == 'X')) &&
4990  ((name[1] == 'm') || (name[1] == 'M')) &&
4991  ((name[2] == 'l') || (name[2] == 'L'))) {
4992  int i;
4993  if ((name[0] == 'x') && (name[1] == 'm') &&
4994  (name[2] == 'l') && (name[3] == 0)) {
4995  xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4996  "XML declaration allowed only at the start of the document\n");
4997  return(name);
      /* NOTE(review): listing line 4999 is absent from this view; it
       * presumably reports the reserved name before returning. */
4998  } else if (name[3] == 0) {
5000  return(name);
5001  }
      /* Names listed in xmlW3CPIs are accepted without a warning. */
5002  for (i = 0;;i++) {
5003  if (xmlW3CPIs[i] == NULL) break;
5004  if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5005  return(name);
5006  }
5007  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5008  "xmlParsePITarget: invalid name prefix 'xml'\n",
5009  NULL, NULL);
5010  }
5011  if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5012  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5013  "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5014  }
5015  return(name);
5016 }
5017 
5018 #ifdef LIBXML_CATALOG_ENABLED
5019 
/*
 * Parse the content of a catalog processing instruction, expected to have
 * the form
 *     catalog = "URL"     (single or double quotes, optional blanks)
 * and register the URL with xmlCatalogAddLocal(), storing the result in
 * ctxt->catalogs.
 *
 * ctxt:    parser context whose `catalogs` field is updated.
 * catalog: the PI content to analyse; it is only read.
 *
 * Syntax errors produce an XML_WAR_CATALOG_PI warning and leave
 * ctxt->catalogs untouched.
 * NOTE(review): a missing '=' after the keyword returns silently without
 * that warning, unlike the other malformed cases -- confirm this is
 * intended.
 */
5034 static void
5035 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5036  xmlChar *URL = NULL;
5037  const xmlChar *tmp, *base;
5038  xmlChar marker;
5039 
5040  tmp = catalog;
5041  while (IS_BLANK_CH(*tmp)) tmp++;
5042  if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5043  goto error;
5044  tmp += 7;
5045  while (IS_BLANK_CH(*tmp)) tmp++;
5046  if (*tmp != '=') {
5047  return;
5048  }
5049  tmp++;
5050  while (IS_BLANK_CH(*tmp)) tmp++;
      /* The opening quote decides which character closes the value. */
5051  marker = *tmp;
5052  if ((marker != '\'') && (marker != '"'))
5053  goto error;
5054  tmp++;
5055  base = tmp;
5056  while ((*tmp != 0) && (*tmp != marker)) tmp++;
5057  if (*tmp == 0)
5058  goto error;
5059  URL = xmlStrndup(base, tmp - base);
5060  tmp++;
      /* Only blanks may follow the closing quote. */
5061  while (IS_BLANK_CH(*tmp)) tmp++;
5062  if (*tmp != 0)
5063  goto error;
5064 
5065  if (URL != NULL) {
5066  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5067  xmlFree(URL);
5068  }
5069  return;
5070 
5071 error:
5072  xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5073  "Catalog PI syntax error: %s\n",
5074  catalog, NULL);
      /* URL is only non-NULL here when trailing garbage followed the value. */
5075  if (URL != NULL)
5076  xmlFree(URL);
5077 }
5078 #endif
5079 
5091 void
5093  xmlChar *buf = NULL;
5094  size_t len = 0;
5095  size_t size = XML_PARSER_BUFFER_SIZE;
5096  int cur, l;
5097  const xmlChar *target;
5099  int count = 0;
5100 
5101  if ((RAW == '<') && (NXT(1) == '?')) {
5102  int inputid = ctxt->input->id;
5103  state = ctxt->instate;
5104  ctxt->instate = XML_PARSER_PI;
5105  /*
5106  * this is a Processing Instruction.
5107  */
5108  SKIP(2);
5109  SHRINK;
5110 
5111  /*
5112  * Parse the target name and check for special support like
5113  * namespace.
5114  */
5115  target = xmlParsePITarget(ctxt);
5116  if (target != NULL) {
5117  if ((RAW == '?') && (NXT(1) == '>')) {
5118  if (inputid != ctxt->input->id) {
5119  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5120  "PI declaration doesn't start and stop in"
5121  " the same entity\n");
5122  }
5123  SKIP(2);
5124 
5125  /*
5126  * SAX: PI detected.
5127  */
5128  if ((ctxt->sax) && (!ctxt->disableSAX) &&
5129  (ctxt->sax->processingInstruction != NULL))
5130  ctxt->sax->processingInstruction(ctxt->userData,
5131  target, NULL);
5132  if (ctxt->instate != XML_PARSER_EOF)
5133  ctxt->instate = state;
5134  return;
5135  }
5136  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5137  if (buf == NULL) {
5138  xmlErrMemory(ctxt, NULL);
5139  ctxt->instate = state;
5140  return;
5141  }
5142  if (SKIP_BLANKS == 0) {
5143  xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5144  "ParsePI: PI %s space expected\n", target);
5145  }
5146  cur = CUR_CHAR(l);
5147  while (IS_CHAR(cur) && /* checked */
5148  ((cur != '?') || (NXT(1) != '>'))) {
5149  if (len + 5 >= size) {
5150  xmlChar *tmp;
5151  size_t new_size = size * 2;
5152  tmp = (xmlChar *) xmlRealloc(buf, new_size);
5153  if (tmp == NULL) {
5154  xmlErrMemory(ctxt, NULL);
5155  xmlFree(buf);
5156  ctxt->instate = state;
5157  return;
5158  }
5159  buf = tmp;
5160  size = new_size;
5161  }
5162  count++;
5163  if (count > 50) {
5164  GROW;
5165  if (ctxt->instate == XML_PARSER_EOF) {
5166  xmlFree(buf);
5167  return;
5168  }
5169  count = 0;
5170  if ((len > XML_MAX_TEXT_LENGTH) &&
5171  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5172  xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5173  "PI %s too big found", target);
5174  xmlFree(buf);
5175  ctxt->instate = state;
5176  return;
5177  }
5178  }
5179  COPY_BUF(l,buf,len,cur);
5180  NEXTL(l);
5181  cur = CUR_CHAR(l);
5182  if (cur == 0) {
5183  SHRINK;
5184  GROW;
5185  cur = CUR_CHAR(l);
5186  }
5187  }
5188  if ((len > XML_MAX_TEXT_LENGTH) &&
5189  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5190  xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5191  "PI %s too big found", target);
5192