ReactOS  0.4.15-dev-5452-g3c95c95
parser.c
Go to the documentation of this file.
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  * implemented on top of the SAX interfaces
4  *
5  * References:
6  * The XML specification:
7  * http://www.w3.org/TR/REC-xml
8  * Original 1.0 version:
9  * http://www.w3.org/TR/1998/REC-xml-19980210
10  * XML second edition working draft
11  * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of characters are actually implemented either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
41 #if defined(_WIN32)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/xmlmemory.h>
55 #include <libxml/threads.h>
56 #include <libxml/globals.h>
57 #include <libxml/tree.h>
58 #include <libxml/parser.h>
59 #include <libxml/parserInternals.h>
60 #include <libxml/valid.h>
61 #include <libxml/entities.h>
62 #include <libxml/xmlerror.h>
63 #include <libxml/encoding.h>
64 #include <libxml/xmlIO.h>
65 #include <libxml/uri.h>
66 #ifdef LIBXML_CATALOG_ENABLED
67 #include <libxml/catalog.h>
68 #endif
69 #ifdef LIBXML_SCHEMAS_ENABLED
70 #include <libxml/xmlschemastypes.h>
71 #include <libxml/relaxng.h>
72 #endif
73 
74 #include "buf.h"
75 #include "enc.h"
76 
77 struct _xmlStartTag {
78  const xmlChar *prefix;
79  const xmlChar *URI;
80  int line;
81  int nsNr;
82 };
83 
84 static void
86 
87 static xmlParserCtxtPtr
89  const xmlChar *base, xmlParserCtxtPtr pctx);
90 
91 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
92 
93 static int
95 
96 static void
98 
99 /************************************************************************
100  * *
101  * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102  * *
103  ************************************************************************/
104 
105 #define XML_MAX_HUGE_LENGTH 1000000000
106 
107 #define XML_PARSER_BIG_ENTITY 1000
108 #define XML_PARSER_LOT_ENTITY 5000
109 
110 /*
111  * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
112  * replacements over the size in bytes of the input indicates that you have
113  * an exponential behaviour. A value of 10 corresponds to at least 3 entity
114  * replacements per byte of input.
115  */
116 #define XML_PARSER_NON_LINEAR 10
117 
118 /*
119  * xmlParserEntityCheck
120  *
121  * Function to check non-linear entity expansion behaviour
122  * This is here to detect and stop exponential (non-linear) entity expansion
123  * This is not a limitation of the parser but a safety
124  * boundary feature. It can be disabled with the XML_PARSE_HUGE
125  * parser option.
126  */
127 static int
129  xmlEntityPtr ent, size_t replacement)
130 {
131  size_t consumed = 0;
132  int i;
133 
134  if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
135  return (0);
136  if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
137  return (1);
138 
139  /*
140  * This may look absurd but is needed to detect
141  * entities problems
142  */
143  if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
144  (ent->content != NULL) && (ent->checked == 0) &&
145  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
146  unsigned long oldnbent = ctxt->nbentities, diff;
147  xmlChar *rep;
148 
149  ent->checked = 1;
150 
151  ++ctxt->depth;
152  rep = xmlStringDecodeEntities(ctxt, ent->content,
153  XML_SUBSTITUTE_REF, 0, 0, 0);
154  --ctxt->depth;
155  if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
156  ent->content[0] = 0;
157  }
158 
159  diff = ctxt->nbentities - oldnbent + 1;
160  if (diff > INT_MAX / 2)
161  diff = INT_MAX / 2;
162  ent->checked = diff * 2;
163  if (rep != NULL) {
164  if (xmlStrchr(rep, '<'))
165  ent->checked |= 1;
166  xmlFree(rep);
167  rep = NULL;
168  }
169  }
170 
171  /*
172  * Prevent entity exponential check, not just replacement while
173  * parsing the DTD
174  * The check is potentially costly so do that only once in a thousand
175  */
176  if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
177  (ctxt->nbentities % 1024 == 0)) {
178  for (i = 0;i < ctxt->inputNr;i++) {
179  consumed += ctxt->inputTab[i]->consumed +
180  (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
181  }
182  if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
184  ctxt->instate = XML_PARSER_EOF;
185  return (1);
186  }
187  consumed = 0;
188  }
189 
190 
191 
192  if (replacement != 0) {
193  if (replacement < XML_MAX_TEXT_LENGTH)
194  return(0);
195 
196  /*
197  * If the volume of entity copy reaches 10 times the
198  * amount of parsed data and over the large text threshold
199  * then that's very likely to be an abuse.
200  */
201  if (ctxt->input != NULL) {
202  consumed = ctxt->input->consumed +
203  (ctxt->input->cur - ctxt->input->base);
204  }
205  consumed += ctxt->sizeentities;
206 
207  if (replacement < XML_PARSER_NON_LINEAR * consumed)
208  return(0);
209  } else if (size != 0) {
210  /*
211  * Do the check based on the replacement size of the entity
212  */
214  return(0);
215 
216  /*
217  * A limit on the amount of text data reasonably used
218  */
219  if (ctxt->input != NULL) {
220  consumed = ctxt->input->consumed +
221  (ctxt->input->cur - ctxt->input->base);
222  }
223  consumed += ctxt->sizeentities;
224 
225  if ((size < XML_PARSER_NON_LINEAR * consumed) &&
226  (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
227  return (0);
228  } else if (ent != NULL) {
229  /*
230  * use the number of parsed entities in the replacement
231  */
232  size = ent->checked / 2;
233 
234  /*
235  * The amount of data parsed counting entities size only once
236  */
237  if (ctxt->input != NULL) {
238  consumed = ctxt->input->consumed +
239  (ctxt->input->cur - ctxt->input->base);
240  }
241  consumed += ctxt->sizeentities;
242 
243  /*
244  * Check the density of entities for the amount of data
245  * knowing an entity reference will take at least 3 bytes
246  */
247  if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
248  return (0);
249  } else {
250  /*
251  * strange we got no data for checking
252  */
253  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
255  (ctxt->nbentities <= 10000))
256  return (0);
257  }
259  return (1);
260 }
261 
270 unsigned int xmlParserMaxDepth = 256;
271 
272 
273 
274 #define SAX2 1
275 #define XML_PARSER_BIG_BUFFER_SIZE 300
276 #define XML_PARSER_BUFFER_SIZE 100
277 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
278 
288 #define XML_PARSER_CHUNK_SIZE 100
289 
/*
 * List of XML prefixed PI targets allowed by W3C specs.
 * The array is terminated by a NULL entry so it can be walked without
 * knowing its length.
 */

static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
299 
300 
301 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
303  const xmlChar **str);
304 
305 static xmlParserErrors
308  void *user_data, int depth, const xmlChar *URL,
309  const xmlChar *ID, xmlNodePtr *list);
310 
311 static int
313  const char *encoding);
314 #ifdef LIBXML_LEGACY_ENABLED
315 static void
316 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
317  xmlNodePtr lastNode);
318 #endif /* LIBXML_LEGACY_ENABLED */
319 
320 static xmlParserErrors
322  const xmlChar *string, void *user_data, xmlNodePtr *lst);
323 
324 static int
326 
327 /************************************************************************
328  * *
329  * Some factorized error routines *
330  * *
331  ************************************************************************/
332 
341 static void
343  const xmlChar * localname)
344 {
345  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
346  (ctxt->instate == XML_PARSER_EOF))
347  return;
348  if (ctxt != NULL)
350 
351  if (prefix == NULL)
352  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
354  (const char *) localname, NULL, NULL, 0, 0,
355  "Attribute %s redefined\n", localname);
356  else
357  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
359  (const char *) prefix, (const char *) localname,
360  NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
361  localname);
362  if (ctxt != NULL) {
363  ctxt->wellFormed = 0;
364  if (ctxt->recovery == 0)
365  ctxt->disableSAX = 1;
366  }
367 }
368 
377 static void
379 {
380  const char *errmsg;
381 
382  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
383  (ctxt->instate == XML_PARSER_EOF))
384  return;
385  switch (error) {
387  errmsg = "CharRef: invalid hexadecimal value";
388  break;
390  errmsg = "CharRef: invalid decimal value";
391  break;
393  errmsg = "CharRef: invalid value";
394  break;
396  errmsg = "internal error";
397  break;
399  errmsg = "PEReference at end of document";
400  break;
402  errmsg = "PEReference in prolog";
403  break;
405  errmsg = "PEReference in epilog";
406  break;
408  errmsg = "PEReference: no name";
409  break;
411  errmsg = "PEReference: expecting ';'";
412  break;
413  case XML_ERR_ENTITY_LOOP:
414  errmsg = "Detected an entity reference loop";
415  break;
417  errmsg = "EntityValue: \" or ' expected";
418  break;
420  errmsg = "PEReferences forbidden in internal subset";
421  break;
423  errmsg = "EntityValue: \" or ' expected";
424  break;
426  errmsg = "AttValue: \" or ' expected";
427  break;
429  errmsg = "Unescaped '<' not allowed in attributes values";
430  break;
432  errmsg = "SystemLiteral \" or ' expected";
433  break;
435  errmsg = "Unfinished System or Public ID \" or ' expected";
436  break;
438  errmsg = "Sequence ']]>' not allowed in content";
439  break;
441  errmsg = "SYSTEM or PUBLIC, the URI is missing";
442  break;
444  errmsg = "PUBLIC, the Public Identifier is missing";
445  break;
447  errmsg = "Comment must not contain '--' (double-hyphen)";
448  break;
450  errmsg = "xmlParsePI : no target name";
451  break;
453  errmsg = "Invalid PI name";
454  break;
456  errmsg = "NOTATION: Name expected here";
457  break;
459  errmsg = "'>' required to close NOTATION declaration";
460  break;
462  errmsg = "Entity value required";
463  break;
465  errmsg = "Fragment not allowed";
466  break;
468  errmsg = "'(' required to start ATTLIST enumeration";
469  break;
471  errmsg = "NmToken expected in ATTLIST enumeration";
472  break;
474  errmsg = "')' required to finish ATTLIST enumeration";
475  break;
477  errmsg = "MixedContentDecl : '|' or ')*' expected";
478  break;
480  errmsg = "MixedContentDecl : '#PCDATA' expected";
481  break;
483  errmsg = "ContentDecl : Name or '(' expected";
484  break;
486  errmsg = "ContentDecl : ',' '|' or ')' expected";
487  break;
489  errmsg =
490  "PEReference: forbidden within markup decl in internal subset";
491  break;
492  case XML_ERR_GT_REQUIRED:
493  errmsg = "expected '>'";
494  break;
496  errmsg = "XML conditional section '[' expected";
497  break;
499  errmsg = "Content error in the external subset";
500  break;
502  errmsg =
503  "conditional section INCLUDE or IGNORE keyword expected";
504  break;
506  errmsg = "XML conditional section not closed";
507  break;
509  errmsg = "Text declaration '<?xml' required";
510  break;
512  errmsg = "parsing XML declaration: '?>' expected";
513  break;
515  errmsg = "external parsed entities cannot be standalone";
516  break;
518  errmsg = "EntityRef: expecting ';'";
519  break;
521  errmsg = "DOCTYPE improperly terminated";
522  break;
524  errmsg = "EndTag: '</' not found";
525  break;
527  errmsg = "expected '='";
528  break;
530  errmsg = "String not closed expecting \" or '";
531  break;
533  errmsg = "String not started expecting ' or \"";
534  break;
536  errmsg = "Invalid XML encoding name";
537  break;
539  errmsg = "standalone accepts only 'yes' or 'no'";
540  break;
542  errmsg = "Document is empty";
543  break;
545  errmsg = "Extra content at the end of the document";
546  break;
548  errmsg = "chunk is not well balanced";
549  break;
551  errmsg = "extra content at the end of well balanced chunk";
552  break;
554  errmsg = "Malformed declaration expecting version";
555  break;
557  errmsg = "Name too long";
558  break;
559 #if 0
560  case:
561  errmsg = "";
562  break;
563 #endif
564  default:
565  errmsg = "Unregistered error message";
566  }
567  if (ctxt != NULL)
568  ctxt->errNo = error;
569  if (info == NULL) {
570  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
571  XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
572  errmsg);
573  } else {
574  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575  XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
576  errmsg, info);
577  }
578  if (ctxt != NULL) {
579  ctxt->wellFormed = 0;
580  if (ctxt->recovery == 0)
581  ctxt->disableSAX = 1;
582  }
583 }
584 
593 static void LIBXML_ATTR_FORMAT(3,0)
594 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595  const char *msg)
596 {
597  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598  (ctxt->instate == XML_PARSER_EOF))
599  return;
600  if (ctxt != NULL)
601  ctxt->errNo = error;
602  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
603  XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
604  if (ctxt != NULL) {
605  ctxt->wellFormed = 0;
606  if (ctxt->recovery == 0)
607  ctxt->disableSAX = 1;
608  }
609 }
610 
621 static void LIBXML_ATTR_FORMAT(3,0)
622 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
623  const char *msg, const xmlChar *str1, const xmlChar *str2)
624 {
625  xmlStructuredErrorFunc schannel = NULL;
626 
627  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
628  (ctxt->instate == XML_PARSER_EOF))
629  return;
630  if ((ctxt != NULL) && (ctxt->sax != NULL) &&
631  (ctxt->sax->initialized == XML_SAX2_MAGIC))
632  schannel = ctxt->sax->serror;
633  if (ctxt != NULL) {
634  __xmlRaiseError(schannel,
635  (ctxt->sax) ? ctxt->sax->warning : NULL,
636  ctxt->userData,
637  ctxt, NULL, XML_FROM_PARSER, error,
638  XML_ERR_WARNING, NULL, 0,
639  (const char *) str1, (const char *) str2, NULL, 0, 0,
640  msg, (const char *) str1, (const char *) str2);
641  } else {
642  __xmlRaiseError(schannel, NULL, NULL,
643  ctxt, NULL, XML_FROM_PARSER, error,
644  XML_ERR_WARNING, NULL, 0,
645  (const char *) str1, (const char *) str2, NULL, 0, 0,
646  msg, (const char *) str1, (const char *) str2);
647  }
648 }
649 
659 static void LIBXML_ATTR_FORMAT(3,0)
660 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
661  const char *msg, const xmlChar *str1, const xmlChar *str2)
662 {
663  xmlStructuredErrorFunc schannel = NULL;
664 
665  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
666  (ctxt->instate == XML_PARSER_EOF))
667  return;
668  if (ctxt != NULL) {
669  ctxt->errNo = error;
670  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
671  schannel = ctxt->sax->serror;
672  }
673  if (ctxt != NULL) {
674  __xmlRaiseError(schannel,
675  ctxt->vctxt.error, ctxt->vctxt.userData,
676  ctxt, NULL, XML_FROM_DTD, error,
677  XML_ERR_ERROR, NULL, 0, (const char *) str1,
678  (const char *) str2, NULL, 0, 0,
679  msg, (const char *) str1, (const char *) str2);
680  ctxt->valid = 0;
681  } else {
682  __xmlRaiseError(schannel, NULL, NULL,
683  ctxt, NULL, XML_FROM_DTD, error,
684  XML_ERR_ERROR, NULL, 0, (const char *) str1,
685  (const char *) str2, NULL, 0, 0,
686  msg, (const char *) str1, (const char *) str2);
687  }
688 }
689 
/*
 * xmlFatalErrMsgInt:
 * ctxt:  an XML parser context, may be NULL
 * error:  the error number, stored in ctxt->errNo
 * msg:  the message, used as the format string for val
 * val:  an integer value passed to the format
 *
 * Raise an error through __xmlRaiseError, then flag the document as not
 * well formed and, unless the context is in recovery mode, disable SAX.
 */
699 static void LIBXML_ATTR_FORMAT(3,0)
700 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
701  const char *msg, int val)
702 {
 /* A parser with SAX disabled and in the EOF state stays silent. */
703  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
704  (ctxt->instate == XML_PARSER_EOF))
705  return;
706  if (ctxt != NULL)
707  ctxt->errNo = error;
 /*
  * NOTE(review): one line of this argument list (listing line 709) is
  * missing here; the call has fewer arguments than the one made by
  * xmlFatalErrMsg -- restore it from the full source.
  */
708  __xmlRaiseError(NULL, NULL, NULL,
710  NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
711  if (ctxt != NULL) {
712  ctxt->wellFormed = 0;
 /* Outside recovery mode, stop delivering SAX events. */
713  if (ctxt->recovery == 0)
714  ctxt->disableSAX = 1;
715  }
716 }
717 
/*
 * xmlFatalErrMsgStrIntStr:
 * ctxt:  an XML parser context, may be NULL
 * error:  the error number, stored in ctxt->errNo
 * msg:  the message, used as the format string for str1, val, str2
 * str1:  first string argument, also passed as extra information
 * val:  an integer argument
 * str2:  second string argument, also passed as extra information
 *
 * Raise an error through __xmlRaiseError, then flag the document as not
 * well formed and, unless the context is in recovery mode, disable SAX.
 */
729 static void LIBXML_ATTR_FORMAT(3,0)
730 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
731  const char *msg, const xmlChar *str1, int val,
732  const xmlChar *str2)
733 {
 /* A parser with SAX disabled and in the EOF state stays silent. */
734  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
735  (ctxt->instate == XML_PARSER_EOF))
736  return;
737  if (ctxt != NULL)
738  ctxt->errNo = error;
 /*
  * NOTE(review): one line of this argument list (listing line 740) is
  * missing here; the call has fewer arguments than the one made by
  * xmlFatalErrMsg -- restore it from the full source.
  */
739  __xmlRaiseError(NULL, NULL, NULL,
741  NULL, 0, (const char *) str1, (const char *) str2,
742  NULL, val, 0, msg, str1, val, str2);
743  if (ctxt != NULL) {
744  ctxt->wellFormed = 0;
 /* Outside recovery mode, stop delivering SAX events. */
745  if (ctxt->recovery == 0)
746  ctxt->disableSAX = 1;
747  }
748 }
749 
/*
 * xmlFatalErrMsgStr:
 * ctxt:  an XML parser context, may be NULL
 * error:  the error number, stored in ctxt->errNo
 * msg:  the message, used as the format string for val
 * val:  a string argument, also passed as extra information
 *
 * Raise an error through __xmlRaiseError, then flag the document as not
 * well formed and, unless the context is in recovery mode, disable SAX.
 */
759 static void LIBXML_ATTR_FORMAT(3,0)
760 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
761  const char *msg, const xmlChar * val)
762 {
 /* A parser with SAX disabled and in the EOF state stays silent. */
763  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
764  (ctxt->instate == XML_PARSER_EOF))
765  return;
766  if (ctxt != NULL)
767  ctxt->errNo = error;
 /*
  * NOTE(review): one line of this argument list (listing line 769) is
  * missing here; the call has fewer arguments than the one made by
  * xmlFatalErrMsg -- restore it from the full source.
  */
768  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
770  NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
771  val);
772  if (ctxt != NULL) {
773  ctxt->wellFormed = 0;
 /* Outside recovery mode, stop delivering SAX events. */
774  if (ctxt->recovery == 0)
775  ctxt->disableSAX = 1;
776  }
777 }
778 
/*
 * xmlErrMsgStr:
 * ctxt:  an XML parser context, may be NULL
 * error:  the error number, stored in ctxt->errNo
 * msg:  the message, used as the format string for val
 * val:  a string argument, also passed as extra information
 *
 * Raise an error through __xmlRaiseError. Unlike the xmlFatalErrMsg*
 * routines, this one leaves ctxt->wellFormed and ctxt->disableSAX alone.
 */
788 static void LIBXML_ATTR_FORMAT(3,0)
789 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
790  const char *msg, const xmlChar * val)
791 {
 /* A parser with SAX disabled and in the EOF state stays silent. */
792  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
793  (ctxt->instate == XML_PARSER_EOF))
794  return;
795  if (ctxt != NULL)
796  ctxt->errNo = error;
 /*
  * NOTE(review): one line of this argument list (listing line 798) is
  * missing here, so the domain/level passed cannot be read from this
  * listing -- restore it from the full source.
  */
797  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
799  NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
800  val);
801 }
802 
813 static void LIBXML_ATTR_FORMAT(3,0)
814 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
815  const char *msg,
816  const xmlChar * info1, const xmlChar * info2,
817  const xmlChar * info3)
818 {
819  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
820  (ctxt->instate == XML_PARSER_EOF))
821  return;
822  if (ctxt != NULL)
823  ctxt->errNo = error;
824  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
825  XML_ERR_ERROR, NULL, 0, (const char *) info1,
826  (const char *) info2, (const char *) info3, 0, 0, msg,
827  info1, info2, info3);
828  if (ctxt != NULL)
829  ctxt->nsWellFormed = 0;
830 }
831 
842 static void LIBXML_ATTR_FORMAT(3,0)
843 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
844  const char *msg,
845  const xmlChar * info1, const xmlChar * info2,
846  const xmlChar * info3)
847 {
848  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
849  (ctxt->instate == XML_PARSER_EOF))
850  return;
851  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
852  XML_ERR_WARNING, NULL, 0, (const char *) info1,
853  (const char *) info2, (const char *) info3, 0, 0, msg,
854  info1, info2, info3);
855 }
856 
857 /************************************************************************
858  * *
859  * Library wide options *
860  * *
861  ************************************************************************/
862 
873 int
875 {
876  switch (feature) {
877  case XML_WITH_THREAD:
878 #ifdef LIBXML_THREAD_ENABLED
879  return(1);
880 #else
881  return(0);
882 #endif
883  case XML_WITH_TREE:
884 #ifdef LIBXML_TREE_ENABLED
885  return(1);
886 #else
887  return(0);
888 #endif
889  case XML_WITH_OUTPUT:
890 #ifdef LIBXML_OUTPUT_ENABLED
891  return(1);
892 #else
893  return(0);
894 #endif
895  case XML_WITH_PUSH:
896 #ifdef LIBXML_PUSH_ENABLED
897  return(1);
898 #else
899  return(0);
900 #endif
901  case XML_WITH_READER:
902 #ifdef LIBXML_READER_ENABLED
903  return(1);
904 #else
905  return(0);
906 #endif
907  case XML_WITH_PATTERN:
908 #ifdef LIBXML_PATTERN_ENABLED
909  return(1);
910 #else
911  return(0);
912 #endif
913  case XML_WITH_WRITER:
914 #ifdef LIBXML_WRITER_ENABLED
915  return(1);
916 #else
917  return(0);
918 #endif
919  case XML_WITH_SAX1:
920 #ifdef LIBXML_SAX1_ENABLED
921  return(1);
922 #else
923  return(0);
924 #endif
925  case XML_WITH_FTP:
926 #ifdef LIBXML_FTP_ENABLED
927  return(1);
928 #else
929  return(0);
930 #endif
931  case XML_WITH_HTTP:
932 #ifdef LIBXML_HTTP_ENABLED
933  return(1);
934 #else
935  return(0);
936 #endif
937  case XML_WITH_VALID:
938 #ifdef LIBXML_VALID_ENABLED
939  return(1);
940 #else
941  return(0);
942 #endif
943  case XML_WITH_HTML:
944 #ifdef LIBXML_HTML_ENABLED
945  return(1);
946 #else
947  return(0);
948 #endif
949  case XML_WITH_LEGACY:
950 #ifdef LIBXML_LEGACY_ENABLED
951  return(1);
952 #else
953  return(0);
954 #endif
955  case XML_WITH_C14N:
956 #ifdef LIBXML_C14N_ENABLED
957  return(1);
958 #else
959  return(0);
960 #endif
961  case XML_WITH_CATALOG:
962 #ifdef LIBXML_CATALOG_ENABLED
963  return(1);
964 #else
965  return(0);
966 #endif
967  case XML_WITH_XPATH:
968 #ifdef LIBXML_XPATH_ENABLED
969  return(1);
970 #else
971  return(0);
972 #endif
973  case XML_WITH_XPTR:
974 #ifdef LIBXML_XPTR_ENABLED
975  return(1);
976 #else
977  return(0);
978 #endif
979  case XML_WITH_XINCLUDE:
980 #ifdef LIBXML_XINCLUDE_ENABLED
981  return(1);
982 #else
983  return(0);
984 #endif
985  case XML_WITH_ICONV:
986 #ifdef LIBXML_ICONV_ENABLED
987  return(1);
988 #else
989  return(0);
990 #endif
991  case XML_WITH_ISO8859X:
992 #ifdef LIBXML_ISO8859X_ENABLED
993  return(1);
994 #else
995  return(0);
996 #endif
997  case XML_WITH_UNICODE:
998 #ifdef LIBXML_UNICODE_ENABLED
999  return(1);
1000 #else
1001  return(0);
1002 #endif
1003  case XML_WITH_REGEXP:
1004 #ifdef LIBXML_REGEXP_ENABLED
1005  return(1);
1006 #else
1007  return(0);
1008 #endif
1009  case XML_WITH_AUTOMATA:
1010 #ifdef LIBXML_AUTOMATA_ENABLED
1011  return(1);
1012 #else
1013  return(0);
1014 #endif
1015  case XML_WITH_EXPR:
1016 #ifdef LIBXML_EXPR_ENABLED
1017  return(1);
1018 #else
1019  return(0);
1020 #endif
1021  case XML_WITH_SCHEMAS:
1022 #ifdef LIBXML_SCHEMAS_ENABLED
1023  return(1);
1024 #else
1025  return(0);
1026 #endif
1027  case XML_WITH_SCHEMATRON:
1028 #ifdef LIBXML_SCHEMATRON_ENABLED
1029  return(1);
1030 #else
1031  return(0);
1032 #endif
1033  case XML_WITH_MODULES:
1034 #ifdef LIBXML_MODULES_ENABLED
1035  return(1);
1036 #else
1037  return(0);
1038 #endif
1039  case XML_WITH_DEBUG:
1040 #ifdef LIBXML_DEBUG_ENABLED
1041  return(1);
1042 #else
1043  return(0);
1044 #endif
1045  case XML_WITH_DEBUG_MEM:
1046 #ifdef DEBUG_MEMORY_LOCATION
1047  return(1);
1048 #else
1049  return(0);
1050 #endif
1051  case XML_WITH_DEBUG_RUN:
1052 #ifdef LIBXML_DEBUG_RUNTIME
1053  return(1);
1054 #else
1055  return(0);
1056 #endif
1057  case XML_WITH_ZLIB:
1058 #ifdef LIBXML_ZLIB_ENABLED
1059  return(1);
1060 #else
1061  return(0);
1062 #endif
1063  case XML_WITH_LZMA:
1064 #ifdef LIBXML_LZMA_ENABLED
1065  return(1);
1066 #else
1067  return(0);
1068 #endif
1069  case XML_WITH_ICU:
1070 #ifdef LIBXML_ICU_ENABLED
1071  return(1);
1072 #else
1073  return(0);
1074 #endif
1075  default:
1076  break;
1077  }
1078  return(0);
1079 }
1080 
1081 /************************************************************************
1082  * *
1083  * SAX2 defaulted attributes handling *
1084  * *
1085  ************************************************************************/
1086 
1093 static void
1096 
1097  /* Avoid unused variable warning if features are disabled. */
1098  (void) sax;
1099 
1100  if (ctxt == NULL) return;
1101  sax = ctxt->sax;
1102 #ifdef LIBXML_SAX1_ENABLED
1103  if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1104  ((sax->startElementNs != NULL) ||
1105  (sax->endElementNs != NULL) ||
1106  ((sax->startElement == NULL) && (sax->endElement == NULL))))
1107  ctxt->sax2 = 1;
1108 #else
1109  ctxt->sax2 = 1;
1110 #endif /* LIBXML_SAX1_ENABLED */
1111 
1112  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1113  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1114  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1115  if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1116  (ctxt->str_xml_ns == NULL)) {
1117  xmlErrMemory(ctxt, NULL);
1118  }
1119 }
1120 
1121 typedef struct _xmlDefAttrs xmlDefAttrs;
1124  int nbAttrs; /* number of defaulted attributes on that element */
1125  int maxAttrs; /* the size of the array */
1126 #if __STDC_VERSION__ >= 199901L
1127  /* Using a C99 flexible array member avoids UBSan errors. */
1128  const xmlChar *values[]; /* array of localname/prefix/values/external */
1129 #else
1130  const xmlChar *values[5];
1131 #endif
1132 };
1133 
1151 static xmlChar *
1153 {
1154  if ((src == NULL) || (dst == NULL))
1155  return(NULL);
1156 
1157  while (*src == 0x20) src++;
1158  while (*src != 0) {
1159  if (*src == 0x20) {
1160  while (*src == 0x20) src++;
1161  if (*src != 0)
1162  *dst++ = 0x20;
1163  } else {
1164  *dst++ = *src++;
1165  }
1166  }
1167  *dst = 0;
1168  if (dst == src)
1169  return(NULL);
1170  return(dst);
1171 }
1172 
1184 static const xmlChar *
1186 {
1187  int i;
1188  int remove_head = 0;
1189  int need_realloc = 0;
1190  const xmlChar *cur;
1191 
1192  if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1193  return(NULL);
1194  i = *len;
1195  if (i <= 0)
1196  return(NULL);
1197 
1198  cur = src;
1199  while (*cur == 0x20) {
1200  cur++;
1201  remove_head++;
1202  }
1203  while (*cur != 0) {
1204  if (*cur == 0x20) {
1205  cur++;
1206  if ((*cur == 0x20) || (*cur == 0)) {
1207  need_realloc = 1;
1208  break;
1209  }
1210  } else
1211  cur++;
1212  }
1213  if (need_realloc) {
1214  xmlChar *ret;
1215 
1216  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1217  if (ret == NULL) {
1218  xmlErrMemory(ctxt, NULL);
1219  return(NULL);
1220  }
1222  *len = (int) strlen((const char *)ret);
1223  return(ret);
1224  } else if (remove_head) {
1225  *len -= remove_head;
1226  memmove(src, src + remove_head, 1 + *len);
1227  return(src);
1228  }
1229  return(NULL);
1230 }
1231 
1241 static void
1243  const xmlChar *fullname,
1244  const xmlChar *fullattr,
1245  const xmlChar *value) {
1247  int len;
1248  const xmlChar *name;
1249  const xmlChar *prefix;
1250 
1251  /*
1252  * Allows to detect attribute redefinitions
1253  */
1254  if (ctxt->attsSpecial != NULL) {
1255  if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1256  return;
1257  }
1258 
1259  if (ctxt->attsDefault == NULL) {
1260  ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1261  if (ctxt->attsDefault == NULL)
1262  goto mem_error;
1263  }
1264 
1265  /*
1266  * split the element name into prefix:localname , the string found
1267  * are within the DTD and then not associated to namespace names.
1268  */
1270  if (name == NULL) {
1271  name = xmlDictLookup(ctxt->dict, fullname, -1);
1272  prefix = NULL;
1273  } else {
1274  name = xmlDictLookup(ctxt->dict, name, -1);
1275  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1276  }
1277 
1278  /*
1279  * make sure there is some storage
1280  */
1281  defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1282  if (defaults == NULL) {
1284  (4 * 5) * sizeof(const xmlChar *));
1285  if (defaults == NULL)
1286  goto mem_error;
1287  defaults->nbAttrs = 0;
1288  defaults->maxAttrs = 4;
1289  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1290  defaults, NULL) < 0) {
1291  xmlFree(defaults);
1292  goto mem_error;
1293  }
1294  } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1296 
1298  (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1299  if (temp == NULL)
1300  goto mem_error;
1301  defaults = temp;
1302  defaults->maxAttrs *= 2;
1303  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1304  defaults, NULL) < 0) {
1305  xmlFree(defaults);
1306  goto mem_error;
1307  }
1308  }
1309 
1310  /*
1311  * Split the attribute name into prefix:localname; the strings found
1312  * are within the DTD and then not associated to namespace names.
1313  */
1314  name = xmlSplitQName3(fullattr, &len);
1315  if (name == NULL) {
1316  name = xmlDictLookup(ctxt->dict, fullattr, -1);
1317  prefix = NULL;
1318  } else {
1319  name = xmlDictLookup(ctxt->dict, name, -1);
1320  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1321  }
1322 
1323  defaults->values[5 * defaults->nbAttrs] = name;
1324  defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1325  /* intern the string and precompute the end */
1326  len = xmlStrlen(value);
1327  value = xmlDictLookup(ctxt->dict, value, len);
1328  defaults->values[5 * defaults->nbAttrs + 2] = value;
1329  defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1330  if (ctxt->external)
1331  defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1332  else
1333  defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1334  defaults->nbAttrs++;
1335 
1336  return;
1337 
1338 mem_error:
1339  xmlErrMemory(ctxt, NULL);
1340  return;
1341 }
1342 
1352 static void
1354  const xmlChar *fullname,
1355  const xmlChar *fullattr,
1356  int type)
1357 {
1358  if (ctxt->attsSpecial == NULL) {
1359  ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1360  if (ctxt->attsSpecial == NULL)
1361  goto mem_error;
1362  }
1363 
1364  if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1365  return;
1366 
1367  xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1368  (void *) (ptrdiff_t) type);
1369  return;
1370 
1371 mem_error:
1372  xmlErrMemory(ctxt, NULL);
1373  return;
1374 }
1375 
/*
 * xmlCleanSpecialAttrCallback:
 * payload:  the value stored for the entry, compared as an integer
 * data:  opaque callback argument
 * fullname:  first key of the entry
 * fullattr:  second key of the entry
 * unused:  third key, ignored
 *
 * Remove the (fullname, fullattr) entry from ctxt->attsSpecial when its
 * payload equals XML_ATTRIBUTE_CDATA; any other entry is left in place.
 * NOTE(review): the declaration of ctxt (listing line 1385) is missing
 * here; it is presumably derived from data -- confirm in the full source.
 */
1381 static void
1382 xmlCleanSpecialAttrCallback(void *payload, void *data,
1383  const xmlChar *fullname, const xmlChar *fullattr,
1384  const xmlChar *unused ATTRIBUTE_UNUSED) {
1386 
1387  if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1388  xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1389  }
1390 }
1391 
1400 static void
1402 {
1403  if (ctxt->attsSpecial == NULL)
1404  return;
1405 
1407 
1408  if (xmlHashSize(ctxt->attsSpecial) == 0) {
1409  xmlHashFree(ctxt->attsSpecial, NULL);
1410  ctxt->attsSpecial = NULL;
1411  }
1412  return;
1413 }
1414 
/*
 * Checks whether the string `lang` has the shape of a language tag.
 *
 * Returns 1 when the tag is accepted, 0 otherwise (including lang == NULL).
 *
 * Shapes accepted by the code below:
 *   - "i-", "I-", "x-" or "X-" followed only by ASCII letters;
 *   - a primary subtag of 4 to 8 ASCII letters standing alone;
 *   - a primary subtag of 2 or 3 ASCII letters, optionally followed by
 *     '-'-separated subtags in this order: one 3-letter extlang, a 4-letter
 *     script, a region (2 letters or 3 digits), a 5 to 8 letter variant.
 *     Whatever follows "variant-" is accepted without being checked.
 *
 * NOTE(review): the declarator line (listing line 1474) is missing from
 * this extract.
 */
1473 int
1475 {
1476  const xmlChar *cur = lang, *nxt;
1477 
1478  if (cur == NULL)
1479  return (0);
1480  if (((cur[0] == 'i') && (cur[1] == '-')) ||
1481  ((cur[0] == 'I') && (cur[1] == '-')) ||
1482  ((cur[0] == 'x') && (cur[1] == '-')) ||
1483  ((cur[0] == 'X') && (cur[1] == '-'))) {
1484  /*
1485  * Still allow IANA code and user code which were coming
1486  * from the previous version of the XML-1.0 specification
1487  * it's deprecated but we should not fail
1488  */
1489  cur += 2;
1490  while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1491  ((cur[0] >= 'a') && (cur[0] <= 'z')))
1492  cur++;
1493  return(cur[0] == 0);
1494  }
/* Measure the primary subtag: nxt - cur is its length in letters. */
1495  nxt = cur;
1496  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1497  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498  nxt++;
1499  if (nxt - cur >= 4) {
1500  /*
1501  * Reserved
1502  */
1503  if ((nxt - cur > 8) || (nxt[0] != 0))
1504  return(0);
1505  return(1);
1506  }
1507  if (nxt - cur < 2)
1508  return(0);
1509  /* we got an ISO 639 code */
1510  if (nxt[0] == 0)
1511  return(1);
1512  if (nxt[0] != '-')
1513  return(0);
1514 
1515  nxt++;
1516  cur = nxt;
1517  /* now we can have extlang or script or region or variant */
1518  if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1519  goto region_m49;
1520 
/* The length of the letter run decides which kind of subtag this is. */
1521  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1522  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1523  nxt++;
1524  if (nxt - cur == 4)
1525  goto script;
1526  if (nxt - cur == 2)
1527  goto region;
1528  if ((nxt - cur >= 5) && (nxt - cur <= 8))
1529  goto variant;
1530  if (nxt - cur != 3)
1531  return(0);
1532  /* we parsed an extlang */
1533  if (nxt[0] == 0)
1534  return(1);
1535  if (nxt[0] != '-')
1536  return(0);
1537 
1538  nxt++;
1539  cur = nxt;
1540  /* now we can have script or region or variant */
1541  if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1542  goto region_m49;
1543 
1544  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1545  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1546  nxt++;
1547  if (nxt - cur == 2)
1548  goto region;
1549  if ((nxt - cur >= 5) && (nxt - cur <= 8))
1550  goto variant;
1551  if (nxt - cur != 4)
1552  return(0);
1553  /* we parsed a script */
1554 script:
1555  if (nxt[0] == 0)
1556  return(1);
1557  if (nxt[0] != '-')
1558  return(0);
1559 
1560  nxt++;
1561  cur = nxt;
1562  /* now we can have region or variant */
1563  if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1564  goto region_m49;
1565 
1566  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1567  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1568  nxt++;
1569 
1570  if ((nxt - cur >= 5) && (nxt - cur <= 8))
1571  goto variant;
1572  if (nxt - cur != 2)
1573  return(0);
1574  /* we parsed a region */
1575 region:
1576  if (nxt[0] == 0)
1577  return(1);
1578  if (nxt[0] != '-')
1579  return(0);
1580 
1581  nxt++;
1582  cur = nxt;
1583  /* now we can just have a variant */
1584  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1585  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1586  nxt++;
1587 
1588  if ((nxt - cur < 5) || (nxt - cur > 8))
1589  return(0);
1590 
1591  /* we parsed a variant */
1592 variant:
1593  if (nxt[0] == 0)
1594  return(1);
1595  if (nxt[0] != '-')
1596  return(0);
1597  /* extensions and private use subtags not checked */
1598  return (1);
1599 
/*
 * Numeric region: nxt[0] is already known to be a digit; two more digits
 * are required, after which the tag continues as for a lettered region.
 */
1600 region_m49:
1601  if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1602  ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1603  nxt += 3;
1604  goto region;
1605  }
1606  return(0);
1607 }
1608 
1609 /************************************************************************
1610  * *
1611  * Parser stacks related functions and macros *
1612  * *
1613  ************************************************************************/
1614 
1616  const xmlChar ** str);
1617 
1618 #ifdef SAX2
1619 
1630 static int
1631 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1632 {
1633  if (ctxt->options & XML_PARSE_NSCLEAN) {
1634  int i;
1635  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1636  if (ctxt->nsTab[i] == prefix) {
1637  /* in scope */
1638  if (ctxt->nsTab[i + 1] == URL)
1639  return(-2);
1640  /* out of scope keep it */
1641  break;
1642  }
1643  }
1644  }
1645  if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1646  ctxt->nsMax = 10;
1647  ctxt->nsNr = 0;
1648  ctxt->nsTab = (const xmlChar **)
1649  xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1650  if (ctxt->nsTab == NULL) {
1651  xmlErrMemory(ctxt, NULL);
1652  ctxt->nsMax = 0;
1653  return (-1);
1654  }
1655  } else if (ctxt->nsNr >= ctxt->nsMax) {
1656  const xmlChar ** tmp;
1657  ctxt->nsMax *= 2;
1658  tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1659  ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1660  if (tmp == NULL) {
1661  xmlErrMemory(ctxt, NULL);
1662  ctxt->nsMax /= 2;
1663  return (-1);
1664  }
1665  ctxt->nsTab = tmp;
1666  }
1667  ctxt->nsTab[ctxt->nsNr++] = prefix;
1668  ctxt->nsTab[ctxt->nsNr++] = URL;
1669  return (ctxt->nsNr);
1670 }
/*
 * Pops `nr` slots off the namespace stack ctxt->nsTab, setting every
 * vacated slot to NULL.
 *
 * When more slots are requested than ctxt->nsNr holds, a message is sent
 * through xmlGenericError and the request is clamped to ctxt->nsNr.
 *
 * Returns the number of slots popped, or 0 when the table does not exist
 * or is empty.
 *
 * NOTE(review): the declarator line (listing line 1681) is missing from
 * this extract.
 */
1680 static int
1682 {
1683  int i;
1684 
1685  if (ctxt->nsTab == NULL) return(0);
1686  if (ctxt->nsNr < nr) {
1687  xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1688  nr = ctxt->nsNr;
1689  }
1690  if (ctxt->nsNr <= 0)
1691  return (0);
1692 
1693  for (i = 0;i < nr;i++) {
1694  ctxt->nsNr--;
1695  ctxt->nsTab[ctxt->nsNr] = NULL;
1696  }
1697  return(nr);
1698 }
1699 #endif
1700 
/*
 * Makes sure the attribute arrays ctxt->atts and ctxt->attallocs exist and
 * are large enough for `nr + 5` pointer slots.
 *
 * On first use 55 pointer slots are allocated for atts and 55 / 5 ints for
 * attallocs. Afterwards, when nr + 5 exceeds ctxt->maxatts, both arrays are
 * reallocated for (nr + 5) * 2 slots (attallocs gets one int per 5 slots).
 *
 * Returns ctxt->maxatts on success, or -1 after reporting a memory error.
 *
 * NOTE(review): the declarator line with the opening brace (listing line
 * 1702) is missing from this extract.
 */
1701 static int
1703  const xmlChar **atts;
1704  int *attallocs;
1705  int maxatts;
1706 
1707  if (ctxt->atts == NULL) {
1708  maxatts = 55; /* allow for 10 attrs by default */
1709  atts = (const xmlChar **)
1710  xmlMalloc(maxatts * sizeof(xmlChar *));
1711  if (atts == NULL) goto mem_error;
1712  ctxt->atts = atts;
1713  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1714  if (attallocs == NULL) goto mem_error;
1715  ctxt->attallocs = attallocs;
1716  ctxt->maxatts = maxatts;
1717  } else if (nr + 5 > ctxt->maxatts) {
1718  maxatts = (nr + 5) * 2;
1719  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1720  maxatts * sizeof(const xmlChar *));
1721  if (atts == NULL) goto mem_error;
1722  ctxt->atts = atts;
1723  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1724  (maxatts / 5) * sizeof(int));
1725  if (attallocs == NULL) goto mem_error;
1726  ctxt->attallocs = attallocs;
/* maxatts is only published once both arrays have been resized. */
1727  ctxt->maxatts = maxatts;
1728  }
1729  return(ctxt->maxatts);
1730 mem_error:
1731  xmlErrMemory(ctxt, NULL);
1732  return(-1);
1733 }
1734 
/*
 * Pushes `value` on the input stack ctxt->inputTab and makes it the
 * current input (ctxt->input). The table size is doubled when it is full.
 *
 * Returns -1 when ctxt or value is NULL or when growing the table fails,
 * otherwise the index at which value was stored.
 *
 * NOTE(review): the declarator (listing line 1745) and the line carrying
 * the reallocation call (listing line 1752) are missing from this extract.
 * The visible code assigns the result straight to ctxt->inputTab, so the
 * previous table pointer looks lost when the call fails - confirm against
 * the full source.
 */
1744 int
1746 {
1747  if ((ctxt == NULL) || (value == NULL))
1748  return(-1);
1749  if (ctxt->inputNr >= ctxt->inputMax) {
1750  ctxt->inputMax *= 2;
1751  ctxt->inputTab =
1753  ctxt->inputMax *
1754  sizeof(ctxt->inputTab[0]));
1755  if (ctxt->inputTab == NULL) {
1756  xmlErrMemory(ctxt, NULL);
1757  ctxt->inputMax /= 2;
1758  return (-1);
1759  }
1760  }
1761  ctxt->inputTab[ctxt->inputNr] = value;
1762  ctxt->input = value;
1763  return (ctxt->inputNr++);
1764 }
/*
 * Pops the top entry of the input stack ctxt->inputTab.
 *
 * ctxt->input is updated to the new top of the stack, or to NULL when the
 * stack becomes empty, and the vacated slot is cleared.
 *
 * Returns the popped entry, or NULL when ctxt is NULL or the stack is
 * empty.
 *
 * NOTE(review): the return type, the declarator and the declaration of
 * `ret` (listing lines 1773-1774 and 1776) are missing from this extract.
 */
1775 {
1777 
1778  if (ctxt == NULL)
1779  return(NULL);
1780  if (ctxt->inputNr <= 0)
1781  return (NULL);
1782  ctxt->inputNr--;
1783  if (ctxt->inputNr > 0)
1784  ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1785  else
1786  ctxt->input = NULL;
1787  ret = ctxt->inputTab[ctxt->inputNr];
1788  ctxt->inputTab[ctxt->inputNr] = NULL;
1789  return (ret);
1790 }
/*
 * Pushes `value` on the node stack ctxt->nodeTab and makes it the current
 * node (ctxt->node). The table is doubled when it is full.
 *
 * Unless XML_PARSE_HUGE is set, a stack deeper than xmlParserMaxDepth is
 * refused: a fatal error is raised and the parser is halted.
 *
 * Returns 0 when ctxt is NULL, -1 on allocation failure or excessive
 * depth, otherwise the index at which value was stored.
 *
 * NOTE(review): the declarator (listing line 1801) and the last argument
 * line of the error call (listing line 1821) are missing from this extract.
 */
1800 int
1802 {
1803  if (ctxt == NULL) return(0);
1804  if (ctxt->nodeNr >= ctxt->nodeMax) {
1805  xmlNodePtr *tmp;
1806 
1807  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1808  ctxt->nodeMax * 2 *
1809  sizeof(ctxt->nodeTab[0]));
1810  if (tmp == NULL) {
1811  xmlErrMemory(ctxt, NULL);
1812  return (-1);
1813  }
1814  ctxt->nodeTab = tmp;
1815  ctxt->nodeMax *= 2;
1816  }
1817  if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1818  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1819  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1820  "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1822  xmlHaltParser(ctxt);
1823  return(-1);
1824  }
1825  ctxt->nodeTab[ctxt->nodeNr] = value;
1826  ctxt->node = value;
1827  return (ctxt->nodeNr++);
1828 }
1829 
/*
 * Pops the top entry of the node stack ctxt->nodeTab.
 *
 * ctxt->node is updated to the new top of the stack, or to NULL when the
 * stack becomes empty, and the vacated slot is cleared.
 *
 * Returns the popped node, or NULL when ctxt is NULL or the stack is empty.
 *
 * NOTE(review): the declarator line (listing line 1839) is missing from
 * this extract.
 */
1838 xmlNodePtr
1840 {
1841  xmlNodePtr ret;
1842 
1843  if (ctxt == NULL) return(NULL);
1844  if (ctxt->nodeNr <= 0)
1845  return (NULL);
1846  ctxt->nodeNr--;
1847  if (ctxt->nodeNr > 0)
1848  ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1849  else
1850  ctxt->node = NULL;
1851  ret = ctxt->nodeTab[ctxt->nodeNr];
1852  ctxt->nodeTab[ctxt->nodeNr] = NULL;
1853  return (ret);
1854 }
1855 
/*
 * Pushes an element name on the name stack ctxt->nameTab and records the
 * matching start-tag data (prefix, URI, line, nsNr) at the same index in
 * ctxt->pushTab. `value` also becomes ctxt->name.
 *
 * Both tables are doubled together when the name stack is full; pushTab is
 * allocated with ctxt->nameMax entries the first time it is needed.
 *
 * Returns the index at which the name was stored, or -1 after reporting a
 * memory error.
 *
 * NOTE(review): the first declarator line (listing line 1870) is missing
 * from this extract.
 */
1869 static int
1871  const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1872 {
1873  xmlStartTag *tag;
1874 
1875  if (ctxt->nameNr >= ctxt->nameMax) {
1876  const xmlChar * *tmp;
1877  xmlStartTag *tmp2;
1878  ctxt->nameMax *= 2;
1879  tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1880  ctxt->nameMax *
1881  sizeof(ctxt->nameTab[0]));
1882  if (tmp == NULL) {
1883  ctxt->nameMax /= 2;
1884  goto mem_error;
1885  }
1886  ctxt->nameTab = tmp;
1887  tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1888  ctxt->nameMax *
1889  sizeof(ctxt->pushTab[0]));
1890  if (tmp2 == NULL) {
1891  ctxt->nameMax /= 2;
1892  goto mem_error;
1893  }
1894  ctxt->pushTab = tmp2;
1895  } else if (ctxt->pushTab == NULL) {
1896  ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1897  sizeof(ctxt->pushTab[0]));
1898  if (ctxt->pushTab == NULL)
1899  goto mem_error;
1900  }
1901  ctxt->nameTab[ctxt->nameNr] = value;
1902  ctxt->name = value;
/* pushTab is indexed in parallel with nameTab. */
1903  tag = &ctxt->pushTab[ctxt->nameNr];
1904  tag->prefix = prefix;
1905  tag->URI = URI;
1906  tag->line = line;
1907  tag->nsNr = nsNr;
1908  return (ctxt->nameNr++);
1909 mem_error:
1910  xmlErrMemory(ctxt, NULL);
1911  return (-1);
1912 }
1913 #ifdef LIBXML_PUSH_ENABLED
1914 
1922 static const xmlChar *
1923 nameNsPop(xmlParserCtxtPtr ctxt)
1924 {
1925  const xmlChar *ret;
1926 
1927  if (ctxt->nameNr <= 0)
1928  return (NULL);
1929  ctxt->nameNr--;
1930  if (ctxt->nameNr > 0)
1931  ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1932  else
1933  ctxt->name = NULL;
1934  ret = ctxt->nameTab[ctxt->nameNr];
1935  ctxt->nameTab[ctxt->nameNr] = NULL;
1936  return (ret);
1937 }
1938 #endif /* LIBXML_PUSH_ENABLED */
1939 
/*
 * Pushes `value` on the element name stack ctxt->nameTab and makes it the
 * current name (ctxt->name). The table is doubled when it is full.
 *
 * Returns -1 when ctxt is NULL or the table cannot be grown, otherwise the
 * index at which value was stored.
 *
 * NOTE(review): the declarator line (listing line 1950) is missing from
 * this extract.
 */
1949 int
1951 {
1952  if (ctxt == NULL) return (-1);
1953 
1954  if (ctxt->nameNr >= ctxt->nameMax) {
1955  const xmlChar * *tmp;
1956  tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1957  ctxt->nameMax * 2 *
1958  sizeof(ctxt->nameTab[0]));
1959  if (tmp == NULL) {
1960  goto mem_error;
1961  }
1962  ctxt->nameTab = tmp;
1963  ctxt->nameMax *= 2;
1964  }
1965  ctxt->nameTab[ctxt->nameNr] = value;
1966  ctxt->name = value;
1967  return (ctxt->nameNr++);
1968 mem_error:
1969  xmlErrMemory(ctxt, NULL);
1970  return (-1);
1971 }
/*
 * Pops the top entry of the element name stack ctxt->nameTab.
 *
 * ctxt->name is updated to the new top of the stack, or to NULL when the
 * stack becomes empty, and the vacated slot is cleared.
 *
 * Returns the popped name, or NULL when ctxt is NULL or the stack is empty.
 *
 * NOTE(review): the declarator line (listing line 1981) is missing from
 * this extract.
 */
1980 const xmlChar *
1982 {
1983  const xmlChar *ret;
1984 
1985  if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1986  return (NULL);
1987  ctxt->nameNr--;
1988  if (ctxt->nameNr > 0)
1989  ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1990  else
1991  ctxt->name = NULL;
1992  ret = ctxt->nameTab[ctxt->nameNr];
1993  ctxt->nameTab[ctxt->nameNr] = NULL;
1994  return (ret);
1995 }
1996 
1997 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1998  if (ctxt->spaceNr >= ctxt->spaceMax) {
1999  int *tmp;
2000 
2001  ctxt->spaceMax *= 2;
2002  tmp = (int *) xmlRealloc(ctxt->spaceTab,
2003  ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2004  if (tmp == NULL) {
2005  xmlErrMemory(ctxt, NULL);
2006  ctxt->spaceMax /=2;
2007  return(-1);
2008  }
2009  ctxt->spaceTab = tmp;
2010  }
2011  ctxt->spaceTab[ctxt->spaceNr] = val;
2012  ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2013  return(ctxt->spaceNr++);
2014 }
2015 
2016 static int spacePop(xmlParserCtxtPtr ctxt) {
2017  int ret;
2018  if (ctxt->spaceNr <= 0) return(0);
2019  ctxt->spaceNr--;
2020  if (ctxt->spaceNr > 0)
2021  ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2022  else
2023  ctxt->space = &ctxt->spaceTab[0];
2024  ret = ctxt->spaceTab[ctxt->spaceNr];
2025  ctxt->spaceTab[ctxt->spaceNr] = -1;
2026  return(ret);
2027 }
2028 
2029 /*
2030  * Macros for accessing the content. Those should be used only by the parser,
2031  * and not exported.
2032  *
2033  * Dirty macros, i.e. one often need to make assumption on the context to
2034  * use them
2035  *
2036  * CUR_PTR return the current pointer to the xmlChar to be parsed.
2037  * To be used with extreme caution since operations consuming
2038  * characters may move the input buffer to a different location !
2039  * CUR returns the current xmlChar value, i.e. an 8 bit value.
2040  * This should be used internally by the parser
2041  * only to compare to ASCII values otherwise it would break when
2042  * running with UTF-8 encoding.
2043  * RAW same as CUR but in the input buffer, bypass any token
2044  * extraction that may have been done
2045  * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
2046  * to compare on ASCII based substring.
2047  * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2048  * strings without newlines within the parser.
2049  * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2050  * defined char within the parser.
2051  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2052  *
2053  * NEXT Skip to the next character, this does the proper decoding
2054  * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2055  * NEXTL(l) Skip the current unicode character of l xmlChars long.
2056  * CUR_CHAR(l) returns the current unicode character (int), set l
2057  * to the number of xmlChars used for the encoding [0-5].
2058  * CUR_SCHAR same but operate on a string instead of the context
2059  * COPY_BUF copy the current unicode char to the target buffer, increment
2060  * the index
2061  * GROW, SHRINK handling of input buffers
2062  */
2063 
/*
 * Direct accessors on the current input buffer. Every macro in this group
 * expects a parser context variable named `ctxt` at the expansion site.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes at s, read as
 * unsigned char, equal c1..cn. Bytes are compared left to right and the
 * comparison stops at the first mismatch. All parameters are parenthesized
 * so that expression arguments (e.g. `p + 1`) are handled as one operand.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance the input cursor and the column counter by val, then
 * ask for more input when the cursor lands on a 0 byte. The line counter
 * is not touched.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): like SKIP but walks byte by byte so that '\n' bumps the line
 * counter and resets the column to 1.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SHRINK: outside progressive mode, call xmlSHRINK() once more than
 * 2 * INPUT_CHUNK bytes lie behind the cursor and fewer than
 * 2 * INPUT_CHUNK bytes remain ahead of it. The expansion is a bare `if`
 * statement that already ends with a semicolon.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
2110 
/*
 * Out-of-line part of the SHRINK macro: shrinks the current input buffer
 * through xmlParserInputShrink().
 *
 * NOTE(review): the statement guarded by the final `if` (listing line
 * 2114) is missing from this extract; presumably it requests more input
 * when the cursor sits on a 0 byte - confirm.
 */
2111 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2112  xmlParserInputShrink(ctxt->input);
2113  if (*ctxt->input->cur == 0)
2115 }
2116 
/*
 * GROW: outside progressive mode, call xmlGROW() when fewer than
 * INPUT_CHUNK bytes remain between the cursor and the end of the buffer.
 * The expansion is a bare `if` statement that already ends with a
 * semicolon.
 */
2117 #define GROW if ((ctxt->progressive == 0) && \
2118  (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2119  xmlGROW (ctxt);
2120 
/*
 * Out-of-line part of the GROW macro.
 *
 * Unless XML_PARSE_HUGE is set, the parser is halted with a "Huge input
 * lookup" error when the distance from the cursor to either end of the
 * buffer exceeds XML_MAX_LOOKUP_LIMIT and the input has a buffer attached.
 * The parser is also halted when the cursor is found outside [base, end].
 *
 * NOTE(review): listing lines 2128 (part of the first condition), 2134 and
 * 2142 are missing from this extract; presumably 2134 performs the actual
 * buffer growth and 2142 requests more input - confirm.
 */
2121 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2122  ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2123  ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2124 
2125  if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2126  (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2127  ((ctxt->input->buf) &&
2129  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2130  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2131  xmlHaltParser(ctxt);
2132  return;
2133  }
2135  if ((ctxt->input->cur > ctxt->input->end) ||
2136  (ctxt->input->cur < ctxt->input->base)) {
/* NOTE(review): here the parser is halted before the error is raised, the reverse of the order used above - confirm this is intended. */
2137  xmlHaltParser(ctxt);
2138  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2139  return;
2140  }
2141  if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2143 }
2144 
/*
 * Cursor-advancing and character-decoding helpers. Every macro in this
 * group expects a parser context variable named `ctxt` at the expansion
 * site. Macro parameters are parenthesized so that expression arguments
 * are handled as one operand.
 */

/* Skip blanks in the current input; evaluates to the count returned by
 * xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance by one character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance cursor and column by one byte and ask for more input when
 * the cursor lands on a 0 byte. The expansion is a brace block, not a
 * do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating line/column from the byte under the cursor.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* Decode the current character; l (an lvalue) receives its byte length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append character v to buffer b at index i and
 * advance i. A single byte is stored when l == 1; otherwise
 * xmlCopyCharMultiByte() encodes v and i advances by its return value.
 * The expansion is an unbraced if/else without trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))

/* Total number of input bytes consumed so far, including the part of the
 * current buffer that lies behind the cursor. */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2172 
/*
 * Skips blank characters at the cursor of the current input and returns
 * the number of steps skipped, saturating at INT_MAX.
 *
 * Fast path (a single input outside the DTD state, or parser in the START
 * state): walks the buffer directly, maintaining line and column.
 * Slow path: advances with NEXT, expands parameter-entity references when
 * they are allowed (external subset or nested input) and pops exhausted
 * nested inputs; each of these also counts as one step.
 *
 * NOTE(review): the declarator (listing line 2184) and the statement at
 * listing line 2209 are missing from this extract; presumably 2209 requests
 * more input when the 0 terminator is reached - confirm.
 */
2183 int
2185  int res = 0;
2186 
2187  /*
2188  * It's Okay to use CUR/NEXT here since all the blanks are on
2189  * the ASCII range.
2190  */
2191  if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2192  (ctxt->instate == XML_PARSER_START)) {
2193  const xmlChar *cur;
2194  /*
2195  * if we are in the document content, go really fast
2196  */
2197  cur = ctxt->input->cur;
2198  while (IS_BLANK_CH(*cur)) {
2199  if (*cur == '\n') {
2200  ctxt->input->line++; ctxt->input->col = 1;
2201  } else {
2202  ctxt->input->col++;
2203  }
2204  cur++;
2205  if (res < INT_MAX)
2206  res++;
2207  if (*cur == 0) {
/* Publish the local cursor, then re-read it after the missing statement. */
2208  ctxt->input->cur = cur;
2210  cur = ctxt->input->cur;
2211  }
2212  }
2213  ctxt->input->cur = cur;
2214  } else {
2215  int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2216 
2217  while (1) {
2218  if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2219  NEXT;
2220  } else if (CUR == '%') {
2221  /*
2222  * Need to handle support of entities branching here
2223  */
2224  if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2225  break;
2226  xmlParsePEReference(ctxt);
2227  } else if (CUR == 0) {
2228  if (ctxt->inputNr <= 1)
2229  break;
2230  xmlPopInput(ctxt);
2231  } else {
2232  break;
2233  }
2234 
2235  /*
2236  * Also increase the counter when entering or exiting a PERef.
2237  * The spec says: "When a parameter-entity reference is recognized
2238  * in the DTD and included, its replacement text MUST be enlarged
2239  * by the attachment of one leading and one following space (#x20)
2240  * character."
2241  */
2242  if (res < INT_MAX)
2243  res++;
2244  }
2245  }
2246  return(res);
2247 }
2248 
2249 /************************************************************************
2250  * *
2251  * Commodity functions to handle entities *
2252  * *
2253  ************************************************************************/
2254 
/*
 * Leaves the current nested input.
 *
 * Returns 0 when ctxt is NULL or when at most one input is on the stack;
 * otherwise returns the character under the cursor (CUR) once the function
 * has finished.
 *
 * NOTE(review): listing lines 2265, 2267-2268, 2272, 2274 and 2276 are
 * missing from this extract, so the debug trace call, the error call for
 * "Unfinished entity outside the DTD", the pop itself and the statement
 * guarded by the last `if` are not visible here - confirm against the full
 * source.
 */
2264 xmlChar
2266  if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2269  "Popping input %d\n", ctxt->inputNr);
2270  if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2271  (ctxt->instate != XML_PARSER_EOF))
2273  "Unfinished entity outside the DTD");
2275  if (*ctxt->input->cur == 0)
2277  return(CUR);
2278 }
2279 
/*
 * Switches the parser to a new input by pushing `input` on the input stack
 * with inputPush().
 *
 * The push is refused once more than 40 inputs are stacked (more than 1024
 * when XML_PARSE_HUGE is set).
 *
 * Returns -1 when input is NULL, when the nesting limit is exceeded or when
 * the parser is in the EOF state after the push; otherwise the value
 * returned by inputPush().
 *
 * NOTE(review): listing lines 2290, 2296, 2299, 2304 and 2306 are missing
 * from this extract (declarator, debug trace calls, the error report for
 * excessive nesting and the body of the `while` loop that unwinds the
 * stack) - confirm against the full source.
 */
2289 int
2291  int ret;
2292  if (input == NULL) return(-1);
2293 
2294  if (xmlParserDebugEntities) {
2295  if ((ctxt->input != NULL) && (ctxt->input->filename))
2297  "%s(%d): ", ctxt->input->filename,
2298  ctxt->input->line);
2300  "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2301  }
2302  if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2303  (ctxt->inputNr > 1024)) {
2305  while (ctxt->inputNr > 1)
2307  return(-1);
2308  }
2309  ret = inputPush(ctxt, input);
2310  if (ctxt->instate == XML_PARSER_EOF)
2311  return(-1);
2312  GROW;
2313  return(ret);
2314 }
2315 
/*
 * Parses a character reference at the input cursor: "&#x" hex-digits ";"
 * or "&#" decimal-digits ";".
 *
 * The accumulated value is clamped to 0x110000 while digits are read so it
 * cannot overflow. The cursor is advanced past the digits and past the
 * closing ';' when present.
 *
 * Returns the code point when it satisfies IS_CHAR(); returns 0 after
 * raising XML_ERR_INVALID_CHAR for out-of-range or illegal values, and 0
 * when the parser reaches the EOF state while reading.
 *
 * NOTE(review): listing lines 2332 (declarator), 2357, 2385 and 2401 are
 * missing from this extract; presumably the last three report the
 * malformed-reference errors - confirm.
 */
2331 int
2333  int val = 0;
2334  int count = 0;
2335 
2336  /*
2337  * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2338  */
2339  if ((RAW == '&') && (NXT(1) == '#') &&
2340  (NXT(2) == 'x')) {
2341  SKIP(3);
2342  GROW;
2343  while (RAW != ';') { /* loop blocked by count */
2344  if (count++ > 20) {
2345  count = 0;
2346  GROW;
2347  if (ctxt->instate == XML_PARSER_EOF)
2348  return(0);
2349  }
2350  if ((RAW >= '0') && (RAW <= '9'))
2351  val = val * 16 + (CUR - '0');
2352  else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2353  val = val * 16 + (CUR - 'a') + 10;
2354  else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2355  val = val * 16 + (CUR - 'A') + 10;
2356  else {
2358  val = 0;
2359  break;
2360  }
/* Clamp so that further digits cannot overflow the int. */
2361  if (val > 0x110000)
2362  val = 0x110000;
2363 
2364  NEXT;
2365  count++;
2366  }
2367  if (RAW == ';') {
2368  /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2369  ctxt->input->col++;
2370  ctxt->input->cur++;
2371  }
2372  } else if ((RAW == '&') && (NXT(1) == '#')) {
2373  SKIP(2);
2374  GROW;
2375  while (RAW != ';') { /* loop blocked by count */
2376  if (count++ > 20) {
2377  count = 0;
2378  GROW;
2379  if (ctxt->instate == XML_PARSER_EOF)
2380  return(0);
2381  }
2382  if ((RAW >= '0') && (RAW <= '9'))
2383  val = val * 10 + (CUR - '0');
2384  else {
2386  val = 0;
2387  break;
2388  }
2389  if (val > 0x110000)
2390  val = 0x110000;
2391 
2392  NEXT;
2393  count++;
2394  }
2395  if (RAW == ';') {
2396  /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2397  ctxt->input->col++;
2398  ctxt->input->cur++;
2399  }
2400  } else {
2402  }
2403 
2404  /*
2405  * [ WFC: Legal Character ]
2406  * Characters referred to using character references must match the
2407  * production for Char.
2408  */
2409  if (val >= 0x110000) {
2410  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2411  "xmlParseCharRef: character reference out of bounds\n",
2412  val);
2413  } else if (IS_CHAR(val)) {
2414  return(val);
2415  } else {
2416  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2417  "xmlParseCharRef: invalid xmlChar value %d\n",
2418  val);
2419  }
2420  return(0);
2421 }
2422 
/*
 * Parses a character reference ("&#x" hex-digits ";" or "&#" decimal-digits
 * ";") from the string pointed to by *str instead of the parser input.
 *
 * On a recognized "&#" prefix, *str is advanced past the digits that were
 * read and past the closing ';' when present. The accumulated value is
 * clamped to 0x110000 so it cannot overflow.
 *
 * Returns the code point when it satisfies IS_CHAR(); returns 0 when str or
 * *str is NULL, when the text does not start with "&#" (in which case *str
 * is left unchanged), or after raising XML_ERR_INVALID_CHAR.
 *
 * NOTE(review): listing lines 2442 (declarator), 2461, 2480 and 2493 are
 * missing from this extract; presumably the last three report the
 * malformed-reference errors - confirm.
 */
2441 static int
2443  const xmlChar *ptr;
2444  xmlChar cur;
2445  int val = 0;
2446 
2447  if ((str == NULL) || (*str == NULL)) return(0);
2448  ptr = *str;
2449  cur = *ptr;
2450  if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2451  ptr += 3;
2452  cur = *ptr;
2453  while (cur != ';') { /* Non input consuming loop */
2454  if ((cur >= '0') && (cur <= '9'))
2455  val = val * 16 + (cur - '0');
2456  else if ((cur >= 'a') && (cur <= 'f'))
2457  val = val * 16 + (cur - 'a') + 10;
2458  else if ((cur >= 'A') && (cur <= 'F'))
2459  val = val * 16 + (cur - 'A') + 10;
2460  else {
2462  val = 0;
2463  break;
2464  }
2465  if (val > 0x110000)
2466  val = 0x110000;
2467 
2468  ptr++;
2469  cur = *ptr;
2470  }
2471  if (cur == ';')
2472  ptr++;
2473  } else if ((cur == '&') && (ptr[1] == '#')){
2474  ptr += 2;
2475  cur = *ptr;
2476  while (cur != ';') { /* Non input consuming loops */
2477  if ((cur >= '0') && (cur <= '9'))
2478  val = val * 10 + (cur - '0');
2479  else {
2481  val = 0;
2482  break;
2483  }
2484  if (val > 0x110000)
2485  val = 0x110000;
2486 
2487  ptr++;
2488  cur = *ptr;
2489  }
2490  if (cur == ';')
2491  ptr++;
2492  } else {
2494  return(0);
2495  }
/* Hand the advanced position back to the caller before validating. */
2496  *str = ptr;
2497 
2498  /*
2499  * [ WFC: Legal Character ]
2500  * Characters referred to using character references must match the
2501  * production for Char.
2502  */
2503  if (val >= 0x110000) {
2504  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2505  "xmlParseStringCharRef: character reference out of bounds\n",
2506  val);
2507  } else if (IS_CHAR(val)) {
2508  return(val);
2509  } else {
2510  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2511  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2512  val);
2513  }
2514  return(0);
2515 }
2516 
/*
 * Decides, from the current parser state, whether a parameter-entity
 * reference at the cursor must be expanded now.
 *
 * Every state visible below returns without doing anything, except
 * XML_PARSER_DTD: there the function also returns when parsing the internal
 * subset from the top-level input, or when the character after the cursor
 * is a blank or 0; otherwise it falls out of the switch and calls
 * xmlParsePEReference().
 *
 * NOTE(review): listing lines 2550, 2552, 2561, 2566, 2568, 2570,
 * 2572-2573, 2577 and 2579 are missing from this extract (declarator,
 * several case labels and, presumably, error reports for states where a
 * reference is not allowed) - confirm against the full source.
 */
2549 void
2551  switch(ctxt->instate) {
2553  return;
2554  case XML_PARSER_COMMENT:
2555  return;
2556  case XML_PARSER_START_TAG:
2557  return;
2558  case XML_PARSER_END_TAG:
2559  return;
2560  case XML_PARSER_EOF:
2562  return;
2563  case XML_PARSER_PROLOG:
2564  case XML_PARSER_START:
2565  case XML_PARSER_MISC:
2567  return;
2569  case XML_PARSER_CONTENT:
2571  case XML_PARSER_PI:
2574  /* we just ignore it there */
2575  return;
2576  case XML_PARSER_EPILOG:
2578  return;
2580  /*
2581  * NOTE: in the case of entity values, we don't do the
2582  * substitution here since we need the literal
2583  * entity value to be able to save the internal
2584  * subset of the document.
2585  * This will be handled by xmlStringDecodeEntities
2586  */
2587  return;
2588  case XML_PARSER_DTD:
2589  /*
2590  * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2591  * In the internal DTD subset, parameter-entity references
2592  * can occur only where markup declarations can occur, not
2593  * within markup declarations.
2594  * In that case this is handled in xmlParseMarkupDecl
2595  */
2596  if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2597  return;
2598  if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2599  return;
2600  break;
2601  case XML_PARSER_IGNORE:
2602  return;
2603  }
2604 
2605  xmlParsePEReference(ctxt);
2606 }
2607 
/*
 * Macro used to grow the current buffer.
 *
 * buffer is an xmlChar pointer variable and buffer##_size the size_t
 * variable holding its capacity; the new capacity is twice the old one
 * plus n. The enclosing function must provide a `mem_error:` label, which
 * is jumped to when the size computation wraps around or the reallocation
 * fails (buffer and buffer##_size are left untouched in both cases).
 *
 * n is parenthesized so that an expression argument is added as a single
 * operand.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2622 
/*
 * Builds a newly allocated copy of the first `len` bytes of `str` in which
 * character references are replaced by the character they denote and,
 * depending on the bits set in `what`, general entity references
 * (XML_SUBSTITUTE_REF) and parameter-entity references
 * (XML_SUBSTITUTE_PEREF) are replaced by their recursively decoded content.
 * Decoding stops at the end of the span, at a 0 character, at one of the
 * characters end, end2 or end3, or when the parser enters the EOF state.
 *
 * Recursion is bounded through ctxt->depth: more than 40 levels (more than
 * 1024 when XML_PARSE_HUGE is set) makes the function return NULL.
 *
 * Returns the 0-terminated result, or NULL on invalid arguments (NULL ctxt
 * or str, negative len), excessive depth, allocation failure or any
 * decoding error; partial results are freed before returning NULL.
 *
 * NOTE(review): many lines are missing from this extract (listing lines
 * 2643, 2662, 2669-2670, 2692, 2695-2696, 2704, 2708, 2731, 2742,
 * 2749-2750, 2789, 2799): the declarator, the initial buffer allocation,
 * the debug trace calls, the second half of the predefined-entity test and
 * the statements guarded by each "nbchars + ... > buffer_size" check,
 * which presumably invoke growBuffer - confirm against the full source.
 */
2642 xmlChar *
2644  int what, xmlChar end, xmlChar end2, xmlChar end3) {
2645  xmlChar *buffer = NULL;
2646  size_t buffer_size = 0;
2647  size_t nbchars = 0;
2648 
2649  xmlChar *current = NULL;
2650  xmlChar *rep = NULL;
2651  const xmlChar *last;
2652  xmlEntityPtr ent;
2653  int c,l;
2654 
2655  if ((ctxt == NULL) || (str == NULL) || (len < 0))
2656  return(NULL);
2657  last = str + len;
2658 
2659  if (((ctxt->depth > 40) &&
2660  ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2661  (ctxt->depth > 1024)) {
2663  return(NULL);
2664  }
2665 
2666  /*
2667  * allocate a translation buffer.
2668  */
2671  if (buffer == NULL) goto mem_error;
2672 
2673  /*
2674  * OK loop until we reach one of the ending char or a size limit.
2675  * we are operating on already parsed values.
2676  */
2677  if (str < last)
2678  c = CUR_SCHAR(str, l);
2679  else
2680  c = 0;
2681  while ((c != 0) && (c != end) && /* non input consuming loop */
2682  (c != end2) && (c != end3) &&
2683  (ctxt->instate != XML_PARSER_EOF)) {
2684 
2685  if (c == 0) break;
2686  if ((c == '&') && (str[1] == '#')) {
/* Character reference: the helper advances str itself. */
2687  int val = xmlParseStringCharRef(ctxt, &str);
2688  if (val == 0)
2689  goto int_error;
2690  COPY_BUF(0,buffer,nbchars,val);
2691  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2693  }
2694  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2697  "String decoding Entity Reference: %.30s\n",
2698  str);
2699  ent = xmlParseStringEntityRef(ctxt, &str);
2700  xmlParserEntityCheck(ctxt, 0, ent, 0);
2701  if (ent != NULL)
2702  ctxt->nbentities += ent->checked / 2;
2703  if ((ent != NULL) &&
2705  if (ent->content != NULL) {
2706  COPY_BUF(0,buffer,nbchars,ent->content[0]);
2707  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2709  }
2710  } else {
2711  xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2712  "predefined entity has no content\n");
2713  goto int_error;
2714  }
2715  } else if ((ent != NULL) && (ent->content != NULL)) {
/* Decode the entity content recursively, one nesting level deeper. */
2716  ctxt->depth++;
2717  rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2718  0, 0, 0);
2719  ctxt->depth--;
2720  if (rep == NULL) {
2721  ent->content[0] = 0;
2722  goto int_error;
2723  }
2724 
2725  current = rep;
2726  while (*current != 0) { /* non input consuming loop */
2727  buffer[nbchars++] = *current++;
2728  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2729  if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2730  goto int_error;
2732  }
2733  }
2734  xmlFree(rep);
2735  rep = NULL;
2736  } else if (ent != NULL) {
/* Entity without content: copy the reference through as "&name;". */
2737  int i = xmlStrlen(ent->name);
2738  const xmlChar *cur = ent->name;
2739 
2740  buffer[nbchars++] = '&';
2741  if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2743  }
2744  for (;i > 0;i--)
2745  buffer[nbchars++] = *cur++;
2746  buffer[nbchars++] = ';';
2747  }
2748  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2751  "String decoding PE Reference: %.30s\n", str);
2752  ent = xmlParseStringPEReference(ctxt, &str);
2753  xmlParserEntityCheck(ctxt, 0, ent, 0);
2754  if (ent != NULL)
2755  ctxt->nbentities += ent->checked / 2;
2756  if (ent != NULL) {
2757  if (ent->content == NULL) {
2758  /*
2759  * Note: external parsed entities will not be loaded,
2760  * it is not required for a non-validating parser to
2761  * complete external PEReferences coming from the
2762  * internal subset
2763  */
2764  if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2765  ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2766  (ctxt->validate != 0)) {
2767  xmlLoadEntityContent(ctxt, ent);
2768  } else {
2769  xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2770  "not validating will not read content for PE entity %s\n",
2771  ent->name, NULL);
2772  }
2773  }
2774  ctxt->depth++;
2775  rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2776  0, 0, 0);
2777  ctxt->depth--;
2778  if (rep == NULL) {
2779  if (ent->content != NULL)
2780  ent->content[0] = 0;
2781  goto int_error;
2782  }
2783  current = rep;
2784  while (*current != 0) { /* non input consuming loop */
2785  buffer[nbchars++] = *current++;
2786  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2787  if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2788  goto int_error;
2790  }
2791  }
2792  xmlFree(rep);
2793  rep = NULL;
2794  }
2795  } else {
/* Ordinary character: copy it and step over its l bytes. */
2796  COPY_BUF(l,buffer,nbchars,c);
2797  str += l;
2798  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2800  }
2801  }
2802  if (str < last)
2803  c = CUR_SCHAR(str, l);
2804  else
2805  c = 0;
2806  }
2807  buffer[nbchars] = 0;
2808  return(buffer);
2809 
/* mem_error reports the failure, then shares the cleanup of int_error. */
2810 mem_error:
2811  xmlErrMemory(ctxt, NULL);
2812 int_error:
2813  if (rep != NULL)
2814  xmlFree(rep);
2815  if (buffer != NULL)
2816  xmlFree(buffer);
2817  return(NULL);
2818 }
2819 
/*
 * Convenience wrapper around xmlStringLenDecodeEntities() for a
 * 0-terminated string: the length is taken with xmlStrlen(str) and all
 * other arguments are passed through unchanged.
 *
 * Returns NULL when ctxt or str is NULL, otherwise whatever
 * xmlStringLenDecodeEntities() returns.
 *
 * NOTE(review): the first declarator line (listing line 2839) is missing
 * from this extract.
 */
2838 xmlChar *
2840  xmlChar end, xmlChar end2, xmlChar end3) {
2841  if ((ctxt == NULL) || (str == NULL)) return(NULL);
2842  return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2843  end, end2, end3));
2844 }
2845 
2846 /************************************************************************
2847  * *
2848  * Commodity functions, cleanup needed ? *
2849  * *
2850  ************************************************************************/
2851 
2864 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2865  int blank_chars) {
2866  int i, ret;
2867  xmlNodePtr lastChild;
2868 
2869  /*
2870  * Don't spend time trying to differentiate them, the same callback is
2871  * used !
2872  */
2873  if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2874  return(0);
2875 
2876  /*
2877  * Check for xml:space value.
2878  */
2879  if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2880  (*(ctxt->space) == -2))
2881  return(0);
2882 
2883  /*
2884  * Check that the string is made of blanks
2885  */
2886  if (blank_chars == 0) {
2887  for (i = 0;i < len;i++)
2888  if (!(IS_BLANK_CH(str[i]))) return(0);
2889  }
2890 
2891  /*
2892  * Look if the element is mixed content in the DTD if available
2893  */
2894  if (ctxt->node == NULL) return(0);
2895  if (ctxt->myDoc != NULL) {
2896  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2897  if (ret == 0) return(1);
2898  if (ret == 1) return(0);
2899  }
2900 
2901  /*
2902  * Otherwise, heuristic :-\
2903  */
2904  if ((RAW != '<') && (RAW != 0xD)) return(0);
2905  if ((ctxt->node->children == NULL) &&
2906  (RAW == '<') && (NXT(1) == '/')) return(0);
2907 
2908  lastChild = xmlGetLastChild(ctxt->node);
2909  if (lastChild == NULL) {
2910  if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2911  (ctxt->node->content != NULL)) return(0);
2912  } else if (xmlNodeIsText(lastChild))
2913  return(0);
2914  else if ((ctxt->node->children != NULL) &&
2915  (xmlNodeIsText(ctxt->node->children)))
2916  return(0);
2917  return(1);
2918 }
2919 
2920 /************************************************************************
2921  * *
2922  * Extra stuff for namespace support *
2923  * Relates to http://www.w3.org/TR/WD-xml-names *
2924  * *
2925  ************************************************************************/
2926 
2945 xmlChar *
2948  xmlChar *buffer = NULL;
2949  int len = 0;
2950  int max = XML_MAX_NAMELEN;
2951  xmlChar *ret = NULL;
2952  const xmlChar *cur = name;
2953  int c;
2954 
2955  if (prefix == NULL) return(NULL);
2956  *prefix = NULL;
2957 
2958  if (cur == NULL) return(NULL);
2959 
2960 #ifndef XML_XML_NAMESPACE
2961  /* xml: prefix is not really a namespace */
2962  if ((cur[0] == 'x') && (cur[1] == 'm') &&
2963  (cur[2] == 'l') && (cur[3] == ':'))
2964  return(xmlStrdup(name));
2965 #endif
2966 
2967  /* nasty but well-formed */
2968  if (cur[0] == ':')
2969  return(xmlStrdup(name));
2970 
2971  c = *cur++;
2972  while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2973  buf[len++] = c;
2974  c = *cur++;
2975  }
2976  if (len >= max) {
2977  /*
2978  * Okay someone managed to make a huge name, so he's ready to pay
2979  * for the processing speed.
2980  */
2981  max = len * 2;
2982 
2983  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2984  if (buffer == NULL) {
2985  xmlErrMemory(ctxt, NULL);
2986  return(NULL);
2987  }
2988  memcpy(buffer, buf, len);
2989  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2990  if (len + 10 > max) {
2991  xmlChar *tmp;
2992 
2993  max *= 2;
2994  tmp = (xmlChar *) xmlRealloc(buffer,
2995  max * sizeof(xmlChar));
2996  if (tmp == NULL) {
2997  xmlFree(buffer);
2998  xmlErrMemory(ctxt, NULL);
2999  return(NULL);
3000  }
3001  buffer = tmp;
3002  }
3003  buffer[len++] = c;
3004  c = *cur++;
3005  }
3006  buffer[len] = 0;
3007  }
3008 
3009  if ((c == ':') && (*cur == 0)) {
3010  if (buffer != NULL)
3011  xmlFree(buffer);
3012  *prefix = NULL;
3013  return(xmlStrdup(name));
3014  }
3015 
3016  if (buffer == NULL)
3017  ret = xmlStrndup(buf, len);
3018  else {
3019  ret = buffer;
3020  buffer = NULL;
3021  max = XML_MAX_NAMELEN;
3022  }
3023 
3024 
3025  if (c == ':') {
3026  c = *cur;
3027  *prefix = ret;
3028  if (c == 0) {
3029  return(xmlStrndup(BAD_CAST "", 0));
3030  }
3031  len = 0;
3032 
3033  /*
3034  * Check that the first character is proper to start
3035  * a new name
3036  */
3037  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3038  ((c >= 0x41) && (c <= 0x5A)) ||
3039  (c == '_') || (c == ':'))) {
3040  int l;
3041  int first = CUR_SCHAR(cur, l);
3042 
3043  if (!IS_LETTER(first) && (first != '_')) {
3044  xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3045  "Name %s is not XML Namespace compliant\n",
3046  name);
3047  }
3048  }
3049  cur++;
3050 
3051  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3052  buf[len++] = c;
3053  c = *cur++;
3054  }
3055  if (len >= max) {
3056  /*
3057  * Okay someone managed to make a huge name, so he's ready to pay
3058  * for the processing speed.
3059  */
3060  max = len * 2;
3061 
3062  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3063  if (buffer == NULL) {
3064  xmlErrMemory(ctxt, NULL);
3065  return(NULL);
3066  }
3067  memcpy(buffer, buf, len);
3068  while (c != 0) { /* tested bigname2.xml */
3069  if (len + 10 > max) {
3070  xmlChar *tmp;
3071 
3072  max *= 2;
3073  tmp = (xmlChar *) xmlRealloc(buffer,
3074  max * sizeof(xmlChar));
3075  if (tmp == NULL) {
3076  xmlErrMemory(ctxt, NULL);
3077  xmlFree(buffer);
3078  return(NULL);
3079  }
3080  buffer = tmp;
3081  }
3082  buffer[len++] = c;
3083  c = *cur++;
3084  }
3085  buffer[len] = 0;
3086  }
3087 
3088  if (buffer == NULL)
3089  ret = xmlStrndup(buf, len);
3090  else {
3091  ret = buffer;
3092  }
3093  }
3094 
3095  return(ret);
3096 }
3097 
3098 /************************************************************************
3099  * *
3100  * The parser itself *
3101  * Relates to http://www.w3.org/TR/REC-xml *
3102  * *
3103  ************************************************************************/
3104 
3105 /************************************************************************
3106  * *
3107  * Routines to parse Name, NCName and NmToken *
3108  * *
3109  ************************************************************************/
/*
 * Call counters, compiled in only when DEBUG is defined. Each one is
 * incremented inside an #ifdef DEBUG section near the top of a name or
 * token parsing routine later in this file.
 */
3110 #ifdef DEBUG
3111 static unsigned long nbParseName = 0;
3112 static unsigned long nbParseNmToken = 0;
3113 static unsigned long nbParseNCName = 0;
3114 static unsigned long nbParseNCNameComplex = 0;
3115 static unsigned long nbParseNameComplex = 0;
3116 static unsigned long nbParseStringName = 0;
3117 #endif
3118 
3119 /*
3120  * The two following functions are related to the change of accepted
3121  * characters for Name and NmToken in the Revision 5 of XML-1.0
3122  * They correspond to the modified production [4] and the new production [4a]
3123  * changes in that revision. Also note that the macros used for the
3124  * productions Letter, Digit, CombiningChar and Extender are not needed
3125  * anymore.
3126  * We still keep compatibility with pre-revision-5 parsing semantics if
3127  * the XML_PARSE_OLD10 option is given to the parser.
3128  */
/*
 * Tell whether code point c may start a Name.
 *
 * NOTE(review): the declarator line (listing line 3130) is absent from
 * this listing. The body reads ctxt->options and a code point c, and the
 * routine is presumably the xmlIsNameStartChar(ctxt, c) called by the
 * name parsing routines further down -- confirm against the real file.
 *
 * Unless XML_PARSE_OLD10 is set in ctxt->options the explicit ranges
 * below are used; otherwise the test is IS_LETTER(c), '_' or ':'.
 *
 * Returns 1 if c is accepted, 0 otherwise.
 */
3129 static int
3131  if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3132  /*
3133  * Use the new checks of production [4] [4a] and [5] of the
3134  * Update 5 of XML-1.0
 * The three leading comparisons are only a shortcut: ' ', '>' and
 * '/' are not inside any of the accepted ranges anyway.
3135  */
3136  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3137  (((c >= 'a') && (c <= 'z')) ||
3138  ((c >= 'A') && (c <= 'Z')) ||
3139  (c == '_') || (c == ':') ||
3140  ((c >= 0xC0) && (c <= 0xD6)) ||
3141  ((c >= 0xD8) && (c <= 0xF6)) ||
3142  ((c >= 0xF8) && (c <= 0x2FF)) ||
3143  ((c >= 0x370) && (c <= 0x37D)) ||
3144  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3145  ((c >= 0x200C) && (c <= 0x200D)) ||
3146  ((c >= 0x2070) && (c <= 0x218F)) ||
3147  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3148  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3149  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3150  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3151  ((c >= 0x10000) && (c <= 0xEFFFF))))
3152  return(1);
3153  } else {
3154  if (IS_LETTER(c) || (c == '_') || (c == ':'))
3155  return(1);
3156  }
3157  return(0);
3158 }
3159 
/*
 * Tell whether code point c may appear inside a Name (any position after
 * the first character).
 *
 * NOTE(review): the declarator line (listing line 3161) is absent from
 * this listing. The body reads ctxt->options and a code point c, and the
 * routine is presumably the xmlIsNameChar(ctxt, c) called by the name and
 * token parsing routines further down -- confirm against the real file.
 *
 * The lines tagged "!start" are the characters accepted here that the
 * name-start test just above does not accept. When XML_PARSE_OLD10 is
 * set the IS_LETTER / IS_DIGIT / IS_COMBINING / IS_EXTENDER macros plus
 * '.', '-', '_' and ':' are used instead of the explicit ranges.
 *
 * Returns 1 if c is accepted, 0 otherwise.
 */
3160 static int
3162  if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3163  /*
3164  * Use the new checks of production [4] [4a] and [5] of the
3165  * Update 5 of XML-1.0
 * The three leading comparisons are only a shortcut: ' ', '>' and
 * '/' are not inside any of the accepted ranges anyway.
3166  */
3167  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3168  (((c >= 'a') && (c <= 'z')) ||
3169  ((c >= 'A') && (c <= 'Z')) ||
3170  ((c >= '0') && (c <= '9')) || /* !start */
3171  (c == '_') || (c == ':') ||
3172  (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3173  ((c >= 0xC0) && (c <= 0xD6)) ||
3174  ((c >= 0xD8) && (c <= 0xF6)) ||
3175  ((c >= 0xF8) && (c <= 0x2FF)) ||
3176  ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3177  ((c >= 0x370) && (c <= 0x37D)) ||
3178  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179  ((c >= 0x200C) && (c <= 0x200D)) ||
3180  ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3181  ((c >= 0x2070) && (c <= 0x218F)) ||
3182  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3183  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3184  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3185  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3186  ((c >= 0x10000) && (c <= 0xEFFFF))))
3187  return(1);
3188  } else {
3189  if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3190  (c == '.') || (c == '-') ||
3191  (c == '_') || (c == ':') ||
3192  (IS_COMBINING(c)) ||
3193  (IS_EXTENDER(c)))
3194  return(1);
3195  }
3196  return(0);
3197 }
3198 
3200  int *len, int *alloc, int normalize);
3201 
3202 static const xmlChar *
3204  int len = 0, l;
3205  int c;
3206  int count = 0;
3207  int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3210 
3211 #ifdef DEBUG
3212  nbParseNameComplex++;
3213 #endif
3214 
3215  /*
3216  * Handler for more complex cases
3217  */
3218  GROW;
3219  if (ctxt->instate == XML_PARSER_EOF)
3220  return(NULL);
3221  c = CUR_CHAR(l);
3222  if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223  /*
3224  * Use the new checks of production [4] [4a] amd [5] of the
3225  * Update 5 of XML-1.0
3226  */
3227  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3228  (!(((c >= 'a') && (c <= 'z')) ||
3229  ((c >= 'A') && (c <= 'Z')) ||
3230  (c == '_') || (c == ':') ||
3231  ((c >= 0xC0) && (c <= 0xD6)) ||
3232  ((c >= 0xD8) && (c <= 0xF6)) ||
3233  ((c >= 0xF8) && (c <= 0x2FF)) ||
3234  ((c >= 0x370) && (c <= 0x37D)) ||
3235  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3236  ((c >= 0x200C) && (c <= 0x200D)) ||
3237  ((c >= 0x2070) && (c <= 0x218F)) ||
3238  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3239  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3240  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3241  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3242  ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3243  return(NULL);
3244  }
3245  len += l;
3246  NEXTL(l);
3247  c = CUR_CHAR(l);
3248  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3249  (((c >= 'a') && (c <= 'z')) ||
3250  ((c >= 'A') && (c <= 'Z')) ||
3251  ((c >= '0') && (c <= '9')) || /* !start */
3252  (c == '_') || (c == ':') ||
3253  (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3254  ((c >= 0xC0) && (c <= 0xD6)) ||
3255  ((c >= 0xD8) && (c <= 0xF6)) ||
3256  ((c >= 0xF8) && (c <= 0x2FF)) ||
3257  ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3258  ((c >= 0x370) && (c <= 0x37D)) ||
3259  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3260  ((c >= 0x200C) && (c <= 0x200D)) ||
3261  ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3262  ((c >= 0x2070) && (c <= 0x218F)) ||
3263  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3264  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3265  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3266  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3267  ((c >= 0x10000) && (c <= 0xEFFFF))
3268  )) {
3269  if (count++ > XML_PARSER_CHUNK_SIZE) {
3270  count = 0;
3271  GROW;
3272  if (ctxt->instate == XML_PARSER_EOF)
3273  return(NULL);
3274  }
3275  if (len <= INT_MAX - l)
3276  len += l;
3277  NEXTL(l);
3278  c = CUR_CHAR(l);
3279  }
3280  } else {
3281  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3282  (!IS_LETTER(c) && (c != '_') &&
3283  (c != ':'))) {
3284  return(NULL);
3285  }
3286  len += l;
3287  NEXTL(l);
3288  c = CUR_CHAR(l);
3289 
3290  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3291  ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3292  (c == '.') || (c == '-') ||
3293  (c == '_') || (c == ':') ||
3294  (IS_COMBINING(c)) ||
3295  (IS_EXTENDER(c)))) {
3296  if (count++ > XML_PARSER_CHUNK_SIZE) {
3297  count = 0;
3298  GROW;
3299  if (ctxt->instate == XML_PARSER_EOF)
3300  return(NULL);
3301  }
3302  if (len <= INT_MAX - l)
3303  len += l;
3304  NEXTL(l);
3305  c = CUR_CHAR(l);
3306  }
3307  }
3308  if (len > maxLength) {
3309  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3310  return(NULL);
3311  }
3312  if (ctxt->input->cur - ctxt->input->base < len) {
3313  /*
3314  * There were a couple of bugs where PERefs led to a change
3315  * of the buffer. Check the buffer size to avoid passing an invalid
3316  * pointer to xmlDictLookup.
3317  */
3319  "unexpected change of input buffer");
3320  return (NULL);
3321  }
3322  if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3323  return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3324  return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3325 }
3326 
3343 const xmlChar *
3345  const xmlChar *in;
3346  const xmlChar *ret;
3347  size_t count = 0;
3348  size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3351 
3352  GROW;
3353 
3354 #ifdef DEBUG
3355  nbParseName++;
3356 #endif
3357 
3358  /*
3359  * Accelerator for simple ASCII names
3360  */
3361  in = ctxt->input->cur;
3362  if (((*in >= 0x61) && (*in <= 0x7A)) ||
3363  ((*in >= 0x41) && (*in <= 0x5A)) ||
3364  (*in == '_') || (*in == ':')) {
3365  in++;
3366  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3367  ((*in >= 0x41) && (*in <= 0x5A)) ||
3368  ((*in >= 0x30) && (*in <= 0x39)) ||
3369  (*in == '_') || (*in == '-') ||
3370  (*in == ':') || (*in == '.'))
3371  in++;
3372  if ((*in > 0) && (*in < 0x80)) {
3373  count = in - ctxt->input->cur;
3374  if (count > maxLength) {
3375  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3376  return(NULL);
3377  }
3378  ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3379  ctxt->input->cur = in;
3380  ctxt->input->col += count;
3381  if (ret == NULL)
3382  xmlErrMemory(ctxt, NULL);
3383  return(ret);
3384  }
3385  }
3386  /* accelerator for special cases */
3387  return(xmlParseNameComplex(ctxt));
3388 }
3389 
3390 static const xmlChar *
3392  int len = 0, l;
3393  int c;
3394  int count = 0;
3395  int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3398  size_t startPosition = 0;
3399 
3400 #ifdef DEBUG
3401  nbParseNCNameComplex++;
3402 #endif
3403 
3404  /*
3405  * Handler for more complex cases
3406  */
3407  GROW;
3408  startPosition = CUR_PTR - BASE_PTR;
3409  c = CUR_CHAR(l);
3410  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3411  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3412  return(NULL);
3413  }
3414 
3415  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3416  (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3417  if (count++ > XML_PARSER_CHUNK_SIZE) {
3418  count = 0;
3419  GROW;
3420  if (ctxt->instate == XML_PARSER_EOF)
3421  return(NULL);
3422  }
3423  if (len <= INT_MAX - l)
3424  len += l;
3425  NEXTL(l);
3426  c = CUR_CHAR(l);
3427  if (c == 0) {
3428  count = 0;
3429  /*
3430  * when shrinking to extend the buffer we really need to preserve
3431  * the part of the name we already parsed. Hence rolling back
3432  * by current length.
3433  */
3434  ctxt->input->cur -= l;
3435  GROW;
3436  if (ctxt->instate == XML_PARSER_EOF)
3437  return(NULL);
3438  ctxt->input->cur += l;
3439  c = CUR_CHAR(l);
3440  }
3441  }
3442  if (len > maxLength) {
3443  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3444  return(NULL);
3445  }
3446  return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3447 }
3448 
3464 static const xmlChar *
3466  const xmlChar *in, *e;
3467  const xmlChar *ret;
3468  size_t count = 0;
3469  size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3472 
3473 #ifdef DEBUG
3474  nbParseNCName++;
3475 #endif
3476 
3477  /*
3478  * Accelerator for simple ASCII names
3479  */
3480  in = ctxt->input->cur;
3481  e = ctxt->input->end;
3482  if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3483  ((*in >= 0x41) && (*in <= 0x5A)) ||
3484  (*in == '_')) && (in < e)) {
3485  in++;
3486  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3487  ((*in >= 0x41) && (*in <= 0x5A)) ||
3488  ((*in >= 0x30) && (*in <= 0x39)) ||
3489  (*in == '_') || (*in == '-') ||
3490  (*in == '.')) && (in < e))
3491  in++;
3492  if (in >= e)
3493  goto complex;
3494  if ((*in > 0) && (*in < 0x80)) {
3495  count = in - ctxt->input->cur;
3496  if (count > maxLength) {
3497  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3498  return(NULL);
3499  }
3500  ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3501  ctxt->input->cur = in;
3502  ctxt->input->col += count;
3503  if (ret == NULL) {
3504  xmlErrMemory(ctxt, NULL);
3505  }
3506  return(ret);
3507  }
3508  }
3509 complex:
3510  return(xmlParseNCNameComplex(ctxt));
3511 }
3512 
/*
 * Parse a name at the current input position and compare it with an
 * expected one.
 *
 * NOTE(review): the declarator line (listing line 3525) is absent from
 * this listing; the body uses a parser context `ctxt` and an expected
 * name `other`.
 *
 * Returns the sentinel pointer value 1 when the input matches `other`,
 * NULL when the parser is in the XML_PARSER_EOF state after GROW, and
 * otherwise whatever xmlParseName() returned for the name actually
 * found.
 */
3524 static const xmlChar *
3526  register const xmlChar *cmp = other;
3527  register const xmlChar *in;
3528  const xmlChar *ret;
3529 
3530  GROW;
3531  if (ctxt->instate == XML_PARSER_EOF)
3532  return(NULL);
3533 
/*
 * Fast path: compare the raw input bytes with `other` without consuming
 * anything. The scan stops at the first mismatch or at a NUL byte in the
 * input.
 */
3534  in = ctxt->input->cur;
3535  while (*in != 0 && *in == *cmp) {
3536  ++in;
3537  ++cmp;
3538  }
/*
 * It is a match only if all of `other` was consumed and the next input
 * byte is '>' or a blank; only then are the column and the cursor
 * advanced past the name.
 */
3539  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3540  /* success */
3541  ctxt->input->col += in - ctxt->input->cur;
3542  ctxt->input->cur = in;
3543  return (const xmlChar*) 1;
3544  }
3545  /* failure (or end of input buffer), check with full function */
3546  ret = xmlParseName (ctxt);
3547  /* strings coming from the dictionary direct compare possible */
3548  if (ret == other) {
3549  return (const xmlChar*) 1;
3550  }
3551  return ret;
3552 }
3553 
3572 static xmlChar *
3575  const xmlChar *cur = *str;
3576  int len = 0, l;
3577  int c;
3578  int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3581 
3582 #ifdef DEBUG
3583  nbParseStringName++;
3584 #endif
3585 
3586  c = CUR_SCHAR(cur, l);
3587  if (!xmlIsNameStartChar(ctxt, c)) {
3588  return(NULL);
3589  }
3590 
3591  COPY_BUF(l,buf,len,c);
3592  cur += l;
3593  c = CUR_SCHAR(cur, l);
3594  while (xmlIsNameChar(ctxt, c)) {
3595  COPY_BUF(l,buf,len,c);
3596  cur += l;
3597  c = CUR_SCHAR(cur, l);
3598  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3599  /*
3600  * Okay someone managed to make a huge name, so he's ready to pay
3601  * for the processing speed.
3602  */
3603  xmlChar *buffer;
3604  int max = len * 2;
3605 
3606  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3607  if (buffer == NULL) {
3608  xmlErrMemory(ctxt, NULL);
3609  return(NULL);
3610  }
3611  memcpy(buffer, buf, len);
3612  while (xmlIsNameChar(ctxt, c)) {
3613  if (len + 10 > max) {
3614  xmlChar *tmp;
3615 
3616  max *= 2;
3617  tmp = (xmlChar *) xmlRealloc(buffer,
3618  max * sizeof(xmlChar));
3619  if (tmp == NULL) {
3620  xmlErrMemory(ctxt, NULL);
3621  xmlFree(buffer);
3622  return(NULL);
3623  }
3624  buffer = tmp;
3625  }
3626  COPY_BUF(l,buffer,len,c);
3627  cur += l;
3628  c = CUR_SCHAR(cur, l);
3629  if (len > maxLength) {
3630  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3631  xmlFree(buffer);
3632  return(NULL);
3633  }
3634  }
3635  buffer[len] = 0;
3636  *str = cur;
3637  return(buffer);
3638  }
3639  }
3640  if (len > maxLength) {
3641  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3642  return(NULL);
3643  }
3644  *str = cur;
3645  return(xmlStrndup(buf, len));
3646 }
3647 
3661 xmlChar *
3664  int len = 0, l;
3665  int c;
3666  int count = 0;
3667  int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3670 
3671 #ifdef DEBUG
3672  nbParseNmToken++;
3673 #endif
3674 
3675  GROW;
3676  if (ctxt->instate == XML_PARSER_EOF)
3677  return(NULL);
3678  c = CUR_CHAR(l);
3679 
3680  while (xmlIsNameChar(ctxt, c)) {
3681  if (count++ > XML_PARSER_CHUNK_SIZE) {
3682  count = 0;
3683  GROW;
3684  }
3685  COPY_BUF(l,buf,len,c);
3686  NEXTL(l);
3687  c = CUR_CHAR(l);
3688  if (c == 0) {
3689  count = 0;
3690  GROW;
3691  if (ctxt->instate == XML_PARSER_EOF)
3692  return(NULL);
3693  c = CUR_CHAR(l);
3694  }
3695  if (len >= XML_MAX_NAMELEN) {
3696  /*
3697  * Okay someone managed to make a huge token, so he's ready to pay
3698  * for the processing speed.
3699  */
3700  xmlChar *buffer;
3701  int max = len * 2;
3702 
3703  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3704  if (buffer == NULL) {
3705  xmlErrMemory(ctxt, NULL);
3706  return(NULL);
3707  }
3708  memcpy(buffer, buf, len);
3709  while (xmlIsNameChar(ctxt, c)) {
3710  if (count++ > XML_PARSER_CHUNK_SIZE) {
3711  count = 0;
3712  GROW;
3713  if (ctxt->instate == XML_PARSER_EOF) {
3714  xmlFree(buffer);
3715  return(NULL);
3716  }
3717  }
3718  if (len + 10 > max) {
3719  xmlChar *tmp;
3720 
3721  max *= 2;
3722  tmp = (xmlChar *) xmlRealloc(buffer,
3723  max * sizeof(xmlChar));
3724  if (tmp == NULL) {
3725  xmlErrMemory(ctxt, NULL);
3726  xmlFree(buffer);
3727  return(NULL);
3728  }
3729  buffer = tmp;
3730  }
3731  COPY_BUF(l,buffer,len,c);
3732  NEXTL(l);
3733  c = CUR_CHAR(l);
3734  if (len > maxLength) {
3735  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3736  xmlFree(buffer);
3737  return(NULL);
3738  }
3739  }
3740  buffer[len] = 0;
3741  return(buffer);
3742  }
3743  }
3744  if (len == 0)
3745  return(NULL);
3746  if (len > maxLength) {
3747  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3748  return(NULL);
3749  }
3750  return(xmlStrndup(buf, len));
3751 }
3752 
3766 xmlChar *
3768  xmlChar *buf = NULL;
3769  int len = 0;
3771  int c, l;
3772  int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3775  xmlChar stop;
3776  xmlChar *ret = NULL;
3777  const xmlChar *cur = NULL;
3779 
3780  if (RAW == '"') stop = '"';
3781  else if (RAW == '\'') stop = '\'';
3782  else {
3784  return(NULL);
3785  }
3786  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3787  if (buf == NULL) {
3788  xmlErrMemory(ctxt, NULL);
3789  return(NULL);
3790  }
3791 
3792  /*
3793  * The content of the entity definition is copied in a buffer.
3794  */
3795 
3797  input = ctxt->input;
3798  GROW;
3799  if (ctxt->instate == XML_PARSER_EOF)
3800  goto error;
3801  NEXT;
3802  c = CUR_CHAR(l);
3803  /*
3804  * NOTE: 4.4.5 Included in Literal
3805  * When a parameter entity reference appears in a literal entity
3806  * value, ... a single or double quote character in the replacement
3807  * text is always treated as a normal data character and will not
3808  * terminate the literal.
3809  * In practice it means we stop the loop only when back at parsing
3810  * the initial entity and the quote is found
3811  */
3812  while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3813  (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3814  if (len + 5 >= size) {
3815  xmlChar *tmp;
3816 
3817  size *= 2;
3818  tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3819  if (tmp == NULL) {
3820  xmlErrMemory(ctxt, NULL);
3821  goto error;
3822  }
3823  buf = tmp;
3824  }
3825  COPY_BUF(l,buf,len,c);
3826  NEXTL(l);
3827 
3828  GROW;
3829  c = CUR_CHAR(l);
3830  if (c == 0) {
3831  GROW;
3832  c = CUR_CHAR(l);
3833  }
3834 
3835  if (len > maxLength) {
3836  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3837  "entity value too long\n");
3838  goto error;
3839  }
3840  }
3841  buf[len] = 0;
3842  if (ctxt->instate == XML_PARSER_EOF)
3843  goto error;
3844  if (c != stop) {
3846  goto error;
3847  }
3848  NEXT;
3849 
3850  /*
3851  * Raise problem w.r.t. '&' and '%' being used in non-entities
3852  * reference constructs. Note Charref will be handled in
3853  * xmlStringDecodeEntities()
3854  */
3855  cur = buf;
3856  while (*cur != 0) { /* non input consuming */
3857  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3858  xmlChar *name;
3859  xmlChar tmp = *cur;
3860  int nameOk = 0;
3861 
3862  cur++;
3863  name = xmlParseStringName(ctxt, &cur);
3864  if (name != NULL) {
3865  nameOk = 1;
3866  xmlFree(name);
3867  }
3868  if ((nameOk == 0) || (*cur != ';')) {
3869  xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3870  "EntityValue: '%c' forbidden except for entities references\n",
3871  tmp);
3872  goto error;
3873  }
3874  if ((tmp == '%') && (ctxt->inSubset == 1) &&
3875  (ctxt->inputNr == 1)) {
3877  goto error;
3878  }
3879  if (*cur == 0)
3880  break;
3881  }
3882  cur++;
3883  }
3884 
3885  /*
3886  * Then PEReference entities are substituted.
3887  *
3888  * NOTE: 4.4.7 Bypassed
3889  * When a general entity reference appears in the EntityValue in
3890  * an entity declaration, it is bypassed and left as is.
3891  * so XML_SUBSTITUTE_REF is not set here.
3892  */
3893  ++ctxt->depth;
3895  0, 0, 0);
3896  --ctxt->depth;
3897  if (orig != NULL) {
3898  *orig = buf;
3899  buf = NULL;
3900  }
3901 
3902 error:
3903  if (buf != NULL)
3904  xmlFree(buf);
3905  return(ret);
3906 }
3907 
3920 static xmlChar *
3922  xmlChar limit = 0;
3923  xmlChar *buf = NULL;
3924  xmlChar *rep = NULL;
3925  size_t len = 0;
3926  size_t buf_size = 0;
3927  size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3930  int c, l, in_space = 0;
3931  xmlChar *current = NULL;
3932  xmlEntityPtr ent;
3933 
3934  if (NXT(0) == '"') {
3936  limit = '"';
3937  NEXT;
3938  } else if (NXT(0) == '\'') {
3939  limit = '\'';
3941  NEXT;
3942  } else {
3944  return(NULL);
3945  }
3946 
3947  /*
3948  * allocate a translation buffer.
3949  */
3950  buf_size = XML_PARSER_BUFFER_SIZE;
3951  buf = (xmlChar *) xmlMallocAtomic(buf_size);
3952  if (buf == NULL) goto mem_error;
3953 
3954  /*
3955  * OK loop until we reach one of the ending char or a size limit.
3956  */
3957  c = CUR_CHAR(l);
3958  while (((NXT(0) != limit) && /* checked */
3959  (IS_CHAR(c)) && (c != '<')) &&
3960  (ctxt->instate != XML_PARSER_EOF)) {
3961  if (c == '&') {
3962  in_space = 0;
3963  if (NXT(1) == '#') {
3964  int val = xmlParseCharRef(ctxt);
3965 
3966  if (val == '&') {
3967  if (ctxt->replaceEntities) {
3968  if (len + 10 > buf_size) {
3969  growBuffer(buf, 10);
3970  }
3971  buf[len++] = '&';
3972  } else {
3973  /*
3974  * The reparsing will be done in xmlStringGetNodeList()
3975  * called by the attribute() function in SAX.c
3976  */
3977  if (len + 10 > buf_size) {
3978  growBuffer(buf, 10);
3979  }
3980  buf[len++] = '&';
3981  buf[len++] = '#';
3982  buf[len++] = '3';
3983  buf[len++] = '8';
3984  buf[len++] = ';';
3985  }
3986  } else if (val != 0) {
3987  if (len + 10 > buf_size) {
3988  growBuffer(buf, 10);
3989  }
3990  len += xmlCopyChar(0, &buf[len], val);
3991  }
3992  } else {
3993  ent = xmlParseEntityRef(ctxt);
3994  ctxt->nbentities++;
3995  if (ent != NULL)
3996  ctxt->nbentities += ent->owner;
3997  if ((ent != NULL) &&
3999  if (len + 10 > buf_size) {
4000  growBuffer(buf, 10);
4001  }
4002  if ((ctxt->replaceEntities == 0) &&
4003  (ent->content[0] == '&')) {
4004  buf[len++] = '&';
4005  buf[len++] = '#';
4006  buf[len++] = '3';
4007  buf[len++] = '8';
4008  buf[len++] = ';';
4009  } else {
4010  buf[len++] = ent->content[0];
4011  }
4012  } else if ((ent != NULL) &&
4013  (ctxt->replaceEntities != 0)) {
4014  if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4015  ++ctxt->depth;
4016  rep = xmlStringDecodeEntities(ctxt, ent->content,
4018  0, 0, 0);
4019  --ctxt->depth;
4020  if (rep != NULL) {
4021  current = rep;
4022  while (*current != 0) { /* non input consuming */
4023  if ((*current == 0xD) || (*current == 0xA) ||
4024  (*current == 0x9)) {
4025  buf[len++] = 0x20;
4026  current++;
4027  } else
4028  buf[len++] = *current++;
4029  if (len + 10 > buf_size) {
4030  growBuffer(buf, 10);
4031  }
4032  }
4033  xmlFree(rep);
4034  rep = NULL;
4035  }
4036  } else {
4037  if (len + 10 > buf_size) {
4038  growBuffer(buf, 10);
4039  }
4040  if (ent->content != NULL)
4041  buf[len++] = ent->content[0];
4042  }
4043  } else if (ent != NULL) {
4044  int i = xmlStrlen(ent->name);
4045  const xmlChar *cur = ent->name;
4046 
4047  /*
4048  * This may look absurd but is needed to detect
4049  * entities problems
4050  */
4051  if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4052  (ent->content != NULL) && (ent->checked == 0)) {
4053  unsigned long oldnbent = ctxt->nbentities, diff;
4054 
4055  ++ctxt->depth;
4056  rep = xmlStringDecodeEntities(ctxt, ent->content,
4057  XML_SUBSTITUTE_REF, 0, 0, 0);
4058  --ctxt->depth;
4059 
4060  diff = ctxt->nbentities - oldnbent + 1;
4061  if (diff > INT_MAX / 2)
4062  diff = INT_MAX / 2;
4063  ent->checked = diff * 2;
4064  if (rep != NULL) {
4065  if (xmlStrchr(rep, '<'))
4066  ent->checked |= 1;
4067  xmlFree(rep);
4068  rep = NULL;
4069  } else {
4070  ent->content[0] = 0;
4071  }
4072  }
4073 
4074  /*
4075  * Just output the reference
4076  */
4077  buf[len++] = '&';
4078  while (len + i + 10 > buf_size) {
4079  growBuffer(buf, i + 10);
4080  }
4081  for (;i > 0;i--)
4082  buf[len++] = *cur++;
4083  buf[len++] = ';';
4084  }
4085  }
4086  } else {
4087  if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4088  if ((len != 0) || (!normalize)) {
4089  if ((!normalize) || (!in_space)) {
4090  COPY_BUF(l,buf,len,0x20);
4091  while (len + 10 > buf_size) {
4092  growBuffer(buf, 10);
4093  }
4094  }
4095  in_space = 1;
4096  }
4097  } else {
4098  in_space = 0;
4099  COPY_BUF(l,buf,len,c);
4100  if (len + 10 > buf_size) {
4101  growBuffer(buf, 10);
4102  }
4103  }
4104  NEXTL(l);
4105  }
4106  GROW;
4107  c = CUR_CHAR(l);
4108  if (len > maxLength) {
4109  xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4110  "AttValue length too long\n");
4111  goto mem_error;
4112  }
4113  }
4114  if (ctxt->instate == XML_PARSER_EOF)
4115  goto error;
4116 
4117  if ((in_space) && (normalize)) {
4118  while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4119  }
4120  buf[len] = 0;
4121  if (RAW == '<') {
4123  } else if (RAW != limit) {
4124  if ((c != 0) && (!IS_CHAR(c))) {
4125  xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4126  "invalid character in attribute value\n");
4127  } else {
4128  xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4129  "AttValue: ' expected\n");
4130  }
4131  } else
4132  NEXT;
4133 
4134  if (attlen != NULL) *attlen = (int) len;
4135  return(buf);
4136 
4137 mem_error:
4138  xmlErrMemory(ctxt, NULL);
4139 error:
4140  if (buf != NULL)
4141  xmlFree(buf);
4142  if (rep != NULL)
4143  xmlFree(rep);
4144  return(NULL);
4145 }
4146 
/*
 * Thin wrapper around xmlParseAttValueInternal().
 *
 * NOTE(review): the declarator line (listing line 4182) is absent from
 * this listing; the body only uses a parser context `ctxt`.
 *
 * Returns NULL when `ctxt` or its current input is NULL; otherwise the
 * result of xmlParseAttValueInternal(ctxt, NULL, NULL, 0). Judging from
 * the truncated prototype earlier in the file (int *len, int *alloc,
 * int normalize) this means no length or alloc out-parameters and
 * normalize == 0 -- confirm against the real prototype.
 */
4181 xmlChar *
4183  if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4184  return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4185 }
4186 
4198 xmlChar *
4200  xmlChar *buf = NULL;
4201  int len = 0;
4203  int cur, l;
4204  int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4207  xmlChar stop;
4208  int state = ctxt->instate;
4209  int count = 0;
4210 
4211  SHRINK;
4212  if (RAW == '"') {
4213  NEXT;
4214  stop = '"';
4215  } else if (RAW == '\'') {
4216  NEXT;
4217  stop = '\'';
4218  } else {
4220  return(NULL);
4221  }
4222 
4223  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4224  if (buf == NULL) {
4225  xmlErrMemory(ctxt, NULL);
4226  return(NULL);
4227  }
4229  cur = CUR_CHAR(l);
4230  while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4231  if (len + 5 >= size) {
4232  xmlChar *tmp;
4233 
4234  size *= 2;
4235  tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4236  if (tmp == NULL) {
4237  xmlFree(buf);
4238  xmlErrMemory(ctxt, NULL);
4239  ctxt->instate = (xmlParserInputState) state;
4240  return(NULL);
4241  }
4242  buf = tmp;
4243  }
4244  count++;
4245  if (count > 50) {
4246  SHRINK;
4247  GROW;
4248  count = 0;
4249  if (ctxt->instate == XML_PARSER_EOF) {
4250  xmlFree(buf);
4251  return(NULL);
4252  }
4253  }
4254  COPY_BUF(l,buf,len,cur);
4255  NEXTL(l);
4256  cur = CUR_CHAR(l);
4257  if (cur == 0) {
4258  GROW;
4259  SHRINK;
4260  cur = CUR_CHAR(l);
4261  }
4262  if (len > maxLength) {
4263  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4264  xmlFree(buf);
4265  ctxt->instate = (xmlParserInputState) state;
4266  return(NULL);
4267  }
4268  }
4269  buf[len] = 0;
4270  ctxt->instate = (xmlParserInputState) state;
4271  if (!IS_CHAR(cur)) {
4273  } else {
4274  NEXT;
4275  }
4276  return(buf);
4277 }
4278 
4290 xmlChar *
4292  xmlChar *buf = NULL;
4293  int len = 0;
4295  int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4298  xmlChar cur;
4299  xmlChar stop;
4300  int count = 0;
4301  xmlParserInputState oldstate = ctxt->instate;
4302 
4303  SHRINK;
4304  if (RAW == '"') {
4305  NEXT;
4306  stop = '"';
4307  } else if (RAW == '\'') {
4308  NEXT;
4309  stop = '\'';
4310  } else {
4312  return(NULL);
4313  }
4314  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4315  if (buf == NULL) {
4316  xmlErrMemory(ctxt, NULL);
4317  return(NULL);
4318  }
4320  cur = CUR;
4321  while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4322  if (len + 1 >= size) {
4323  xmlChar *tmp;
4324 
4325  size *= 2;
4326  tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4327  if (tmp == NULL) {
4328  xmlErrMemory(ctxt, NULL);
4329  xmlFree(buf);
4330  return(NULL);
4331  }
4332  buf = tmp;
4333  }
4334  buf[len++] = cur;
4335  count++;
4336  if (count > 50) {
4337  SHRINK;
4338  GROW;
4339  count = 0;
4340  if (ctxt->instate == XML_PARSER_EOF) {
4341  xmlFree(buf);
4342  return(NULL);
4343  }
4344  }
4345  NEXT;
4346  cur = CUR;
4347  if (cur == 0) {
4348  GROW;
4349  SHRINK;
4350  cur = CUR;
4351  }
4352  if (len > maxLength) {
4353  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4354  xmlFree(buf);
4355  return(NULL);
4356  }
4357  }
4358  buf[len] = 0;
4359  if (cur != stop) {
4361  } else {
4362  NEXT;
4363  }
4364  ctxt->instate = oldstate;
4365  return(buf);
4366 }
4367 
4368 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4369 
4370 /*
4371  * used for the test in the inner loop of the char data testing
 *
 * 256-entry table indexed by byte value. An entry holds the byte value
 * itself for 0x09 and for 0x20..0x7F except '&' (0x26), '<' (0x3C) and
 * ']' (0x5D); every other entry is 0, including 0x0A and 0x0D, the
 * remaining control characters and all bytes >= 0x80.
 * NOTE(review): the consumer is not visible in this part of the file;
 * presumably a zero entry makes the fast scanning loop stop -- confirm.
4372  */
4373 static const unsigned char test_char_data[256] = {
4374  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4375  0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4376  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4377  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4378  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4379  0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4380  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4381  0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4382  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4383  0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4384  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4385  0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4386  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4387  0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4388  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4389  0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4390  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4391  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4392  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4393  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4394  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4395  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4396  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4397  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4398  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4399  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4400  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4401  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4402  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4403  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4404  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4405  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4406 };
4407 
/*
 * Parse a run of character data and hand it to the SAX callbacks.
 *
 * NOTE(review): this listing omits some original lines (the numeric
 * prefixes skip 4425 and 4495), including the signature line.  The body
 * uses a parser context `ctxt` and a flag `cdata`; it is presumably
 * xmlParseCharData(ctxt, cdata) -- confirm against the real source.
 *
 * When cdata is zero, a fast path scans bytes directly in the input buffer
 * and reports each run without copying it.  A run is passed to
 * ctxt->sax->ignorableWhitespace or ctxt->sax->characters; areBlanks() is
 * consulted to choose between them when the two callbacks differ.  The
 * fast path returns on '<', on '&', on a "]]>" sequence, when the parser
 * state becomes XML_PARSER_EOF, or when a callback leaves ctxt->instate
 * different from XML_PARSER_CONTENT.
 *
 * Everything else (cdata non-zero, or a next byte outside the set accepted
 * by the loop condition) is delegated to xmlParseCharDataComplex() after
 * ctxt->input->line and ->col have been reset to the last recorded values.
 */
4424 void
4426  const xmlChar *in;
4427  int nbchar = 0;
4428  int line = ctxt->input->line;
4429  int col = ctxt->input->col;
4430  int ccol;
4431 
4432  SHRINK;
4433  GROW;
4434  /*
4435  * Accelerated common case where input doesn't need to be
4436  * modified before passing it to the handler.
4437  */
4438  if (!cdata) {
4439  in = ctxt->input->cur;
4440  do {
4441 get_more_space:
      /* skip leading spaces and line feeds, keeping line/col in sync */
4442  while (*in == 0x20) { in++; ctxt->input->col++; }
4443  if (*in == 0xA) {
4444  do {
4445  ctxt->input->line++; ctxt->input->col = 1;
4446  in++;
4447  } while (*in == 0xA);
4448  goto get_more_space;
4449  }
      /* nothing but spaces/LFs before the next '<': report them and stop */
4450  if (*in == '<') {
4451  nbchar = in - ctxt->input->cur;
4452  if (nbchar > 0) {
4453  const xmlChar *tmp = ctxt->input->cur;
      /* mark the run as consumed before any callback is invoked */
4454  ctxt->input->cur = in;
4455 
4456  if ((ctxt->sax != NULL) &&
4457  (ctxt->sax->ignorableWhitespace !=
4458  ctxt->sax->characters)) {
4459  if (areBlanks(ctxt, tmp, nbchar, 1)) {
4460  if (ctxt->sax->ignorableWhitespace != NULL)
4461  ctxt->sax->ignorableWhitespace(ctxt->userData,
4462  tmp, nbchar);
4463  } else {
4464  if (ctxt->sax->characters != NULL)
4465  ctxt->sax->characters(ctxt->userData,
4466  tmp, nbchar);
4467  if (*ctxt->space == -1)
4468  *ctxt->space = -2;
4469  }
4470  } else if ((ctxt->sax != NULL) &&
4471  (ctxt->sax->characters != NULL)) {
4472  ctxt->sax->characters(ctxt->userData,
4473  tmp, nbchar);
4474  }
4475  }
4476  return;
4477  }
4478 
4479 get_more:
      /* advance over bytes flagged in test_char_data[], counting columns locally */
4480  ccol = ctxt->input->col;
4481  while (test_char_data[*in]) {
4482  in++;
4483  ccol++;
4484  }
4485  ctxt->input->col = ccol;
4486  if (*in == 0xA) {
4487  do {
4488  ctxt->input->line++; ctxt->input->col = 1;
4489  in++;
4490  } while (*in == 0xA);
4491  goto get_more;
4492  }
      /*
       * A lone ']' is ordinary data and scanning resumes after it.  On
       * "]]>" the function returns with the current position set to the
       * second ']'.
       * NOTE(review): original line 4495 is absent from this listing; it
       * presumably reports the misplaced sequence -- confirm.
       */
4493  if (*in == ']') {
4494  if ((in[1] == ']') && (in[2] == '>')) {
4496  ctxt->input->cur = in + 1;
4497  return;
4498  }
4499  in++;
4500  ctxt->input->col++;
4501  goto get_more;
4502  }
4503  nbchar = in - ctxt->input->cur;
4504  if (nbchar > 0) {
      /* a run starting with a blank is classified through areBlanks() */
4505  if ((ctxt->sax != NULL) &&
4506  (ctxt->sax->ignorableWhitespace !=
4507  ctxt->sax->characters) &&
4508  (IS_BLANK_CH(*ctxt->input->cur))) {
4509  const xmlChar *tmp = ctxt->input->cur;
4510  ctxt->input->cur = in;
4511 
4512  if (areBlanks(ctxt, tmp, nbchar, 0)) {
4513  if (ctxt->sax->ignorableWhitespace != NULL)
4514  ctxt->sax->ignorableWhitespace(ctxt->userData,
4515  tmp, nbchar);
4516  } else {
4517  if (ctxt->sax->characters != NULL)
4518  ctxt->sax->characters(ctxt->userData,
4519  tmp, nbchar);
4520  if (*ctxt->space == -1)
4521  *ctxt->space = -2;
4522  }
      /* record the position reached now that the run has been delivered */
4523  line = ctxt->input->line;
4524  col = ctxt->input->col;
4525  } else if (ctxt->sax != NULL) {
4526  if (ctxt->sax->characters != NULL)
4527  ctxt->sax->characters(ctxt->userData,
4528  ctxt->input->cur, nbchar);
4529  line = ctxt->input->line;
4530  col = ctxt->input->col;
4531  }
4532  /* something really bad happened in the SAX callback */
4533  if (ctxt->instate != XML_PARSER_CONTENT)
4534  return;
4535  }
4536  ctxt->input->cur = in;
      /*
       * CR LF: move the current position onto the LF so that the CR is
       * left out of the next run, then re-test the loop condition.  A CR
       * not followed by LF is left in place.
       */
4537  if (*in == 0xD) {
4538  in++;
4539  if (*in == 0xA) {
4540  ctxt->input->cur = in;
4541  in++;
4542  ctxt->input->line++; ctxt->input->col = 1;
4543  continue; /* while */
4544  }
4545  in--;
4546  }
      /* markup or an entity reference starts here: the caller takes over */
4547  if (*in == '<') {
4548  return;
4549  }
4550  if (*in == '&') {
4551  return;
4552  }
4553  SHRINK;
4554  GROW;
4555  if (ctxt->instate == XML_PARSER_EOF)
4556  return;
      /* re-read the position after SHRINK/GROW before inspecting *in again */
4557  in = ctxt->input->cur;
4558  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4559  nbchar = 0;
4560  }
      /* reset line/col to the last recorded values before the slow path runs */
4561  ctxt->input->line = line;
4562  ctxt->input->col = col;
4563  xmlParseCharDataComplex(ctxt, cdata);
4564 }
4565 
/*
 * Slow path for character data: reads one character at a time with
 * CUR_CHAR(), copies it into `buf` with COPY_BUF() and flushes the buffer
 * to the SAX callbacks whenever it holds at least
 * XML_PARSER_BIG_BUFFER_SIZE bytes, and once more at the end.
 *
 * NOTE(review): this listing omits original lines 4576, 4577 and 4592,
 * i.e. the signature line, the declaration of `buf` and the statement in
 * the non-cdata "]]>" branch.  The forward declaration earlier in the file
 * gives the prototype xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,
 * int cdata).
 *
 * The loop ends at '<', at '&', at a character rejected by IS_CHAR(), or,
 * when cdata is non-zero, at "]]>".  Data is only forwarded when ctxt->sax
 * is set and ctxt->disableSAX is zero; areBlanks() selects between
 * ignorableWhitespace and characters.  If the loop stopped on a non-zero
 * character that fails IS_CHAR(), XML_ERR_INVALID_CHAR is raised and that
 * character is skipped.
 */
4575 static void
4578  int nbchar = 0;
4579  int cur, l;
4580  int count = 0;
4581 
4582  SHRINK;
4583  GROW;
4584  cur = CUR_CHAR(l);
4585  while ((cur != '<') && /* checked */
4586  (cur != '&') &&
4587  (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
      /* "]]>" ends the data in cdata mode; otherwise see the NOTE above */
4588  if ((cur == ']') && (NXT(1) == ']') &&
4589  (NXT(2) == '>')) {
4590  if (cdata) break;
4591  else {
4593  }
4594  }
4595  COPY_BUF(l,buf,nbchar,cur);
4596  /* move current position before possible calling of ctxt->sax->characters */
4597  NEXTL(l);
4598  cur = CUR_CHAR(l);
      /* buffer threshold reached: flush what has been collected so far */
4599  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4600  buf[nbchar] = 0;
4601 
4602  /*
4603  * OK the segment is to be consumed as chars.
4604  */
4605  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4606  if (areBlanks(ctxt, buf, nbchar, 0)) {
4607  if (ctxt->sax->ignorableWhitespace != NULL)
4608  ctxt->sax->ignorableWhitespace(ctxt->userData,
4609  buf, nbchar);
4610  } else {
4611  if (ctxt->sax->characters != NULL)
4612  ctxt->sax->characters(ctxt->userData, buf, nbchar);
4613  if ((ctxt->sax->characters !=
4614  ctxt->sax->ignorableWhitespace) &&
4615  (*ctxt->space == -1))
4616  *ctxt->space = -2;
4617  }
4618  }
4619  nbchar = 0;
4620  /* something really bad happened in the SAX callback */
4621  if (ctxt->instate != XML_PARSER_CONTENT)
4622  return;
4623  }
4624  count++;
      /* run SHRINK/GROW once more than 50 characters have been handled */
4625  if (count > 50) {
4626  SHRINK;
4627  GROW;
4628  count = 0;
4629  if (ctxt->instate == XML_PARSER_EOF)
4630  return;
4631  }
4632  }
      /* flush whatever is left in the buffer after the loop */
4633  if (nbchar != 0) {
4634  buf[nbchar] = 0;
4635  /*
4636  * OK the segment is to be consumed as chars.
4637  */
4638  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4639  if (areBlanks(ctxt, buf, nbchar, 0)) {
4640  if (ctxt->sax->ignorableWhitespace != NULL)
4641  ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4642  } else {
4643  if (ctxt->sax->characters != NULL)
4644  ctxt->sax->characters(ctxt->userData, buf, nbchar);
4645  if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4646  (*ctxt->space == -1))
4647  *ctxt->space = -2;
4648  }
4649  }
4650  }
4651  if ((cur != 0) && (!IS_CHAR(cur))) {
4652  /* Generate the error and skip the offending character */
4653  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4654  "PCDATA invalid Char value %d\n",
4655  cur);
4656  NEXTL(l);
4657  }
4658 }
4659 
/*
 * Parse an external identifier introduced by the keyword SYSTEM or PUBLIC.
 *
 * NOTE(review): this listing omits original lines 4683, 4697, 4707 and
 * 4729, i.e. the signature line and the bodies of the three
 * "literal == NULL" checks (presumably error reports -- confirm).  The
 * body uses `ctxt`, an output pointer `publicID` and a flag `strict`; the
 * function is presumably xmlParseExternalID -- confirm against the real
 * source.
 *
 * *publicID is always reset to NULL first.
 *  - "SYSTEM": the keyword is skipped, a missing blank is reported as
 *    XML_ERR_SPACE_REQUIRED, and the system literal is parsed.
 *  - "PUBLIC": likewise, then the public literal is stored in *publicID.
 *    With strict set, a blank is required before the system literal.
 *    Without it, the function returns NULL right away when no blank
 *    follows or when the next character is not a quote.
 *  - Neither keyword: nothing is consumed.
 *
 * Returns the value produced by xmlParseSystemLiteral(), or NULL when no
 * system literal was parsed.
 */
4682 xmlChar *
4684  xmlChar *URI = NULL;
4685 
4686  SHRINK;
4687 
4688  *publicID = NULL;
4689  if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4690  SKIP(6);
4691  if (SKIP_BLANKS == 0) {
4692  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4693  "Space required after 'SYSTEM'\n");
4694  }
4695  URI = xmlParseSystemLiteral(ctxt);
4696  if (URI == NULL) {
4698  }
4699  } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4700  SKIP(6);
4701  if (SKIP_BLANKS == 0) {
4702  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4703  "Space required after 'PUBLIC'\n");
4704  }
4705  *publicID = xmlParsePubidLiteral(ctxt);
4706  if (*publicID == NULL) {
4708  }
4709  if (strict) {
4710  /*
4711  * We don't handle [83] so "S SystemLiteral" is required.
4712  */
4713  if (SKIP_BLANKS == 0) {
4714  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4715  "Space required after the Public Identifier\n");
4716  }
4717  } else {
4718  /*
4719  * We handle [83] so we return immediately, if
4720  * "S SystemLiteral" is not detected. We skip blanks if no
4721  * system literal was found, but this is harmless since we must
4722  * be at the end of a NotationDecl.
4723  */
4724  if (SKIP_BLANKS == 0) return(NULL);
4725  if ((CUR != '\'') && (CUR != '"')) return(NULL);
4726  }
      /* reached in strict mode, or once a quote has been seen after blanks */
4727  URI = xmlParseSystemLiteral(ctxt);
4728  if (URI == NULL) {
4730  }
4731  }
4732  return(URI);
4733 }
4734 
/*
 * Slow-path comment parser: copies the comment text one character at a
 * time into `buf` and, once the terminating "-->" has been seen, passes
 * the text to ctxt->sax->comment.
 *
 * NOTE(review): this listing omits original lines 4750, 4757-4758, 4765
 * and 4802, i.e. the first signature line, the tail of the maxLength
 * initialiser, a statement in the buf == NULL branch and the body of the
 * "--" check inside the loop.  Judging by the visible parameters (ctxt,
 * buf, len, size) this is presumably xmlParseCommentComplex -- confirm
 * against the real source.
 *
 * buf  : text collected so far, or NULL to start with a fresh buffer
 *        (len is then reset to 0).  Every exit path that holds a buffer
 *        releases it with xmlFree(), so the caller must not use it
 *        afterwards.
 * len  : number of bytes already stored in buf.
 * size : allocated size of buf.
 *
 * q, r and cur form a three-character sliding window; the loop stops when
 * the window reads "-->" or when cur is rejected by IS_CHAR().  Only q is
 * appended on each step, so the closing "--" never reaches the buffer.
 *
 * Errors: XML_ERR_COMMENT_NOT_FINISHED (input ended, or more than
 * maxLength bytes collected), XML_ERR_INVALID_CHAR, and
 * XML_ERR_ENTITY_BOUNDARY when the input id changed while parsing.
 *
 * Fix: the validity check on the second character now reports `r`, the
 * value that actually failed; it used to report `q`, which had already
 * passed its own check.
 */
4749 static void
4751  size_t len, size_t size) {
4752  int q, ql;
4753  int r, rl;
4754  int cur, l;
4755  size_t count = 0;
4756  size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4759  int inputid;
4760 
4761  inputid = ctxt->input->id;
4762 
4763  if (buf == NULL) {
4764  len = 0;
4766  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4767  if (buf == NULL) {
4768  xmlErrMemory(ctxt, NULL);
4769  return;
4770  }
4771  }
4772  GROW; /* Assure there's enough input data */
      /* prime the window: q is the first character, r the second */
4773  q = CUR_CHAR(ql);
4774  if (q == 0)
4775  goto not_terminated;
4776  if (!IS_CHAR(q)) {
4777  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4778  "xmlParseComment: invalid xmlChar value %d\n",
4779  q);
4780  xmlFree (buf);
4781  return;
4782  }
4783  NEXTL(ql);
4784  r = CUR_CHAR(rl);
4785  if (r == 0)
4786  goto not_terminated;
4787  if (!IS_CHAR(r)) {
      /* report r: it is the character that failed the check above */
4788  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4789  "xmlParseComment: invalid xmlChar value %d\n",
4790  r);
4791  xmlFree (buf);
4792  return;
4793  }
4794  NEXTL(rl);
4795  cur = CUR_CHAR(l);
4796  if (cur == 0)
4797  goto not_terminated;
4798  while (IS_CHAR(cur) && /* checked */
4799  ((cur != '>') ||
4800  (r != '-') || (q != '-'))) {
      /* "--" not followed by '>' (body of this check is missing, see NOTE) */
4801  if ((r == '-') && (q == '-')) {
4803  }
      /* keep a few spare bytes so COPY_BUF and the final NUL always fit */
4804  if (len + 5 >= size) {
4805  xmlChar *new_buf;
4806  size_t new_size;
4807 
4808  new_size = size * 2;
4809  new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4810  if (new_buf == NULL) {
4811  xmlFree (buf);
4812  xmlErrMemory(ctxt, NULL);
4813  return;
4814  }
4815  buf = new_buf;
4816  size = new_size;
4817  }
      /* emit the oldest character and slide the window by one */
4818  COPY_BUF(ql,buf,len,q);
4819  q = r;
4820  ql = rl;
4821  r = cur;
4822  rl = l;
4823 
4824  count++;
4825  if (count > 50) {
4826  SHRINK;
4827  GROW;
4828  count = 0;
4829  if (ctxt->instate == XML_PARSER_EOF) {
4830  xmlFree(buf);
4831  return;
4832  }
4833  }
4834  NEXTL(l);
4835  cur = CUR_CHAR(l);
      /* a zero here may only mean the buffered input ran dry: retry once */
4836  if (cur == 0) {
4837  SHRINK;
4838  GROW;
4839  cur = CUR_CHAR(l);
4840  }
4841 
4842  if (len > maxLength) {
4843  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4844  "Comment too big found", NULL);
4845  xmlFree (buf);
4846  return;
4847  }
4848  }
4849  buf[len] = 0;
4850  if (cur == 0) {
4851  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4852  "Comment not terminated \n<!--%.50s\n", buf);
4853  } else if (!IS_CHAR(cur)) {
4854  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4855  "xmlParseComment: invalid xmlChar value %d\n",
4856  cur);
4857  } else {
      /* the loop ended on the '>' of "-->": consume it and deliver the text */
4858  if (inputid != ctxt->input->id) {
4859  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4860  "Comment doesn't start and stop in the same"
4861  " entity\n");
4862  }
4863  NEXT;
4864  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4865  (!ctxt->disableSAX))
4866  ctxt->sax->comment(ctxt->userData, buf);
4867  }
4868  xmlFree(buf);
4869  return;
4870 not_terminated:
4871  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4872  "Comment not terminated\n", NULL);
4873  xmlFree(buf);
4874  return;
4875 }
4876 
4887 void
4889  xmlChar *buf = NULL;
4890  size_t size = XML_PARSER_BUFFER_SIZE;
4891  size_t len = 0;
4892  size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4896  const xmlChar *in;
4897  size_t nbchar = 0;
4898  int ccol;
4899  int inputid;
4900 
4901  /*
4902  * Check that there is a comment right here.
4903  */
4904  if ((RAW != '<') || (NXT(1) != '!') ||
4905  (NXT(2) != '-') || (NXT(3) != '-')) return;
4906  state = ctxt->instate;
4907  ctxt->instate = XML_PARSER_COMMENT;
4908  inputid = ctxt->input->id;
4909  SKIP(4);
4910  SHRINK;
4911  GROW;
4912 
4913  /*
4914  * Accelerated common case where input doesn't need to be
4915  * modified before passing it to the handler.
4916  */
4917  in = ctxt->input->cur;
4918  do {
4919  if (*in == 0xA) {
4920  do {
4921  ctxt->input->line++; ctxt->input->col = 1;
4922  in++;
4923  } while (*in == 0xA);
4924  }
4925 get_more:
4926  ccol = ctxt->input->col;
4927  while (((*in > '-') && (*in <= 0x7F)) ||
4928  ((*in >= 0x20) && (*in < '-')) ||
4929  (*in == 0x09)) {
4930  in++;
4931  ccol++;
4932  }
4933  ctxt->input->col = ccol;
4934  if (*in == 0xA) {
4935  do {
4936  ctxt->input->line++; ctxt->input->col = 1;
4937  in++;
4938  } while (*in == 0xA);
4939  goto get_more;
4940  }
4941  nbchar = in - ctxt->input->cur;
4942  /*
4943  * save current set of data
4944  */
4945  if (nbchar > 0) {
4946  if ((ctxt->sax != NULL) &&
4947  (ctxt->sax->comment != NULL)) {
4948  if (buf == NULL) {
4949  if ((*in == '-') && (in[1] == '-'))
4950  size = nbchar + 1;
4951  else
4952  size = XML_PARSER_BUFFER_SIZE + nbchar;
4953  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4954  if (buf == NULL) {
4955  xmlErrMemory(ctxt, NULL);
4956  ctxt->instate = state;
4957  return;
4958  }
4959  len = 0;
4960  } else if (len + nbchar + 1 >= size) {
4961  xmlChar *new_buf;
4962  size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4963  new_buf = (xmlChar *) xmlRealloc(buf,
4964  size * sizeof(xmlChar));
4965  if (new_buf == NULL) {
4966  xmlFree (buf);
4967  xmlErrMemory(ctxt, NULL);
4968  ctxt->instate = state;
4969  return;
4970  }
4971  buf = new_buf;
4972  }
4973  memcpy(&buf[len], ctxt->input->cur, nbchar);
4974  len += nbchar;
4975  buf[len] = 0;
4976  }
4977  }
4978  if (len > maxLength) {
4979  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4980  "Comment too big found", NULL);
4981  xmlFree (buf);
4982  return;
4983  }
4984  ctxt->input->cur = in;
4985  if (*in == 0xA) {
4986  in++;
4987  ctxt->input->line++; ctxt->input->col = 1;
4988  }
4989  if (*in == 0xD) {
4990  in++;
4991  if (*in == 0xA) {
4992  ctxt->input->cur = in;
4993  in++;
4994  ctxt->input->line++; ctxt->input->col = 1;
4995  goto get_more;
4996  }
4997  in--;
4998  }
4999  SHRINK;
5000  GROW;
5001  if (ctxt->instate == XML_PARSER_EOF) {
5002  xmlFree(buf);
5003  return;
5004  }
5005  in = ctxt->input->cur;
5006  if (*in == '-') {
5007  if (in[1] == '-') {
5008  if (in[2] == '>') {
5009  if (ctxt->input->id != inputid) {
5010  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5011  "comment doesn't start and stop in the"
5012  " same entity\n"