ReactOS  0.4.15-dev-3451-gf606fec
parser.c
Go to the documentation of this file.
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  * implemented on top of the SAX interfaces
4  *
5  * References:
6  * The XML specification:
7  * http://www.w3.org/TR/REC-xml
8  * Original 1.0 version:
9  * http://www.w3.org/TR/1998/REC-xml-19980210
10  * XML second edition working draft
11  * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different character ranges are actually implemented either in
21  * parserInternals.h or parserInternals.c.
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86 
87 #include "buf.h"
88 #include "enc.h"
89 
90 struct _xmlStartTag {
91  const xmlChar *prefix;
92  const xmlChar *URI;
93  int line;
94  int nsNr;
95 };
96 
97 static void
99 
100 static xmlParserCtxtPtr
102  const xmlChar *base, xmlParserCtxtPtr pctx);
103 
104 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
105 
106 static int
108 
109 static void
111 
112 /************************************************************************
113  * *
114  * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
115  * *
116  ************************************************************************/
117 
118 #define XML_PARSER_BIG_ENTITY 1000
119 #define XML_PARSER_LOT_ENTITY 5000
120 
121 /*
122  * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
123  * replacements over the size in bytes of the input indicates that you have
124  * an exponential behaviour. A value of 10 corresponds to at least 3 entity
125  * replacements per byte of input.
126  */
127 #define XML_PARSER_NON_LINEAR 10
128 
129 /*
130  * xmlParserEntityCheck
131  *
132  * Checks for non-linear entity expansion behaviour.
133  * This is here to detect and stop exponential entity expansion.
134  * This is not a limitation of the parser but a safety
135  * boundary feature. It can be disabled with the XML_PARSE_HUGE
136  * parser option.
 *
 * Inputs used by the body: ctxt (parser context), size, ent (entity being
 * referenced, may be NULL) and replacement (a replacement length; 0 selects
 * the size/ent based checks).
 *
 * Returns 0 when the expansion looks acceptable (or when ctxt is NULL or
 * XML_PARSE_HUGE is set), 1 when an entity loop / excessive expansion is
 * detected.
 *
 * NOTE(review): in this listing the line carrying the function name and its
 * first parameters is absent, and so are a few lines of the body (flagged
 * below); restore them from the upstream file before compiling.
137  */
138 static int
140  xmlEntityPtr ent, size_t replacement)
141 {
142  size_t consumed = 0;
143  int i;
144 
 /* No context, or limits explicitly lifted: nothing to check. */
145  if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
146  return (0);
 /* A loop was already reported: keep failing without re-reporting. */
147  if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
148  return (1);
149 
150  /*
151  * This may look absurd but is needed to detect
152  * entities problems
153  */
154  if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
155  (ent->content != NULL) && (ent->checked == 0) &&
156  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
157  unsigned long oldnbent = ctxt->nbentities, diff;
158  xmlChar *rep;
159 
 /* Mark as visited before expanding so the expansion sees checked != 0. */
160  ent->checked = 1;
161 
162  ++ctxt->depth;
163  rep = xmlStringDecodeEntities(ctxt, ent->content,
164  XML_SUBSTITUTE_REF, 0, 0, 0);
165  --ctxt->depth;
 /* Failed or looping expansion: blank the entity content. */
166  if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
167  ent->content[0] = 0;
168  }
169 
 /*
  * checked stores twice the (capped) number of entities counted during
  * the expansion; the low bit is set below when the expansion holds '<'.
  */
170  diff = ctxt->nbentities - oldnbent + 1;
171  if (diff > INT_MAX / 2)
172  diff = INT_MAX / 2;
173  ent->checked = diff * 2;
174  if (rep != NULL) {
175  if (xmlStrchr(rep, '<'))
176  ent->checked |= 1;
177  xmlFree(rep);
178  rep = NULL;
179  }
180  }
181 
182  /*
183  * Prevent entity exponential check, not just replacement while
184  * parsing the DTD
185  * The check is potentially costly so do that only once in a thousand
186  */
187  if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
188  (ctxt->nbentities % 1024 == 0)) {
 /* Total bytes consumed across every input on the input stack. */
189  for (i = 0;i < ctxt->inputNr;i++) {
190  consumed += ctxt->inputTab[i]->consumed +
191  (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
192  }
193  if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
 /* NOTE(review): listing line 194 is absent here. */
195  ctxt->instate = XML_PARSER_EOF;
196  return (1);
197  }
198  consumed = 0;
199  }
200 
201 
202 
203  if (replacement != 0) {
204  if (replacement < XML_MAX_TEXT_LENGTH)
205  return(0);
206 
207  /*
208  * If the volume of entity copy reaches 10 times the
209  * amount of parsed data and over the large text threshold
210  * then that's very likely to be an abuse.
211  */
212  if (ctxt->input != NULL) {
213  consumed = ctxt->input->consumed +
214  (ctxt->input->cur - ctxt->input->base);
215  }
216  consumed += ctxt->sizeentities;
217 
218  if (replacement < XML_PARSER_NON_LINEAR * consumed)
219  return(0);
220  } else if (size != 0) {
221  /*
222  * Do the check based on the replacement size of the entity
223  */
 /* NOTE(review): listing line 224 (the condition guarding this return) is absent. */
225  return(0);
226 
227  /*
228  * A limit on the amount of text data reasonably used
229  */
230  if (ctxt->input != NULL) {
231  consumed = ctxt->input->consumed +
232  (ctxt->input->cur - ctxt->input->base);
233  }
234  consumed += ctxt->sizeentities;
235 
236  if ((size < XML_PARSER_NON_LINEAR * consumed) &&
237  (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
238  return (0);
239  } else if (ent != NULL) {
240  /*
241  * use the number of parsed entities in the replacement
242  */
243  size = ent->checked / 2;
244 
245  /*
246  * The amount of data parsed counting entities size only once
247  */
248  if (ctxt->input != NULL) {
249  consumed = ctxt->input->consumed +
250  (ctxt->input->cur - ctxt->input->base);
251  }
252  consumed += ctxt->sizeentities;
253 
254  /*
255  * Check the density of entities for the amount of data
256  * knowing an entity reference will take at least 3 bytes
257  */
258  if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
259  return (0);
260  } else {
261  /*
262  * strange we got no data for checking
263  */
 /* NOTE(review): listing line 265 (second half of this condition) is absent. */
264  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
266  (ctxt->nbentities <= 10000))
267  return (0);
268  }
 /* NOTE(review): listing line 269 is absent; every path reaching here fails. */
270  return (1);
271 }
272 
281 unsigned int xmlParserMaxDepth = 256;
282 
283 
284 
285 #define SAX2 1
286 #define XML_PARSER_BIG_BUFFER_SIZE 300
287 #define XML_PARSER_BUFFER_SIZE 100
288 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
289 
299 #define XML_PARSER_CHUNK_SIZE 100
300 
/*
 * NULL-terminated list of the "xml"-prefixed processing-instruction
 * targets that the W3C specifications allow.
 *
 * The table is never modified, so both the strings and the pointers to
 * them are const.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
310 
311 
312 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
314  const xmlChar **str);
315 
316 static xmlParserErrors
319  void *user_data, int depth, const xmlChar *URL,
320  const xmlChar *ID, xmlNodePtr *list);
321 
322 static int
324  const char *encoding);
325 #ifdef LIBXML_LEGACY_ENABLED
326 static void
327 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
328  xmlNodePtr lastNode);
329 #endif /* LIBXML_LEGACY_ENABLED */
330 
331 static xmlParserErrors
333  const xmlChar *string, void *user_data, xmlNodePtr *lst);
334 
335 static int
337 
338 /************************************************************************
339  * *
340  * Some factorized error routines *
341  * *
342  ************************************************************************/
343 
/*
 * Reports a redefined attribute ("Attribute %s redefined" or, when a prefix
 * is given, "Attribute %s:%s redefined") through __xmlRaiseError in the
 * XML_FROM_PARSER domain, then marks the document as not well-formed and,
 * unless the context is in recovery mode, disables further SAX callbacks.
 *
 * Nothing is reported when the context has SAX disabled and the parser is
 * already in the XML_PARSER_EOF state.
 *
 * Parameters used by the body: ctxt (may be NULL), prefix (may be NULL) and
 * localname.
 *
 * NOTE(review): this listing lacks the line with the function name and the
 * first parameters, the statement that belongs under "if (ctxt != NULL)",
 * and one argument line in each __xmlRaiseError call; restore them from the
 * upstream file before compiling.
 */
352 static void
354  const xmlChar * localname)
355 {
 /* Parser already halted: stay silent. */
356  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
357  (ctxt->instate == XML_PARSER_EOF))
358  return;
359  if (ctxt != NULL)
361 
362  if (prefix == NULL)
363  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
365  (const char *) localname, NULL, NULL, 0, 0,
366  "Attribute %s redefined\n", localname);
367  else
368  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
370  (const char *) prefix, (const char *) localname,
371  NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
372  localname);
 /* The document is no longer well-formed; stop SAX unless recovering. */
373  if (ctxt != NULL) {
374  ctxt->wellFormed = 0;
375  if (ctxt->recovery == 0)
376  ctxt->disableSAX = 1;
377  }
378 }
379 
/*
 * Reports a fatal parser error identified only by its error code.
 *
 * The switch maps "error" to a fixed English message ("Unregistered error
 * message" for unknown codes). The code is stored in ctxt->errNo and the
 * message is raised through __xmlRaiseError at XML_ERR_FATAL level in the
 * XML_FROM_PARSER domain; when "info" is not NULL it is appended as
 * "<message>: <info>". Afterwards the context is marked not well-formed and,
 * unless it is in recovery mode, SAX callbacks are disabled.
 *
 * Nothing is reported when the context has SAX disabled and the parser is
 * already in the XML_PARSER_EOF state. ctxt may be NULL.
 *
 * NOTE(review): this listing lacks the line with the function name and
 * parameters (the body uses ctxt, error and info) and almost every
 * "case XML_ERR_...:" label of the switch; only XML_ERR_ENTITY_LOOP and
 * XML_ERR_GT_REQUIRED survive. Restore them from the upstream file.
 */
388 static void
390 {
391  const char *errmsg;
392 
 /* Parser already halted: stay silent. */
393  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
394  (ctxt->instate == XML_PARSER_EOF))
395  return;
396  switch (error) {
398  errmsg = "CharRef: invalid hexadecimal value";
399  break;
401  errmsg = "CharRef: invalid decimal value";
402  break;
404  errmsg = "CharRef: invalid value";
405  break;
407  errmsg = "internal error";
408  break;
410  errmsg = "PEReference at end of document";
411  break;
413  errmsg = "PEReference in prolog";
414  break;
416  errmsg = "PEReference in epilog";
417  break;
419  errmsg = "PEReference: no name";
420  break;
422  errmsg = "PEReference: expecting ';'";
423  break;
424  case XML_ERR_ENTITY_LOOP:
425  errmsg = "Detected an entity reference loop";
426  break;
428  errmsg = "EntityValue: \" or ' expected";
429  break;
431  errmsg = "PEReferences forbidden in internal subset";
432  break;
434  errmsg = "EntityValue: \" or ' expected";
435  break;
437  errmsg = "AttValue: \" or ' expected";
438  break;
440  errmsg = "Unescaped '<' not allowed in attributes values";
441  break;
443  errmsg = "SystemLiteral \" or ' expected";
444  break;
446  errmsg = "Unfinished System or Public ID \" or ' expected";
447  break;
449  errmsg = "Sequence ']]>' not allowed in content";
450  break;
452  errmsg = "SYSTEM or PUBLIC, the URI is missing";
453  break;
455  errmsg = "PUBLIC, the Public Identifier is missing";
456  break;
458  errmsg = "Comment must not contain '--' (double-hyphen)";
459  break;
461  errmsg = "xmlParsePI : no target name";
462  break;
464  errmsg = "Invalid PI name";
465  break;
467  errmsg = "NOTATION: Name expected here";
468  break;
470  errmsg = "'>' required to close NOTATION declaration";
471  break;
473  errmsg = "Entity value required";
474  break;
476  errmsg = "Fragment not allowed";
477  break;
479  errmsg = "'(' required to start ATTLIST enumeration";
480  break;
482  errmsg = "NmToken expected in ATTLIST enumeration";
483  break;
485  errmsg = "')' required to finish ATTLIST enumeration";
486  break;
488  errmsg = "MixedContentDecl : '|' or ')*' expected";
489  break;
491  errmsg = "MixedContentDecl : '#PCDATA' expected";
492  break;
494  errmsg = "ContentDecl : Name or '(' expected";
495  break;
497  errmsg = "ContentDecl : ',' '|' or ')' expected";
498  break;
500  errmsg =
501  "PEReference: forbidden within markup decl in internal subset";
502  break;
503  case XML_ERR_GT_REQUIRED:
504  errmsg = "expected '>'";
505  break;
507  errmsg = "XML conditional section '[' expected";
508  break;
510  errmsg = "Content error in the external subset";
511  break;
513  errmsg =
514  "conditional section INCLUDE or IGNORE keyword expected";
515  break;
517  errmsg = "XML conditional section not closed";
518  break;
520  errmsg = "Text declaration '<?xml' required";
521  break;
523  errmsg = "parsing XML declaration: '?>' expected";
524  break;
526  errmsg = "external parsed entities cannot be standalone";
527  break;
529  errmsg = "EntityRef: expecting ';'";
530  break;
532  errmsg = "DOCTYPE improperly terminated";
533  break;
535  errmsg = "EndTag: '</' not found";
536  break;
538  errmsg = "expected '='";
539  break;
541  errmsg = "String not closed expecting \" or '";
542  break;
544  errmsg = "String not started expecting ' or \"";
545  break;
547  errmsg = "Invalid XML encoding name";
548  break;
550  errmsg = "standalone accepts only 'yes' or 'no'";
551  break;
553  errmsg = "Document is empty";
554  break;
556  errmsg = "Extra content at the end of the document";
557  break;
559  errmsg = "chunk is not well balanced";
560  break;
562  errmsg = "extra content at the end of well balanced chunk";
563  break;
565  errmsg = "Malformed declaration expecting version";
566  break;
568  errmsg = "Name too long use XML_PARSE_HUGE option";
569  break;
570 #if 0
571  case:
572  errmsg = "";
573  break;
574 #endif
575  default:
576  errmsg = "Unregistered error message";
577  }
578  if (ctxt != NULL)
579  ctxt->errNo = error;
 /* Without extra info only the fixed message is printed. */
580  if (info == NULL) {
581  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
582  XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
583  errmsg);
584  } else {
585  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
586  XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
587  errmsg, info);
588  }
 /* The document is no longer well-formed; stop SAX unless recovering. */
589  if (ctxt != NULL) {
590  ctxt->wellFormed = 0;
591  if (ctxt->recovery == 0)
592  ctxt->disableSAX = 1;
593  }
594 }
595 
604 static void LIBXML_ATTR_FORMAT(3,0)
605 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
606  const char *msg)
607 {
608  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
609  (ctxt->instate == XML_PARSER_EOF))
610  return;
611  if (ctxt != NULL)
612  ctxt->errNo = error;
613  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
614  XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
615  if (ctxt != NULL) {
616  ctxt->wellFormed = 0;
617  if (ctxt->recovery == 0)
618  ctxt->disableSAX = 1;
619  }
620 }
621 
632 static void LIBXML_ATTR_FORMAT(3,0)
633 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
634  const char *msg, const xmlChar *str1, const xmlChar *str2)
635 {
636  xmlStructuredErrorFunc schannel = NULL;
637 
638  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639  (ctxt->instate == XML_PARSER_EOF))
640  return;
641  if ((ctxt != NULL) && (ctxt->sax != NULL) &&
642  (ctxt->sax->initialized == XML_SAX2_MAGIC))
643  schannel = ctxt->sax->serror;
644  if (ctxt != NULL) {
645  __xmlRaiseError(schannel,
646  (ctxt->sax) ? ctxt->sax->warning : NULL,
647  ctxt->userData,
648  ctxt, NULL, XML_FROM_PARSER, error,
649  XML_ERR_WARNING, NULL, 0,
650  (const char *) str1, (const char *) str2, NULL, 0, 0,
651  msg, (const char *) str1, (const char *) str2);
652  } else {
653  __xmlRaiseError(schannel, NULL, NULL,
654  ctxt, NULL, XML_FROM_PARSER, error,
655  XML_ERR_WARNING, NULL, 0,
656  (const char *) str1, (const char *) str2, NULL, 0, 0,
657  msg, (const char *) str1, (const char *) str2);
658  }
659 }
660 
670 static void LIBXML_ATTR_FORMAT(3,0)
671 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
672  const char *msg, const xmlChar *str1, const xmlChar *str2)
673 {
674  xmlStructuredErrorFunc schannel = NULL;
675 
676  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
677  (ctxt->instate == XML_PARSER_EOF))
678  return;
679  if (ctxt != NULL) {
680  ctxt->errNo = error;
681  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
682  schannel = ctxt->sax->serror;
683  }
684  if (ctxt != NULL) {
685  __xmlRaiseError(schannel,
686  ctxt->vctxt.error, ctxt->vctxt.userData,
687  ctxt, NULL, XML_FROM_DTD, error,
688  XML_ERR_ERROR, NULL, 0, (const char *) str1,
689  (const char *) str2, NULL, 0, 0,
690  msg, (const char *) str1, (const char *) str2);
691  ctxt->valid = 0;
692  } else {
693  __xmlRaiseError(schannel, NULL, NULL,
694  ctxt, NULL, XML_FROM_DTD, error,
695  XML_ERR_ERROR, NULL, 0, (const char *) str1,
696  (const char *) str2, NULL, 0, 0,
697  msg, (const char *) str1, (const char *) str2);
698  }
699 }
700 
710 static void LIBXML_ATTR_FORMAT(3,0)
711 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
712  const char *msg, int val)
713 {
714  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
715  (ctxt->instate == XML_PARSER_EOF))
716  return;
717  if (ctxt != NULL)
718  ctxt->errNo = error;
719  __xmlRaiseError(NULL, NULL, NULL,
721  NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
722  if (ctxt != NULL) {
723  ctxt->wellFormed = 0;
724  if (ctxt->recovery == 0)
725  ctxt->disableSAX = 1;
726  }
727 }
728 
740 static void LIBXML_ATTR_FORMAT(3,0)
741 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
742  const char *msg, const xmlChar *str1, int val,
743  const xmlChar *str2)
744 {
745  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
746  (ctxt->instate == XML_PARSER_EOF))
747  return;
748  if (ctxt != NULL)
749  ctxt->errNo = error;
750  __xmlRaiseError(NULL, NULL, NULL,
752  NULL, 0, (const char *) str1, (const char *) str2,
753  NULL, val, 0, msg, str1, val, str2);
754  if (ctxt != NULL) {
755  ctxt->wellFormed = 0;
756  if (ctxt->recovery == 0)
757  ctxt->disableSAX = 1;
758  }
759 }
760 
770 static void LIBXML_ATTR_FORMAT(3,0)
771 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
772  const char *msg, const xmlChar * val)
773 {
774  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
775  (ctxt->instate == XML_PARSER_EOF))
776  return;
777  if (ctxt != NULL)
778  ctxt->errNo = error;
779  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
781  NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
782  val);
783  if (ctxt != NULL) {
784  ctxt->wellFormed = 0;
785  if (ctxt->recovery == 0)
786  ctxt->disableSAX = 1;
787  }
788 }
789 
799 static void LIBXML_ATTR_FORMAT(3,0)
800 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
801  const char *msg, const xmlChar * val)
802 {
803  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
804  (ctxt->instate == XML_PARSER_EOF))
805  return;
806  if (ctxt != NULL)
807  ctxt->errNo = error;
808  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
810  NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
811  val);
812 }
813 
824 static void LIBXML_ATTR_FORMAT(3,0)
825 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
826  const char *msg,
827  const xmlChar * info1, const xmlChar * info2,
828  const xmlChar * info3)
829 {
830  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
831  (ctxt->instate == XML_PARSER_EOF))
832  return;
833  if (ctxt != NULL)
834  ctxt->errNo = error;
835  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
836  XML_ERR_ERROR, NULL, 0, (const char *) info1,
837  (const char *) info2, (const char *) info3, 0, 0, msg,
838  info1, info2, info3);
839  if (ctxt != NULL)
840  ctxt->nsWellFormed = 0;
841 }
842 
853 static void LIBXML_ATTR_FORMAT(3,0)
854 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
855  const char *msg,
856  const xmlChar * info1, const xmlChar * info2,
857  const xmlChar * info3)
858 {
859  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
860  (ctxt->instate == XML_PARSER_EOF))
861  return;
862  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
863  XML_ERR_WARNING, NULL, 0, (const char *) info1,
864  (const char *) info2, (const char *) info3, 0, 0, msg,
865  info1, info2, info3);
866 }
867 
868 /************************************************************************
869  * *
870  * Library wide options *
871  * *
872  ************************************************************************/
873 
/*
 * Reports whether an optional library feature was compiled in.
 *
 * Each XML_WITH_* value of "feature" is answered at compile time: 1 when
 * the matching configuration macro is defined, 0 otherwise. Any value not
 * listed in the switch yields 0.
 *
 * Most cases test a LIBXML_<NAME>_ENABLED macro; XML_WITH_DEBUG_MEM tests
 * DEBUG_MEMORY_LOCATION and XML_WITH_DEBUG_RUN tests LIBXML_DEBUG_RUNTIME.
 *
 * NOTE(review): this listing lacks the line with the function name and the
 * "feature" parameter declaration; restore it from the upstream file.
 */
884 int
886 {
887  switch (feature) {
888  case XML_WITH_THREAD:
889 #ifdef LIBXML_THREAD_ENABLED
890  return(1);
891 #else
892  return(0);
893 #endif
894  case XML_WITH_TREE:
895 #ifdef LIBXML_TREE_ENABLED
896  return(1);
897 #else
898  return(0);
899 #endif
900  case XML_WITH_OUTPUT:
901 #ifdef LIBXML_OUTPUT_ENABLED
902  return(1);
903 #else
904  return(0);
905 #endif
906  case XML_WITH_PUSH:
907 #ifdef LIBXML_PUSH_ENABLED
908  return(1);
909 #else
910  return(0);
911 #endif
912  case XML_WITH_READER:
913 #ifdef LIBXML_READER_ENABLED
914  return(1);
915 #else
916  return(0);
917 #endif
918  case XML_WITH_PATTERN:
919 #ifdef LIBXML_PATTERN_ENABLED
920  return(1);
921 #else
922  return(0);
923 #endif
924  case XML_WITH_WRITER:
925 #ifdef LIBXML_WRITER_ENABLED
926  return(1);
927 #else
928  return(0);
929 #endif
930  case XML_WITH_SAX1:
931 #ifdef LIBXML_SAX1_ENABLED
932  return(1);
933 #else
934  return(0);
935 #endif
936  case XML_WITH_FTP:
937 #ifdef LIBXML_FTP_ENABLED
938  return(1);
939 #else
940  return(0);
941 #endif
942  case XML_WITH_HTTP:
943 #ifdef LIBXML_HTTP_ENABLED
944  return(1);
945 #else
946  return(0);
947 #endif
948  case XML_WITH_VALID:
949 #ifdef LIBXML_VALID_ENABLED
950  return(1);
951 #else
952  return(0);
953 #endif
954  case XML_WITH_HTML:
955 #ifdef LIBXML_HTML_ENABLED
956  return(1);
957 #else
958  return(0);
959 #endif
960  case XML_WITH_LEGACY:
961 #ifdef LIBXML_LEGACY_ENABLED
962  return(1);
963 #else
964  return(0);
965 #endif
966  case XML_WITH_C14N:
967 #ifdef LIBXML_C14N_ENABLED
968  return(1);
969 #else
970  return(0);
971 #endif
972  case XML_WITH_CATALOG:
973 #ifdef LIBXML_CATALOG_ENABLED
974  return(1);
975 #else
976  return(0);
977 #endif
978  case XML_WITH_XPATH:
979 #ifdef LIBXML_XPATH_ENABLED
980  return(1);
981 #else
982  return(0);
983 #endif
984  case XML_WITH_XPTR:
985 #ifdef LIBXML_XPTR_ENABLED
986  return(1);
987 #else
988  return(0);
989 #endif
990  case XML_WITH_XINCLUDE:
991 #ifdef LIBXML_XINCLUDE_ENABLED
992  return(1);
993 #else
994  return(0);
995 #endif
996  case XML_WITH_ICONV:
997 #ifdef LIBXML_ICONV_ENABLED
998  return(1);
999 #else
1000  return(0);
1001 #endif
1002  case XML_WITH_ISO8859X:
1003 #ifdef LIBXML_ISO8859X_ENABLED
1004  return(1);
1005 #else
1006  return(0);
1007 #endif
1008  case XML_WITH_UNICODE:
1009 #ifdef LIBXML_UNICODE_ENABLED
1010  return(1);
1011 #else
1012  return(0);
1013 #endif
1014  case XML_WITH_REGEXP:
1015 #ifdef LIBXML_REGEXP_ENABLED
1016  return(1);
1017 #else
1018  return(0);
1019 #endif
1020  case XML_WITH_AUTOMATA:
1021 #ifdef LIBXML_AUTOMATA_ENABLED
1022  return(1);
1023 #else
1024  return(0);
1025 #endif
1026  case XML_WITH_EXPR:
1027 #ifdef LIBXML_EXPR_ENABLED
1028  return(1);
1029 #else
1030  return(0);
1031 #endif
1032  case XML_WITH_SCHEMAS:
1033 #ifdef LIBXML_SCHEMAS_ENABLED
1034  return(1);
1035 #else
1036  return(0);
1037 #endif
1038  case XML_WITH_SCHEMATRON:
1039 #ifdef LIBXML_SCHEMATRON_ENABLED
1040  return(1);
1041 #else
1042  return(0);
1043 #endif
1044  case XML_WITH_MODULES:
1045 #ifdef LIBXML_MODULES_ENABLED
1046  return(1);
1047 #else
1048  return(0);
1049 #endif
1050  case XML_WITH_DEBUG:
1051 #ifdef LIBXML_DEBUG_ENABLED
1052  return(1);
1053 #else
1054  return(0);
1055 #endif
1056  case XML_WITH_DEBUG_MEM:
1057 #ifdef DEBUG_MEMORY_LOCATION
1058  return(1);
1059 #else
1060  return(0);
1061 #endif
1062  case XML_WITH_DEBUG_RUN:
1063 #ifdef LIBXML_DEBUG_RUNTIME
1064  return(1);
1065 #else
1066  return(0);
1067 #endif
1068  case XML_WITH_ZLIB:
1069 #ifdef LIBXML_ZLIB_ENABLED
1070  return(1);
1071 #else
1072  return(0);
1073 #endif
1074  case XML_WITH_LZMA:
1075 #ifdef LIBXML_LZMA_ENABLED
1076  return(1);
1077 #else
1078  return(0);
1079 #endif
1080  case XML_WITH_ICU:
1081 #ifdef LIBXML_ICU_ENABLED
1082  return(1);
1083 #else
1084  return(0);
1085 #endif
1086  default:
1087  break;
1088  }
 /* Unknown feature value. */
1089  return(0);
1090 }
1091 
1092 /************************************************************************
1093  * *
1094  * SAX2 defaulted attributes handling *
1095  * *
1096  ************************************************************************/
1097 
/*
 * Decides whether the context should run in SAX2 mode and interns the
 * strings "xml", "xmlns" and XML_XML_NAMESPACE in the context dictionary.
 *
 * When SAX1 support is compiled in, ctxt->sax2 is set only if the handler
 * is initialized with XML_SAX2_MAGIC and either provides a namespace-aware
 * start/end element callback or provides no SAX1 start/end element
 * callbacks at all. Without SAX1 support ctxt->sax2 is always set.
 *
 * A failed dictionary lookup is reported through xmlErrMemory.
 * Does nothing when ctxt is NULL.
 *
 * NOTE(review): this listing lacks the line with the function name,
 * parameter and opening brace, and the declaration of the local "sax";
 * restore them from the upstream file.
 */
1104 static void
1107  if (ctxt == NULL) return;
1108  sax = ctxt->sax;
1109 #ifdef LIBXML_SAX1_ENABLED
1110  if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1111  ((sax->startElementNs != NULL) ||
1112  (sax->endElementNs != NULL) ||
1113  ((sax->startElement == NULL) && (sax->endElement == NULL))))
1114  ctxt->sax2 = 1;
1115 #else
1116  ctxt->sax2 = 1;
1117 #endif /* LIBXML_SAX1_ENABLED */
1118 
 /* The numeric arguments are the byte lengths of the interned strings. */
1119  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1120  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1121  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1122  if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1123  (ctxt->str_xml_ns == NULL)) {
1124  xmlErrMemory(ctxt, NULL);
1125  }
1126 }
1127 
/*
 * Table of defaulted attributes recorded for one element.
 *
 * Each attribute occupies five consecutive slots of values[]; the code that
 * fills the table stores, in order: local name, prefix, value, end of value
 * and an "external" marker (or NULL).
 *
 * NOTE(review): this listing lacks the lines between the typedef and the
 * first field (including the "struct _xmlDefAttrs {" opener); restore them
 * from the upstream file.
 */
1128 typedef struct _xmlDefAttrs xmlDefAttrs;
1131  int nbAttrs; /* number of defaulted attributes on that element */
1132  int maxAttrs; /* the size of the array */
1133 #if __STDC_VERSION__ >= 199901L
1134  /* Using a C99 flexible array member avoids UBSan errors. */
1135  const xmlChar *values[]; /* array of localname/prefix/values/external */
1136 #else
 /* Pre-C99 fallback: room for one attribute declared, more allocated. */
1137  const xmlChar *values[5];
1138 #endif
1139 };
1140 
/*
 * Normalizes the spaces of an attribute value: leading and trailing 0x20
 * characters are dropped and every inner run of 0x20 is collapsed to a
 * single one. The result is written to dst and NUL-terminated.
 *
 * Returns NULL when src or dst is NULL, or when the end of the output
 * coincides with the end of the input (dst == src after the copy), which
 * presumably means an in-place call that changed nothing - TODO confirm
 * with callers. Otherwise returns a pointer to the terminating NUL written
 * in dst.
 *
 * NOTE(review): this listing lacks the line with the function name and the
 * parameter declarations (the body uses src and dst); restore it from the
 * upstream file.
 */
1158 static xmlChar *
1160 {
1161  if ((src == NULL) || (dst == NULL))
1162  return(NULL);
1163 
 /* Skip leading spaces. */
1164  while (*src == 0x20) src++;
1165  while (*src != 0) {
1166  if (*src == 0x20) {
 /* Collapse the run; emit one space only if more text follows. */
1167  while (*src == 0x20) src++;
1168  if (*src != 0)
1169  *dst++ = 0x20;
1170  } else {
1171  *dst++ = *src++;
1172  }
1173  }
1174  *dst = 0;
1175  if (dst == src)
1176  return(NULL);
1177  return(dst);
1178 }
1179 
/*
 * Normalizes the spaces of an attribute value of known length, copying
 * only when needed.
 *
 * The value is scanned once: leading 0x20 characters are counted, and a
 * space followed by another space or by the end of the string marks the
 * value as needing a rewrite.
 *  - rewrite needed: a duplicate of the value (without the leading spaces)
 *    is made with xmlStrndup, *len is set to the length of that duplicate
 *    and the duplicate is returned;
 *  - only leading spaces: the value is shifted in place with memmove, *len
 *    is reduced accordingly and src is returned;
 *  - otherwise NULL is returned and nothing is modified.
 *
 * NULL is also returned when ctxt, src or len is NULL, when *len <= 0, or
 * when the duplication fails (reported through xmlErrMemory).
 *
 * NOTE(review): this listing lacks the line with the function name and the
 * parameter declarations, and one statement between the allocation check
 * and the "*len" update (listing line 1228); restore them from upstream.
 */
1191 static const xmlChar *
1193 {
1194  int i;
1195  int remove_head = 0;
1196  int need_realloc = 0;
1197  const xmlChar *cur;
1198 
1199  if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1200  return(NULL);
1201  i = *len;
1202  if (i <= 0)
1203  return(NULL);
1204 
1205  cur = src;
 /* Count leading spaces. */
1206  while (*cur == 0x20) {
1207  cur++;
1208  remove_head++;
1209  }
 /* Look for a doubled or trailing space. */
1210  while (*cur != 0) {
1211  if (*cur == 0x20) {
1212  cur++;
1213  if ((*cur == 0x20) || (*cur == 0)) {
1214  need_realloc = 1;
1215  break;
1216  }
1217  } else
1218  cur++;
1219  }
1220  if (need_realloc) {
1221  xmlChar *ret;
1222 
1223  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1224  if (ret == NULL) {
1225  xmlErrMemory(ctxt, NULL);
1226  return(NULL);
1227  }
1229  *len = (int) strlen((const char *)ret);
1230  return(ret);
1231  } else if (remove_head) {
 /* "1 + *len" also moves the terminating NUL. */
1232  *len -= remove_head;
1233  memmove(src, src + remove_head, 1 + *len);
1234  return(src);
1235  }
1236  return(NULL);
1237 }
1238 
/*
 * Records a defaulted attribute (fullattr = value) for element fullname in
 * ctxt->attsDefault.
 *
 * Steps visible in the body:
 *  - return at once if (fullname, fullattr) is already present in
 *    ctxt->attsSpecial;
 *  - create ctxt->attsDefault on first use;
 *  - split the element name into prefix and local name and intern both in
 *    ctxt->dict;
 *  - find the element's table in the hash, allocating it with room for 4
 *    attributes or doubling it when full, and (re)register it in the hash;
 *  - split and intern the attribute name, intern the value, and store five
 *    slots: name, prefix, value, value + len, and either "external" (when
 *    ctxt->external is set) or NULL.
 *
 * Allocation or hash failures are reported through xmlErrMemory.
 *
 * NOTE(review): this listing lacks several lines: the one with the function
 * name and first parameter, the declaration of "defaults", the first
 * name-splitting call, the allocation call, and the declaration and
 * reallocation call for "temp". Restore them from the upstream file.
 */
1248 static void
1250  const xmlChar *fullname,
1251  const xmlChar *fullattr,
1252  const xmlChar *value) {
1254  int len;
1255  const xmlChar *name;
1256  const xmlChar *prefix;
1257 
1258  /*
1259  * Allows to detect attribute redefinitions
1260  */
1261  if (ctxt->attsSpecial != NULL) {
1262  if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1263  return;
1264  }
1265 
1266  if (ctxt->attsDefault == NULL) {
1267  ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1268  if (ctxt->attsDefault == NULL)
1269  goto mem_error;
1270  }
1271 
1272  /*
1273  * split the element name into prefix:localname , the string found
1274  * are within the DTD and then not associated to namespace names.
1275  */
1277  if (name == NULL) {
1278  name = xmlDictLookup(ctxt->dict, fullname, -1);
1279  prefix = NULL;
1280  } else {
1281  name = xmlDictLookup(ctxt->dict, name, -1);
1282  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1283  }
1284 
1285  /*
1286  * make sure there is some storage
1287  */
1288  defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1289  if (defaults == NULL) {
 /* First attribute for this element: room for 4 entries of 5 slots. */
1291  (4 * 5) * sizeof(const xmlChar *));
1292  if (defaults == NULL)
1293  goto mem_error;
1294  defaults->nbAttrs = 0;
1295  defaults->maxAttrs = 4;
1296  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1297  defaults, NULL) < 0) {
1298  xmlFree(defaults);
1299  goto mem_error;
1300  }
1301  } else if (defaults->nbAttrs >= defaults->maxAttrs) {
 /* Table full: double its capacity and re-register the new pointer. */
1303 
1305  (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1306  if (temp == NULL)
1307  goto mem_error;
1308  defaults = temp;
1309  defaults->maxAttrs *= 2;
1310  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1311  defaults, NULL) < 0) {
1312  xmlFree(defaults);
1313  goto mem_error;
1314  }
1315  }
1316 
1317  /*
1318  * Split the attribute name into prefix:localname , the string found
1319  * are within the DTD and then not associated to namespace names.
1320  */
1321  name = xmlSplitQName3(fullattr, &len);
1322  if (name == NULL) {
1323  name = xmlDictLookup(ctxt->dict, fullattr, -1);
1324  prefix = NULL;
1325  } else {
1326  name = xmlDictLookup(ctxt->dict, name, -1);
1327  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1328  }
1329 
1330  defaults->values[5 * defaults->nbAttrs] = name;
1331  defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1332  /* intern the string and precompute the end */
1333  len = xmlStrlen(value);
1334  value = xmlDictLookup(ctxt->dict, value, len);
1335  defaults->values[5 * defaults->nbAttrs + 2] = value;
1336  defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1337  if (ctxt->external)
1338  defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1339  else
1340  defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1341  defaults->nbAttrs++;
1342 
1343  return;
1344 
1345 mem_error:
1346  xmlErrMemory(ctxt, NULL);
1347  return;
1348 }
1349 
/*
 * Records the declared type of attribute fullattr on element fullname in
 * ctxt->attsSpecial, creating that hash table on first use.
 *
 * An existing entry for the same (fullname, fullattr) pair is kept: the
 * first recorded type wins. The type is stored directly in the payload
 * pointer (cast through ptrdiff_t), so no memory is allocated per entry.
 * The result of xmlHashAddEntry2 is not checked.
 *
 * Failure to create the table is reported through xmlErrMemory.
 *
 * NOTE(review): this listing lacks the line with the function name and the
 * first parameter (the body uses ctxt); restore it from the upstream file.
 */
1359 static void
1361  const xmlChar *fullname,
1362  const xmlChar *fullattr,
1363  int type)
1364 {
1365  if (ctxt->attsSpecial == NULL) {
1366  ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1367  if (ctxt->attsSpecial == NULL)
1368  goto mem_error;
1369  }
1370 
 /* Already recorded: keep the first declaration. */
1371  if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1372  return;
1373 
1374  xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1375  (void *) (ptrdiff_t) type);
1376  return;
1377 
1378 mem_error:
1379  xmlErrMemory(ctxt, NULL);
1380  return;
1381 }
1382 
1388 static void
1389 xmlCleanSpecialAttrCallback(void *payload, void *data,
1390  const xmlChar *fullname, const xmlChar *fullattr,
1391  const xmlChar *unused ATTRIBUTE_UNUSED) {
1393 
1394  if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1395  xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1396  }
1397 }
1398 
/*
 * Trims ctxt->attsSpecial and releases the table when it ends up empty
 * (xmlHashFree, then the pointer is reset to NULL). Does nothing when the
 * table was never created.
 *
 * NOTE(review): this listing lacks the line with the function name and
 * parameter, and the statement that walks the table before the size check
 * (listing line 1413) - presumably a hash scan using
 * xmlCleanSpecialAttrCallback; restore both from the upstream file.
 */
1407 static void
1409 {
1410  if (ctxt->attsSpecial == NULL)
1411  return;
1412 
1414 
 /* Nothing left to track: drop the table itself. */
1415  if (xmlHashSize(ctxt->attsSpecial) == 0) {
1416  xmlHashFree(ctxt->attsSpecial, NULL);
1417  ctxt->attsSpecial = NULL;
1418  }
1419  return;
1420 }
1421 
/*
 * Syntactic check of a language identifier held in `lang`.
 *
 * Returns 1 when the string is accepted and 0 otherwise (including when
 * `lang` is NULL). The accepted shapes, as implemented below, are:
 *   - the legacy forms "i-" / "x-" (either case) followed only by ASCII
 *     letters up to the end of the string;
 *   - a primary subtag of 2 or 3 ASCII letters, optionally followed by
 *     '-'-separated subtags in this order: extlang (3 letters), script
 *     (4 letters), region (2 letters or 3 digits), variant (5 to 8 letters);
 *   - a lone primary subtag of 4 to 8 letters (reserved), which must be the
 *     whole string.
 * Anything following a variant and a '-' is accepted without inspection.
 *
 * `cur` marks the start of the subtag being scanned and `nxt` the scan
 * position, so `nxt - cur` is the length of the current subtag.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * not visible in this listing.
 */
1480 int
1482 {
1483  const xmlChar *cur = lang, *nxt;
1484 
1485  if (cur == NULL)
1486  return (0);
1487  if (((cur[0] == 'i') && (cur[1] == '-')) ||
1488  ((cur[0] == 'I') && (cur[1] == '-')) ||
1489  ((cur[0] == 'x') && (cur[1] == '-')) ||
1490  ((cur[0] == 'X') && (cur[1] == '-'))) {
1491  /*
1492  * Still allow IANA code and user code which were coming
1493  * from the previous version of the XML-1.0 specification
1494  * it's deprecated but we should not fail
1495  */
1496  cur += 2;
1497  while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1498  ((cur[0] >= 'a') && (cur[0] <= 'z')))
1499  cur++;
1500  return(cur[0] == 0);
1501  }
1502  nxt = cur;
1503  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1504  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1505  nxt++;
1506  if (nxt - cur >= 4) {
1507  /*
1508  * Reserved
1509  */
1510  if ((nxt - cur > 8) || (nxt[0] != 0))
1511  return(0);
1512  return(1);
1513  }
1514  if (nxt - cur < 2)
1515  return(0);
1516  /* we got an ISO 639 code */
1517  if (nxt[0] == 0)
1518  return(1);
1519  if (nxt[0] != '-')
1520  return(0);
1521 
1522  nxt++;
1523  cur = nxt;
1524  /* now we can have extlang or script or region or variant */
1525  if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1526  goto region_m49;
1527 
1528  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1529  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1530  nxt++;
1531  if (nxt - cur == 4)
1532  goto script;
1533  if (nxt - cur == 2)
1534  goto region;
1535  if ((nxt - cur >= 5) && (nxt - cur <= 8))
1536  goto variant;
1537  if (nxt - cur != 3)
1538  return(0);
1539  /* we parsed an extlang */
1540  if (nxt[0] == 0)
1541  return(1);
1542  if (nxt[0] != '-')
1543  return(0);
1544 
1545  nxt++;
1546  cur = nxt;
1547  /* now we can have script or region or variant */
1548  if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1549  goto region_m49;
1550 
1551  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1552  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1553  nxt++;
1554  if (nxt - cur == 2)
1555  goto region;
1556  if ((nxt - cur >= 5) && (nxt - cur <= 8))
1557  goto variant;
1558  if (nxt - cur != 4)
1559  return(0);
1560  /* we parsed a script */
1561 script:
1562  if (nxt[0] == 0)
1563  return(1);
1564  if (nxt[0] != '-')
1565  return(0);
1566 
1567  nxt++;
1568  cur = nxt;
1569  /* now we can have region or variant */
1570  if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1571  goto region_m49;
1572 
1573  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1574  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1575  nxt++;
1576 
1577  if ((nxt - cur >= 5) && (nxt - cur <= 8))
1578  goto variant;
1579  if (nxt - cur != 2)
1580  return(0);
1581  /* we parsed a region */
1582 region:
1583  if (nxt[0] == 0)
1584  return(1);
1585  if (nxt[0] != '-')
1586  return(0);
1587 
1588  nxt++;
1589  cur = nxt;
1590  /* now we can just have a variant */
1591  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1592  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1593  nxt++;
1594 
1595  if ((nxt - cur < 5) || (nxt - cur > 8))
1596  return(0);
1597 
1598  /* we parsed a variant */
1599 variant:
1600  if (nxt[0] == 0)
1601  return(1);
1602  if (nxt[0] != '-')
1603  return(0);
1604  /* extensions and private use subtags not checked */
1605  return (1);
1606 
/*
 * Numeric region: the first digit was already seen by the caller of this
 * label; require two more digits, then continue as for a letter region.
 * The && chain stops at the first non-digit, so the terminating NUL is
 * never stepped over.
 */
1607 region_m49:
1608  if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1609  ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1610  nxt += 3;
1611  goto region;
1612  }
1613  return(0);
1614 }
1615 
1616 /************************************************************************
1617  * *
1618  * Parser stacks related functions and macros *
1619  * *
1620  ************************************************************************/
1621 
1623  const xmlChar ** str);
1624 
1625 #ifdef SAX2
1626 
1637 static int
1638 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1639 {
1640  if (ctxt->options & XML_PARSE_NSCLEAN) {
1641  int i;
1642  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1643  if (ctxt->nsTab[i] == prefix) {
1644  /* in scope */
1645  if (ctxt->nsTab[i + 1] == URL)
1646  return(-2);
1647  /* out of scope keep it */
1648  break;
1649  }
1650  }
1651  }
1652  if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1653  ctxt->nsMax = 10;
1654  ctxt->nsNr = 0;
1655  ctxt->nsTab = (const xmlChar **)
1656  xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1657  if (ctxt->nsTab == NULL) {
1658  xmlErrMemory(ctxt, NULL);
1659  ctxt->nsMax = 0;
1660  return (-1);
1661  }
1662  } else if (ctxt->nsNr >= ctxt->nsMax) {
1663  const xmlChar ** tmp;
1664  ctxt->nsMax *= 2;
1665  tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1666  ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1667  if (tmp == NULL) {
1668  xmlErrMemory(ctxt, NULL);
1669  ctxt->nsMax /= 2;
1670  return (-1);
1671  }
1672  ctxt->nsTab = tmp;
1673  }
1674  ctxt->nsTab[ctxt->nsNr++] = prefix;
1675  ctxt->nsTab[ctxt->nsNr++] = URL;
1676  return (ctxt->nsNr);
1677 }
/*
 * Removes `nr` slots from the top of the namespace stack (ctxt->nsTab),
 * resetting each freed slot to NULL.
 *
 * Returns 0 when the table does not exist or is already empty, otherwise
 * the number of slots actually removed.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * not visible in this listing.
 */
1687 static int
1689 {
1690  int i;
1691 
1692  if (ctxt->nsTab == NULL) return(0);
/* Asked to pop more than is stored: report it and clamp the request. */
1693  if (ctxt->nsNr < nr) {
1694  xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1695  nr = ctxt->nsNr;
1696  }
1697  if (ctxt->nsNr <= 0)
1698  return (0);
1699 
1700  for (i = 0;i < nr;i++) {
1701  ctxt->nsNr--;
1702  ctxt->nsTab[ctxt->nsNr] = NULL;
1703  }
1704  return(nr);
1705 }
1706 #endif
1707 
/*
 * Makes sure the ctxt->atts and ctxt->attallocs arrays can hold the
 * attributes currently being collected, where `nr` is the number of atts
 * slots already in use.
 *
 * On first use both arrays are allocated (55 atts slots); afterwards, when
 * nr + 5 exceeds ctxt->maxatts, both are reallocated to (nr + 5) * 2 slots.
 * attallocs always gets maxatts / 5 entries.
 * NOTE(review): the "/ 5" and "+ 5" suggest five atts slots per attribute;
 * confirm against the code that fills ctxt->atts.
 *
 * Returns ctxt->maxatts on success, -1 after reporting a memory error.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * not visible in this listing.
 */
1708 static int
1710  const xmlChar **atts;
1711  int *attallocs;
1712  int maxatts;
1713 
1714  if (ctxt->atts == NULL) {
1715  maxatts = 55; /* allow for 10 attrs by default */
1716  atts = (const xmlChar **)
1717  xmlMalloc(maxatts * sizeof(xmlChar *));
1718  if (atts == NULL) goto mem_error;
1719  ctxt->atts = atts;
1720  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1721  if (attallocs == NULL) goto mem_error;
1722  ctxt->attallocs = attallocs;
/* maxatts is only published once both arrays exist. */
1723  ctxt->maxatts = maxatts;
1724  } else if (nr + 5 > ctxt->maxatts) {
1725  maxatts = (nr + 5) * 2;
1726  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1727  maxatts * sizeof(const xmlChar *));
1728  if (atts == NULL) goto mem_error;
1729  ctxt->atts = atts;
1730  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1731  (maxatts / 5) * sizeof(int));
1732  if (attallocs == NULL) goto mem_error;
1733  ctxt->attallocs = attallocs;
1734  ctxt->maxatts = maxatts;
1735  }
1736  return(ctxt->maxatts);
1737 mem_error:
1738  xmlErrMemory(ctxt, NULL);
1739  return(-1);
1740 }
1741 
1751 int
1753 {
1754  if ((ctxt == NULL) || (value == NULL))
1755  return(-1);
1756  if (ctxt->inputNr >= ctxt->inputMax) {
1757  ctxt->inputMax *= 2;
1758  ctxt->inputTab =
1760  ctxt->inputMax *
1761  sizeof(ctxt->inputTab[0]));
1762  if (ctxt->inputTab == NULL) {
1763  xmlErrMemory(ctxt, NULL);
1765  ctxt->inputMax /= 2;
1766  value = NULL;
1767  return (-1);
1768  }
1769  }
1770  ctxt->inputTab[ctxt->inputNr] = value;
1771  ctxt->input = value;
1772  return (ctxt->inputNr++);
1773 }
1784 {
1786 
1787  if (ctxt == NULL)
1788  return(NULL);
1789  if (ctxt->inputNr <= 0)
1790  return (NULL);
1791  ctxt->inputNr--;
1792  if (ctxt->inputNr > 0)
1793  ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1794  else
1795  ctxt->input = NULL;
1796  ret = ctxt->inputTab[ctxt->inputNr];
1797  ctxt->inputTab[ctxt->inputNr] = NULL;
1798  return (ret);
1799 }
1809 int
1811 {
1812  if (ctxt == NULL) return(0);
1813  if (ctxt->nodeNr >= ctxt->nodeMax) {
1814  xmlNodePtr *tmp;
1815 
1816  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1817  ctxt->nodeMax * 2 *
1818  sizeof(ctxt->nodeTab[0]));
1819  if (tmp == NULL) {
1820  xmlErrMemory(ctxt, NULL);
1821  return (-1);
1822  }
1823  ctxt->nodeTab = tmp;
1824  ctxt->nodeMax *= 2;
1825  }
1826  if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1827  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1828  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1829  "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1831  xmlHaltParser(ctxt);
1832  return(-1);
1833  }
1834  ctxt->nodeTab[ctxt->nodeNr] = value;
1835  ctxt->node = value;
1836  return (ctxt->nodeNr++);
1837 }
1838 
/*
 * Pops the top entry of the node stack (ctxt->nodeTab).
 *
 * After the pop, ctxt->node points at the new top of the stack, or is NULL
 * when the stack became empty; the vacated slot is reset to NULL.
 *
 * Returns the node that was removed, or NULL when ctxt is NULL or the
 * stack is empty.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * not visible in this listing.
 */
1847 xmlNodePtr
1849 {
1850  xmlNodePtr ret;
1851 
1852  if (ctxt == NULL) return(NULL);
1853  if (ctxt->nodeNr <= 0)
1854  return (NULL);
1855  ctxt->nodeNr--;
1856  if (ctxt->nodeNr > 0)
1857  ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1858  else
1859  ctxt->node = NULL;
1860  ret = ctxt->nodeTab[ctxt->nodeNr];
1861  ctxt->nodeTab[ctxt->nodeNr] = NULL;
1862  return (ret);
1863 }
1864 
/*
 * Pushes an element name on the name stack together with its namespace
 * data: `value` goes into ctxt->nameTab and (prefix, URI, line, nsNr) into
 * the parallel ctxt->pushTab entry at the same index. ctxt->name is updated
 * to `value`.
 *
 * Both tables are doubled when the stack is full; pushTab is allocated with
 * nameMax entries the first time it is needed.
 *
 * Returns the index at which the entry was stored, or -1 after reporting a
 * memory error.
 *
 * NOTE(review): the first line of the parameter list (function name and
 * leading parameters) is not visible in this listing.
 */
1878 static int
1880  const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1881 {
1882  xmlStartTag *tag;
1883 
1884  if (ctxt->nameNr >= ctxt->nameMax) {
1885  const xmlChar * *tmp;
1886  xmlStartTag *tmp2;
1887  ctxt->nameMax *= 2;
1888  tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1889  ctxt->nameMax *
1890  sizeof(ctxt->nameTab[0]));
1891  if (tmp == NULL) {
/* Undo the doubling so nameMax keeps matching the real table size. */
1892  ctxt->nameMax /= 2;
1893  goto mem_error;
1894  }
1895  ctxt->nameTab = tmp;
1896  tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1897  ctxt->nameMax *
1898  sizeof(ctxt->pushTab[0]));
1899  if (tmp2 == NULL) {
1900  ctxt->nameMax /= 2;
1901  goto mem_error;
1902  }
1903  ctxt->pushTab = tmp2;
1904  } else if (ctxt->pushTab == NULL) {
1905  ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1906  sizeof(ctxt->pushTab[0]));
1907  if (ctxt->pushTab == NULL)
1908  goto mem_error;
1909  }
1910  ctxt->nameTab[ctxt->nameNr] = value;
1911  ctxt->name = value;
1912  tag = &ctxt->pushTab[ctxt->nameNr];
1913  tag->prefix = prefix;
1914  tag->URI = URI;
1915  tag->line = line;
1916  tag->nsNr = nsNr;
1917  return (ctxt->nameNr++);
1918 mem_error:
1919  xmlErrMemory(ctxt, NULL);
1920  return (-1);
1921 }
1922 #ifdef LIBXML_PUSH_ENABLED
1923 
1931 static const xmlChar *
1932 nameNsPop(xmlParserCtxtPtr ctxt)
1933 {
1934  const xmlChar *ret;
1935 
1936  if (ctxt->nameNr <= 0)
1937  return (NULL);
1938  ctxt->nameNr--;
1939  if (ctxt->nameNr > 0)
1940  ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1941  else
1942  ctxt->name = NULL;
1943  ret = ctxt->nameTab[ctxt->nameNr];
1944  ctxt->nameTab[ctxt->nameNr] = NULL;
1945  return (ret);
1946 }
1947 #endif /* LIBXML_PUSH_ENABLED */
1948 
/*
 * Pushes `value` on top of the element name stack (ctxt->nameTab) and
 * makes it the current name (ctxt->name). The table is doubled when full;
 * nameMax is only updated after the reallocation succeeded.
 *
 * Returns the index at which the name was stored, or -1 when ctxt is NULL
 * or the table could not be grown (a memory error is reported in the
 * latter case).
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * not visible in this listing.
 */
1958 int
1960 {
1961  if (ctxt == NULL) return (-1);
1962 
1963  if (ctxt->nameNr >= ctxt->nameMax) {
1964  const xmlChar * *tmp;
1965  tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1966  ctxt->nameMax * 2 *
1967  sizeof(ctxt->nameTab[0]));
1968  if (tmp == NULL) {
1969  goto mem_error;
1970  }
1971  ctxt->nameTab = tmp;
1972  ctxt->nameMax *= 2;
1973  }
1974  ctxt->nameTab[ctxt->nameNr] = value;
1975  ctxt->name = value;
1976  return (ctxt->nameNr++);
1977 mem_error:
1978  xmlErrMemory(ctxt, NULL);
1979  return (-1);
1980 }
/*
 * Pops the top entry of the element name stack (ctxt->nameTab).
 *
 * ctxt->name is set to the entry below the removed one, or to NULL when
 * the stack became empty; the vacated slot is reset to NULL.
 *
 * Returns the name that was removed, or NULL when ctxt is NULL or the
 * stack is empty.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * not visible in this listing.
 */
1989 const xmlChar *
1991 {
1992  const xmlChar *ret;
1993 
1994  if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1995  return (NULL);
1996  ctxt->nameNr--;
1997  if (ctxt->nameNr > 0)
1998  ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1999  else
2000  ctxt->name = NULL;
2001  ret = ctxt->nameTab[ctxt->nameNr];
2002  ctxt->nameTab[ctxt->nameNr] = NULL;
2003  return (ret);
2004 }
2005 
2006 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
2007  if (ctxt->spaceNr >= ctxt->spaceMax) {
2008  int *tmp;
2009 
2010  ctxt->spaceMax *= 2;
2011  tmp = (int *) xmlRealloc(ctxt->spaceTab,
2012  ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2013  if (tmp == NULL) {
2014  xmlErrMemory(ctxt, NULL);
2015  ctxt->spaceMax /=2;
2016  return(-1);
2017  }
2018  ctxt->spaceTab = tmp;
2019  }
2020  ctxt->spaceTab[ctxt->spaceNr] = val;
2021  ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2022  return(ctxt->spaceNr++);
2023 }
2024 
2025 static int spacePop(xmlParserCtxtPtr ctxt) {
2026  int ret;
2027  if (ctxt->spaceNr <= 0) return(0);
2028  ctxt->spaceNr--;
2029  if (ctxt->spaceNr > 0)
2030  ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2031  else
2032  ctxt->space = &ctxt->spaceTab[0];
2033  ret = ctxt->spaceTab[ctxt->spaceNr];
2034  ctxt->spaceTab[ctxt->spaceNr] = -1;
2035  return(ret);
2036 }
2037 
2038 /*
2039  * Macros for accessing the content. Those should be used only by the parser,
2040  * and not exported.
2041  *
2042  * Dirty macros, i.e. one often need to make assumption on the context to
2043  * use them
2044  *
2045  * CUR_PTR return the current pointer to the xmlChar to be parsed.
2046  * To be used with extreme caution since operations consuming
2047  * characters may move the input buffer to a different location !
2048  * CUR returns the current xmlChar value, i.e. an 8-bit value.
2049  * This should be used internally by the parser
2050  * only to compare to ASCII values, otherwise it would break when
2051  * running with UTF-8 encoding.
2052  * RAW same as CUR but in the input buffer, bypass any token
2053  * extraction that may have been done
2054  * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
2055  * to compare on ASCII based substring.
2056  * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2057  * strings without newlines within the parser.
2058  * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2059  * defined char within the parser.
2060  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2061  *
2062  * NEXT Skip to the next character, this does the proper decoding
2063  * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2064  * NEXTL(l) Skip the current unicode character of l xmlChars long.
2065  * CUR_CHAR(l) returns the current unicode character (int), set l
2066  * to the number of xmlChars used for the encoding [0-5].
2067  * CUR_SCHAR same but operate on a string instead of the context
2068  * COPY_BUF copy the current unicode char to the target buffer, increment
2069  * the index
2070  * GROW, SHRINK handling of input buffers
2071  */
2072 
/*
 * Raw accessors on the current input. All of them expect a variable named
 * `ctxt` in scope. RAW and CUR expand to the same expression.
 */
2073 #define RAW (*ctxt->input->cur)
2074 #define CUR (*ctxt->input->cur)
2075 #define NXT(val) ctxt->input->cur[(val)]
2076 #define CUR_PTR ctxt->input->cur
2077 #define BASE_PTR ctxt->input->base
2078 
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at `s` are exactly
 * c1..cn. Each macro extends the previous one by a single byte; && stops
 * at the first mismatch. `s` is evaluated several times, so it must be
 * free of side effects.
 */
2079 #define CMP4( s, c1, c2, c3, c4 ) \
2080  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2081  ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2082 #define CMP5( s, c1, c2, c3, c4, c5 ) \
2083  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2084 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2085  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2086 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2087  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2088 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2089  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2090 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2091  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2092  ((unsigned char *) s)[ 8 ] == c9 )
2093 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2094  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2095  ((unsigned char *) s)[ 9 ] == c10 )
2096 
/*
 * SKIP(val): advance the input pointer and the column counter by `val`
 * bytes (the line counter is not touched), then ask for more input if the
 * new current byte is 0.
 */
2097 #define SKIP(val) do { \
2098  ctxt->input->cur += (val),ctxt->input->col+=(val); \
2099  if (*ctxt->input->cur == 0) \
2100  xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2101  } while (0)
2102 
/*
 * SKIPL(val): advance `val` bytes one at a time, bumping the line counter
 * and resetting the column on each '\n', then ask for more input if the
 * new current byte is 0. `val` is used unparenthesized in the loop test.
 */
2103 #define SKIPL(val) do { \
2104  int skipl; \
2105  for(skipl=0; skipl<val; skipl++) { \
2106  if (*(ctxt->input->cur) == '\n') { \
2107  ctxt->input->line++; ctxt->input->col = 1; \
2108  } else ctxt->input->col++; \
2109  ctxt->input->cur++; \
2110  } \
2111  if (*ctxt->input->cur == 0) \
2112  xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2113  } while (0)
2114 
/*
 * SHRINK: when ctxt->progressive is 0, more than 2 * INPUT_CHUNK bytes lie
 * behind the cursor and fewer than 2 * INPUT_CHUNK lie ahead, call
 * xmlSHRINK(ctxt). Expands to a bare `if` statement that already ends in
 * ';', so take care when using it next to an `else`.
 */
2115 #define SHRINK if ((ctxt->progressive == 0) && \
2116  (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2117  (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2118  xmlSHRINK (ctxt);
2119 
2120 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2121  xmlParserInputShrink(ctxt->input);
2122  if (*ctxt->input->cur == 0)
2124 }
2125 
/*
 * GROW: when ctxt->progressive is 0 and fewer than INPUT_CHUNK bytes remain
 * between the cursor and the end of the input, call xmlGROW(ctxt).
 * Expands to a bare `if` statement that already ends in ';', so take care
 * when using it next to an `else`.
 */
2126 #define GROW if ((ctxt->progressive == 0) && \
2127  (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2128  xmlGROW (ctxt);
2129 
2130 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2131  ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2132  ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2133 
2134  if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2135  (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2136  ((ctxt->input->buf) &&
2138  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2139  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2140  xmlHaltParser(ctxt);
2141  return;
2142  }
2144  if ((ctxt->input->cur > ctxt->input->end) ||
2145  (ctxt->input->cur < ctxt->input->base)) {
2146  xmlHaltParser(ctxt);
2147  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2148  return;
2149  }
2150  if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2152 }
2153 
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars(ctxt). */
2154 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2155 
/* NEXT: shorthand for xmlNextChar(ctxt). */
2156 #define NEXT xmlNextChar(ctxt)
2157 
/*
 * NEXT1: advance the cursor and the column counter by one byte (the line
 * counter is not touched), then ask for more input if the new current
 * byte is 0. Expands to a plain brace block, not a do/while(0).
 */
2158 #define NEXT1 { \
2159  ctxt->input->col++; \
2160  ctxt->input->cur++; \
2161  if (*ctxt->input->cur == 0) \
2162  xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2163  }
2164 
/*
 * NEXTL(l): step over the current character, which occupies `l` bytes.
 * Line/column accounting is done once for the whole character: a '\n'
 * starts a new line, anything else advances the column by one.
 */
2165 #define NEXTL(l) do { \
2166  if (*(ctxt->input->cur) == '\n') { \
2167  ctxt->input->line++; ctxt->input->col = 1; \
2168  } else ctxt->input->col++; \
2169  ctxt->input->cur += l; \
2170  } while (0)
2171 
/* Thin wrappers passing the address of `l` so the callee can store a length in it. */
2172 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2173 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2174 
/*
 * COPY_BUF(l,b,i,v): append character `v` to buffer `b` at index `i`.
 * When l == 1 the value is stored as a single byte; otherwise
 * xmlCopyCharMultiByte() writes it and `i` grows by its return value.
 * Expands to a bare if/else, so it must be used as a complete statement.
 */
2175 #define COPY_BUF(l,b,i,v) \
2176  if (l == 1) b[i++] = (xmlChar) v; \
2177  else i += xmlCopyCharMultiByte(&b[i],v)
2178 
2189 int
2191  int res = 0;
2192 
2193  /*
2194  * It's Okay to use CUR/NEXT here since all the blanks are on
2195  * the ASCII range.
2196  */
2197  if (ctxt->instate != XML_PARSER_DTD) {
2198  const xmlChar *cur;
2199  /*
2200  * if we are in the document content, go really fast
2201  */
2202  cur = ctxt->input->cur;
2203  while (IS_BLANK_CH(*cur)) {
2204  if (*cur == '\n') {
2205  ctxt->input->line++; ctxt->input->col = 1;
2206  } else {
2207  ctxt->input->col++;
2208  }
2209  cur++;
2210  res++;
2211  if (*cur == 0) {
2212  ctxt->input->cur = cur;
2214  cur = ctxt->input->cur;
2215  }
2216  }
2217  ctxt->input->cur = cur;
2218  } else {
2219  int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2220 
2221  while (1) {
2222  if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2223  NEXT;
2224  } else if (CUR == '%') {
2225  /*
2226  * Need to handle support of entities branching here
2227  */
2228  if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2229  break;
2230  xmlParsePEReference(ctxt);
2231  } else if (CUR == 0) {
2232  if (ctxt->inputNr <= 1)
2233  break;
2234  xmlPopInput(ctxt);
2235  } else {
2236  break;
2237  }
2238 
2239  /*
2240  * Also increase the counter when entering or exiting a PERef.
2241  * The spec says: "When a parameter-entity reference is recognized
2242  * in the DTD and included, its replacement text MUST be enlarged
2243  * by the attachment of one leading and one following space (#x20)
2244  * character."
2245  */
2246  res++;
2247  }
2248  }
2249  return(res);
2250 }
2251 
2252 /************************************************************************
2253  * *
2254  * Commodity functions to handle entities *
2255  * *
2256  ************************************************************************/
2257 
2267 xmlChar
2269  if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2272  "Popping input %d\n", ctxt->inputNr);
2273  if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2274  (ctxt->instate != XML_PARSER_EOF))
2276  "Unfinished entity outside the DTD");
2278  if (*ctxt->input->cur == 0)
2280  return(CUR);
2281 }
2282 
2292 int
2294  int ret;
2295  if (input == NULL) return(-1);
2296 
2297  if (xmlParserDebugEntities) {
2298  if ((ctxt->input != NULL) && (ctxt->input->filename))
2300  "%s(%d): ", ctxt->input->filename,
2301  ctxt->input->line);
2303  "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2304  }
2305  if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2306  (ctxt->inputNr > 1024)) {
2308  while (ctxt->inputNr > 1)
2310  return(-1);
2311  }
2312  ret = inputPush(ctxt, input);
2313  if (ctxt->instate == XML_PARSER_EOF)
2314  return(-1);
2315  GROW;
2316  return(ret);
2317 }
2318 
2334 int
2336  int val = 0;
2337  int count = 0;
2338 
2339  /*
2340  * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2341  */
2342  if ((RAW == '&') && (NXT(1) == '#') &&
2343  (NXT(2) == 'x')) {
2344  SKIP(3);
2345  GROW;
2346  while (RAW != ';') { /* loop blocked by count */
2347  if (count++ > 20) {
2348  count = 0;
2349  GROW;
2350  if (ctxt->instate == XML_PARSER_EOF)
2351  return(0);
2352  }
2353  if ((RAW >= '0') && (RAW <= '9'))
2354  val = val * 16 + (CUR - '0');
2355  else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2356  val = val * 16 + (CUR - 'a') + 10;
2357  else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2358  val = val * 16 + (CUR - 'A') + 10;
2359  else {
2361  val = 0;
2362  break;
2363  }
2364  if (val > 0x110000)
2365  val = 0x110000;
2366 
2367  NEXT;
2368  count++;
2369  }
2370  if (RAW == ';') {
2371  /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2372  ctxt->input->col++;
2373  ctxt->input->cur++;
2374  }
2375  } else if ((RAW == '&') && (NXT(1) == '#')) {
2376  SKIP(2);
2377  GROW;
2378  while (RAW != ';') { /* loop blocked by count */
2379  if (count++ > 20) {
2380  count = 0;
2381  GROW;
2382  if (ctxt->instate == XML_PARSER_EOF)
2383  return(0);
2384  }
2385  if ((RAW >= '0') && (RAW <= '9'))
2386  val = val * 10 + (CUR - '0');
2387  else {
2389  val = 0;
2390  break;
2391  }
2392  if (val > 0x110000)
2393  val = 0x110000;
2394 
2395  NEXT;
2396  count++;
2397  }
2398  if (RAW == ';') {
2399  /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2400  ctxt->input->col++;
2401  ctxt->input->cur++;
2402  }
2403  } else {
2405  }
2406 
2407  /*
2408  * [ WFC: Legal Character ]
2409  * Characters referred to using character references must match the
2410  * production for Char.
2411  */
2412  if (val >= 0x110000) {
2413  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2414  "xmlParseCharRef: character reference out of bounds\n",
2415  val);
2416  } else if (IS_CHAR(val)) {
2417  return(val);
2418  } else {
2419  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2420  "xmlParseCharRef: invalid xmlChar value %d\n",
2421  val);
2422  }
2423  return(0);
2424 }
2425 
2444 static int
2446  const xmlChar *ptr;
2447  xmlChar cur;
2448  int val = 0;
2449 
2450  if ((str == NULL) || (*str == NULL)) return(0);
2451  ptr = *str;
2452  cur = *ptr;
2453  if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2454  ptr += 3;
2455  cur = *ptr;
2456  while (cur != ';') { /* Non input consuming loop */
2457  if ((cur >= '0') && (cur <= '9'))
2458  val = val * 16 + (cur - '0');
2459  else if ((cur >= 'a') && (cur <= 'f'))
2460  val = val * 16 + (cur - 'a') + 10;
2461  else if ((cur >= 'A') && (cur <= 'F'))
2462  val = val * 16 + (cur - 'A') + 10;
2463  else {
2465  val = 0;
2466  break;
2467  }
2468  if (val > 0x110000)
2469  val = 0x110000;
2470 
2471  ptr++;
2472  cur = *ptr;
2473  }
2474  if (cur == ';')
2475  ptr++;
2476  } else if ((cur == '&') && (ptr[1] == '#')){
2477  ptr += 2;
2478  cur = *ptr;
2479  while (cur != ';') { /* Non input consuming loops */
2480  if ((cur >= '0') && (cur <= '9'))
2481  val = val * 10 + (cur - '0');
2482  else {
2484  val = 0;
2485  break;
2486  }
2487  if (val > 0x110000)
2488  val = 0x110000;
2489 
2490  ptr++;
2491  cur = *ptr;
2492  }
2493  if (cur == ';')
2494  ptr++;
2495  } else {
2497  return(0);
2498  }
2499  *str = ptr;
2500 
2501  /*
2502  * [ WFC: Legal Character ]
2503  * Characters referred to using character references must match the
2504  * production for Char.
2505  */
2506  if (val >= 0x110000) {
2507  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2508  "xmlParseStringCharRef: character reference out of bounds\n",
2509  val);
2510  } else if (IS_CHAR(val)) {
2511  return(val);
2512  } else {
2513  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2514  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2515  val);
2516  }
2517  return(0);
2518 }
2519 
2552 void
2554  switch(ctxt->instate) {
2556  return;
2557  case XML_PARSER_COMMENT:
2558  return;
2559  case XML_PARSER_START_TAG:
2560  return;
2561  case XML_PARSER_END_TAG:
2562  return;
2563  case XML_PARSER_EOF:
2565  return;
2566  case XML_PARSER_PROLOG:
2567  case XML_PARSER_START:
2568  case XML_PARSER_MISC:
2570  return;
2572  case XML_PARSER_CONTENT:
2574  case XML_PARSER_PI:
2577  /* we just ignore it there */
2578  return;
2579  case XML_PARSER_EPILOG:
2581  return;
2583  /*
2584  * NOTE: in the case of entity values, we don't do the
2585  * substitution here since we need the literal
2586  * entity value to be able to save the internal
2587  * subset of the document.
2588  * This will be handled by xmlStringDecodeEntities
2589  */
2590  return;
2591  case XML_PARSER_DTD:
2592  /*
2593  * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2594  * In the internal DTD subset, parameter-entity references
2595  * can occur only where markup declarations can occur, not
2596  * within markup declarations.
2597  * In that case this is handled in xmlParseMarkupDecl
2598  */
2599  if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2600  return;
2601  if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2602  return;
2603  break;
2604  case XML_PARSER_IGNORE:
2605  return;
2606  }
2607 
2608  xmlParsePEReference(ctxt);
2609 }
2610 
2611 /*
2612  * Macro used to grow the current buffer.
2613  * buffer##_size is expected to be a size_t
2614  * mem_error: is expected to handle memory allocation failures
2615  */
/*
 * The new capacity is twice the old one plus `n`. The comparison against
 * the old size detects size_t wrap-around. `buffer` and buffer##_size are
 * only updated once xmlRealloc() has succeeded, so on failure the caller's
 * buffer is still valid when control reaches mem_error.
 */
2616 #define growBuffer(buffer, n) { \
2617  xmlChar *tmp; \
2618  size_t new_size = buffer##_size * 2 + n; \
2619  if (new_size < buffer##_size) goto mem_error; \
2620  tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
2621  if (tmp == NULL) goto mem_error; \
2622  buffer = tmp; \
2623  buffer##_size = new_size; \
2624 }
2625 
2645 xmlChar *
2647  int what, xmlChar end, xmlChar end2, xmlChar end3) {
2648  xmlChar *buffer = NULL;
2649  size_t buffer_size = 0;
2650  size_t nbchars = 0;
2651 
2652  xmlChar *current = NULL;
2653  xmlChar *rep = NULL;
2654  const xmlChar *last;
2655  xmlEntityPtr ent;
2656  int c,l;
2657 
2658  if ((ctxt == NULL) || (str == NULL) || (len < 0))
2659  return(NULL);
2660  last = str + len;
2661 
2662  if (((ctxt->depth > 40) &&
2663  ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2664  (ctxt->depth > 1024)) {
2666  return(NULL);
2667  }
2668 
2669  /*
2670  * allocate a translation buffer.
2671  */
2674  if (buffer == NULL) goto mem_error;
2675 
2676  /*
2677  * OK loop until we reach one of the ending char or a size limit.
2678  * we are operating on already parsed values.
2679  */
2680  if (str < last)
2681  c = CUR_SCHAR(str, l);
2682  else
2683  c = 0;
2684  while ((c != 0) && (c != end) && /* non input consuming loop */
2685  (c != end2) && (c != end3) &&
2686  (ctxt->instate != XML_PARSER_EOF)) {
2687 
2688  if (c == 0) break;
2689  if ((c == '&') && (str[1] == '#')) {
2690  int val = xmlParseStringCharRef(ctxt, &str);
2691  if (val == 0)
2692  goto int_error;
2693  COPY_BUF(0,buffer,nbchars,val);
2694  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2696  }
2697  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2700  "String decoding Entity Reference: %.30s\n",
2701  str);
2702  ent = xmlParseStringEntityRef(ctxt, &str);
2703  xmlParserEntityCheck(ctxt, 0, ent, 0);
2704  if (ent != NULL)
2705  ctxt->nbentities += ent->checked / 2;
2706  if ((ent != NULL) &&
2708  if (ent->content != NULL) {
2709  COPY_BUF(0,buffer,nbchars,ent->content[0]);
2710  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712  }
2713  } else {
2714  xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2715  "predefined entity has no content\n");
2716  goto int_error;
2717  }
2718  } else if ((ent != NULL) && (ent->content != NULL)) {
2719  ctxt->depth++;
2720  rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2721  0, 0, 0);
2722  ctxt->depth--;
2723  if (rep == NULL) {
2724  ent->content[0] = 0;
2725  goto int_error;
2726  }
2727 
2728  current = rep;
2729  while (*current != 0) { /* non input consuming loop */
2730  buffer[nbchars++] = *current++;
2731  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2732  if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2733  goto int_error;
2735  }
2736  }
2737  xmlFree(rep);
2738  rep = NULL;
2739  } else if (ent != NULL) {
2740  int i = xmlStrlen(ent->name);
2741  const xmlChar *cur = ent->name;
2742 
2743  buffer[nbchars++] = '&';
2744  if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2746  }
2747  for (;i > 0;i--)
2748  buffer[nbchars++] = *cur++;
2749  buffer[nbchars++] = ';';
2750  }
2751  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2754  "String decoding PE Reference: %.30s\n", str);
2755  ent = xmlParseStringPEReference(ctxt, &str);
2756  xmlParserEntityCheck(ctxt, 0, ent, 0);
2757  if (ent != NULL)
2758  ctxt->nbentities += ent->checked / 2;
2759  if (ent != NULL) {
2760  if (ent->content == NULL) {
2761  /*
2762  * Note: external parsed entities will not be loaded,
2763  * it is not required for a non-validating parser to
2764  * complete external PEReferences coming from the
2765  * internal subset
2766  */
2767  if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2768  ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2769  (ctxt->validate != 0)) {
2770  xmlLoadEntityContent(ctxt, ent);
2771  } else {
2772  xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2773  "not validating will not read content for PE entity %s\n",
2774  ent->name, NULL);
2775  }
2776  }
2777  ctxt->depth++;
2778  rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2779  0, 0, 0);
2780  ctxt->depth--;
2781  if (rep == NULL) {
2782  if (ent->content != NULL)
2783  ent->content[0] = 0;
2784  goto int_error;
2785  }
2786  current = rep;
2787  while (*current != 0) { /* non input consuming loop */
2788  buffer[nbchars++] = *current++;
2789  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790  if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2791  goto int_error;
2793  }
2794  }
2795  xmlFree(rep);
2796  rep = NULL;
2797  }
2798  } else {
2799  COPY_BUF(l,buffer,nbchars,c);
2800  str += l;
2801  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2803  }
2804  }
2805  if (str < last)
2806  c = CUR_SCHAR(str, l);
2807  else
2808  c = 0;
2809  }
2810  buffer[nbchars] = 0;
2811  return(buffer);
2812 
2813 mem_error:
2814  xmlErrMemory(ctxt, NULL);
2815 int_error:
2816  if (rep != NULL)
2817  xmlFree(rep);
2818  if (buffer != NULL)
2819  xmlFree(buffer);
2820  return(NULL);
2821 }
2822 
/*
 * Convenience wrapper around xmlStringLenDecodeEntities() for a
 * NUL-terminated string: the length passed on is xmlStrlen(str), and
 * `what`, `end`, `end2` and `end3` are forwarded unchanged.
 *
 * Returns NULL when ctxt or str is NULL, otherwise whatever
 * xmlStringLenDecodeEntities() returns.
 *
 * NOTE(review): the line carrying the function name and the leading
 * parameters is not visible in this listing.
 */
2841 xmlChar *
2843  xmlChar end, xmlChar end2, xmlChar end3) {
2844  if ((ctxt == NULL) || (str == NULL)) return(NULL);
2845  return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2846  end, end2, end3));
2847 }
2848 
2849 /************************************************************************
2850  * *
2851  * Commodity functions, cleanup needed ? *
2852  * *
2853  ************************************************************************/
2854 
2867 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2868  int blank_chars) {
2869  int i, ret;
2870  xmlNodePtr lastChild;
2871 
2872  /*
2873  * Don't spend time trying to differentiate them, the same callback is
2874  * used !
2875  */
2876  if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2877  return(0);
2878 
2879  /*
2880  * Check for xml:space value.
2881  */
2882  if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2883  (*(ctxt->space) == -2))
2884  return(0);
2885 
2886  /*
2887  * Check that the string is made of blanks
2888  */
2889  if (blank_chars == 0) {
2890  for (i = 0;i < len;i++)
2891  if (!(IS_BLANK_CH(str[i]))) return(0);
2892  }
2893 
2894  /*
2895  * Look if the element is mixed content in the DTD if available
2896  */
2897  if (ctxt->node == NULL) return(0);
2898  if (ctxt->myDoc != NULL) {
2899  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2900  if (ret == 0) return(1);
2901  if (ret == 1) return(0);
2902  }
2903 
2904  /*
2905  * Otherwise, heuristic :-\
2906  */
2907  if ((RAW != '<') && (RAW != 0xD)) return(0);
2908  if ((ctxt->node->children == NULL) &&
2909  (RAW == '<') && (NXT(1) == '/')) return(0);
2910 
2911  lastChild = xmlGetLastChild(ctxt->node);
2912  if (lastChild == NULL) {
2913  if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2914  (ctxt->node->content != NULL)) return(0);
2915  } else if (xmlNodeIsText(lastChild))
2916  return(0);
2917  else if ((ctxt->node->children != NULL) &&
2918  (xmlNodeIsText(ctxt->node->children)))
2919  return(0);
2920  return(1);
2921 }
2922 
2923 /************************************************************************
2924  * *
2925  * Extra stuff for namespace support *
2926  * Relates to http://www.w3.org/TR/WD-xml-names *
2927  * *
2928  ************************************************************************/
2929 
2948 xmlChar *
2951  xmlChar *buffer = NULL;
2952  int len = 0;
2953  int max = XML_MAX_NAMELEN;
2954  xmlChar *ret = NULL;
2955  const xmlChar *cur = name;
2956  int c;
2957 
2958  if (prefix == NULL) return(NULL);
2959  *prefix = NULL;
2960 
2961  if (cur == NULL) return(NULL);
2962 
2963 #ifndef XML_XML_NAMESPACE
2964  /* xml: prefix is not really a namespace */
2965  if ((cur[0] == 'x') && (cur[1] == 'm') &&
2966  (cur[2] == 'l') && (cur[3] == ':'))
2967  return(xmlStrdup(name));
2968 #endif
2969 
2970  /* nasty but well=formed */
2971  if (cur[0] == ':')
2972  return(xmlStrdup(name));
2973 
2974  c = *cur++;
2975  while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2976  buf[len++] = c;
2977  c = *cur++;
2978  }
2979  if (len >= max) {
2980  /*
2981  * Okay someone managed to make a huge name, so he's ready to pay
2982  * for the processing speed.
2983  */
2984  max = len * 2;
2985 
2986  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2987  if (buffer == NULL) {
2988  xmlErrMemory(ctxt, NULL);
2989  return(NULL);
2990  }
2991  memcpy(buffer, buf, len);
2992  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2993  if (len + 10 > max) {
2994  xmlChar *tmp;
2995 
2996  max *= 2;
2997  tmp = (xmlChar *) xmlRealloc(buffer,
2998  max * sizeof(xmlChar));
2999  if (tmp == NULL) {
3000  xmlFree(buffer);
3001  xmlErrMemory(ctxt, NULL);
3002  return(NULL);
3003  }
3004  buffer = tmp;
3005  }
3006  buffer[len++] = c;
3007  c = *cur++;
3008  }
3009  buffer[len] = 0;
3010  }
3011 
3012  if ((c == ':') && (*cur == 0)) {
3013  if (buffer != NULL)
3014  xmlFree(buffer);
3015  *prefix = NULL;
3016  return(xmlStrdup(name));
3017  }
3018 
3019  if (buffer == NULL)
3020  ret = xmlStrndup(buf, len);
3021  else {
3022  ret = buffer;
3023  buffer = NULL;
3024  max = XML_MAX_NAMELEN;
3025  }
3026 
3027 
3028  if (c == ':') {
3029  c = *cur;
3030  *prefix = ret;
3031  if (c == 0) {
3032  return(xmlStrndup(BAD_CAST "", 0));
3033  }
3034  len = 0;
3035 
3036  /*
3037  * Check that the first character is proper to start
3038  * a new name
3039  */
3040  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3041  ((c >= 0x41) && (c <= 0x5A)) ||
3042  (c == '_') || (c == ':'))) {
3043  int l;
3044  int first = CUR_SCHAR(cur, l);
3045 
3046  if (!IS_LETTER(first) && (first != '_')) {
3047  xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3048  "Name %s is not XML Namespace compliant\n",
3049  name);
3050  }
3051  }
3052  cur++;
3053 
3054  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3055  buf[len++] = c;
3056  c = *cur++;
3057  }
3058  if (len >= max) {
3059  /*
3060  * Okay someone managed to make a huge name, so he's ready to pay
3061  * for the processing speed.
3062  */
3063  max = len * 2;
3064 
3065  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3066  if (buffer == NULL) {
3067  xmlErrMemory(ctxt, NULL);
3068  return(NULL);
3069  }
3070  memcpy(buffer, buf, len);
3071  while (c != 0) { /* tested bigname2.xml */
3072  if (len + 10 > max) {
3073  xmlChar *tmp;
3074 
3075  max *= 2;
3076  tmp = (xmlChar *) xmlRealloc(buffer,
3077  max * sizeof(xmlChar));
3078  if (tmp == NULL) {
3079  xmlErrMemory(ctxt, NULL);
3080  xmlFree(buffer);
3081  return(NULL);
3082  }
3083  buffer = tmp;
3084  }
3085  buffer[len++] = c;
3086  c = *cur++;
3087  }
3088  buffer[len] = 0;
3089  }
3090 
3091  if (buffer == NULL)
3092  ret = xmlStrndup(buf, len);
3093  else {
3094  ret = buffer;
3095  }
3096  }
3097 
3098  return(ret);
3099 }
3100 
3101 /************************************************************************
3102  * *
3103  * The parser itself *
3104  * Relates to http://www.w3.org/TR/REC-xml *
3105  * *
3106  ************************************************************************/
3107 
3108 /************************************************************************
3109  * *
3110  * Routines to parse Name, NCName and NmToken *
3111  * *
3112  ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, only compiled into DEBUG builds. Each one is bumped at
 * the entry of the parsing routine it is named after.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3121 
/*
 * The two following functions implement the change of accepted
 * characters for Name and NmToken made in Revision 5 of XML-1.0.
 * They correspond to the modified production [4] and the new production
 * [4a] of that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are no longer
 * needed for the new rules.
 * We still keep compatibility with the pre-revision-5 parsing semantics
 * if the XML_PARSE_OLD10 option is given to the parser.
 */
3132 static int
3134  if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3135  /*
3136  * Use the new checks of production [4] [4a] amd [5] of the
3137  * Update 5 of XML-1.0
3138  */
3139  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3140  (((c >= 'a') && (c <= 'z')) ||
3141  ((c >= 'A') && (c <= 'Z')) ||
3142  (c == '_') || (c == ':') ||
3143  ((c >= 0xC0) && (c <= 0xD6)) ||
3144  ((c >= 0xD8) && (c <= 0xF6)) ||
3145  ((c >= 0xF8) && (c <= 0x2FF)) ||
3146  ((c >= 0x370) && (c <= 0x37D)) ||
3147  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3148  ((c >= 0x200C) && (c <= 0x200D)) ||
3149  ((c >= 0x2070) && (c <= 0x218F)) ||
3150  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3151  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3152  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3153  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3154  ((c >= 0x10000) && (c <= 0xEFFFF))))
3155  return(1);
3156  } else {
3157  if (IS_LETTER(c) || (c == '_') || (c == ':'))
3158  return(1);
3159  }
3160  return(0);
3161 }
3162 
3163 static int
3165  if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3166  /*
3167  * Use the new checks of production [4] [4a] amd [5] of the
3168  * Update 5 of XML-1.0
3169  */
3170  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3171  (((c >= 'a') && (c <= 'z')) ||
3172  ((c >= 'A') && (c <= 'Z')) ||
3173  ((c >= '0') && (c <= '9')) || /* !start */
3174  (c == '_') || (c == ':') ||
3175  (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3176  ((c >= 0xC0) && (c <= 0xD6)) ||
3177  ((c >= 0xD8) && (c <= 0xF6)) ||
3178  ((c >= 0xF8) && (c <= 0x2FF)) ||
3179  ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3180  ((c >= 0x370) && (c <= 0x37D)) ||
3181  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3182  ((c >= 0x200C) && (c <= 0x200D)) ||
3183  ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3184  ((c >= 0x2070) && (c <= 0x218F)) ||
3185  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3186  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3187  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3188  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3189  ((c >= 0x10000) && (c <= 0xEFFFF))))
3190  return(1);
3191  } else {
3192  if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3193  (c == '.') || (c == '-') ||
3194  (c == '_') || (c == ':') ||
3195  (IS_COMBINING(c)) ||
3196  (IS_EXTENDER(c)))
3197  return(1);
3198  }
3199  return(0);
3200 }
3201 
3203  int *len, int *alloc, int normalize);
3204 
3205 static const xmlChar *
3207  int len = 0, l;
3208  int c;
3209  int count = 0;
3210 
3211 #ifdef DEBUG
3212  nbParseNameComplex++;
3213 #endif
3214 
3215  /*
3216  * Handler for more complex cases
3217  */
3218  GROW;
3219  if (ctxt->instate == XML_PARSER_EOF)
3220  return(NULL);
3221  c = CUR_CHAR(l);
3222  if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223  /*
3224  * Use the new checks of production [4] [4a] amd [5] of the
3225  * Update 5 of XML-1.0
3226  */
3227  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3228  (!(((c >= 'a') && (c <= 'z')) ||
3229  ((c >= 'A') && (c <= 'Z')) ||
3230  (c == '_') || (c == ':') ||
3231  ((c >= 0xC0) && (c <= 0xD6)) ||
3232  ((c >= 0xD8) && (c <= 0xF6)) ||
3233  ((c >= 0xF8) && (c <= 0x2FF)) ||
3234  ((c >= 0x370) && (c <= 0x37D)) ||
3235  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3236  ((c >= 0x200C) && (c <= 0x200D)) ||
3237  ((c >= 0x2070) && (c <= 0x218F)) ||
3238  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3239  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3240  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3241  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3242  ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3243  return(NULL);
3244  }
3245  len += l;
3246  NEXTL(l);
3247  c = CUR_CHAR(l);
3248  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3249  (((c >= 'a') && (c <= 'z')) ||
3250  ((c >= 'A') && (c <= 'Z')) ||
3251  ((c >= '0') && (c <= '9')) || /* !start */
3252  (c == '_') || (c == ':') ||
3253  (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3254  ((c >= 0xC0) && (c <= 0xD6)) ||
3255  ((c >= 0xD8) && (c <= 0xF6)) ||
3256  ((c >= 0xF8) && (c <= 0x2FF)) ||
3257  ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3258  ((c >= 0x370) && (c <= 0x37D)) ||
3259  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3260  ((c >= 0x200C) && (c <= 0x200D)) ||
3261  ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3262  ((c >= 0x2070) && (c <= 0x218F)) ||
3263  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3264  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3265  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3266  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3267  ((c >= 0x10000) && (c <= 0xEFFFF))
3268  )) {
3269  if (count++ > XML_PARSER_CHUNK_SIZE) {
3270  count = 0;
3271  GROW;
3272  if (ctxt->instate == XML_PARSER_EOF)
3273  return(NULL);
3274  }
3275  len += l;
3276  NEXTL(l);
3277  c = CUR_CHAR(l);
3278  }
3279  } else {
3280  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3281  (!IS_LETTER(c) && (c != '_') &&
3282  (c != ':'))) {
3283  return(NULL);
3284  }
3285  len += l;
3286  NEXTL(l);
3287  c = CUR_CHAR(l);
3288 
3289  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3290  ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3291  (c == '.') || (c == '-') ||
3292  (c == '_') || (c == ':') ||
3293  (IS_COMBINING(c)) ||
3294  (IS_EXTENDER(c)))) {
3295  if (count++ > XML_PARSER_CHUNK_SIZE) {
3296  count = 0;
3297  GROW;
3298  if (ctxt->instate == XML_PARSER_EOF)
3299  return(NULL);
3300  }
3301  len += l;
3302  NEXTL(l);
3303  c = CUR_CHAR(l);
3304  }
3305  }
3306  if ((len > XML_MAX_NAME_LENGTH) &&
3307  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3308  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3309  return(NULL);
3310  }
3311  if (ctxt->input->cur - ctxt->input->base < len) {
3312  /*
3313  * There were a couple of bugs where PERefs lead to to a change
3314  * of the buffer. Check the buffer size to avoid passing an invalid
3315  * pointer to xmlDictLookup.
3316  */
3318  "unexpected change of input buffer");
3319  return (NULL);
3320  }
3321  if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3322  return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3323  return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3324 }
3325 
3342 const xmlChar *
3344  const xmlChar *in;
3345  const xmlChar *ret;
3346  int count = 0;
3347 
3348  GROW;
3349 
3350 #ifdef DEBUG
3351  nbParseName++;
3352 #endif
3353 
3354  /*
3355  * Accelerator for simple ASCII names
3356  */
3357  in = ctxt->input->cur;
3358  if (((*in >= 0x61) && (*in <= 0x7A)) ||
3359  ((*in >= 0x41) && (*in <= 0x5A)) ||
3360  (*in == '_') || (*in == ':')) {
3361  in++;
3362  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3363  ((*in >= 0x41) && (*in <= 0x5A)) ||
3364  ((*in >= 0x30) && (*in <= 0x39)) ||
3365  (*in == '_') || (*in == '-') ||
3366  (*in == ':') || (*in == '.'))
3367  in++;
3368  if ((*in > 0) && (*in < 0x80)) {
3369  count = in - ctxt->input->cur;
3370  if ((count > XML_MAX_NAME_LENGTH) &&
3371  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3372  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3373  return(NULL);
3374  }
3375  ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3376  ctxt->input->cur = in;
3377  ctxt->input->col += count;
3378  if (ret == NULL)
3379  xmlErrMemory(ctxt, NULL);
3380  return(ret);
3381  }
3382  }
3383  /* accelerator for special cases */
3384  return(xmlParseNameComplex(ctxt));
3385 }
3386 
3387 static const xmlChar *
3389  int len = 0, l;
3390  int c;
3391  int count = 0;
3392  size_t startPosition = 0;
3393 
3394 #ifdef DEBUG
3395  nbParseNCNameComplex++;
3396 #endif
3397 
3398  /*
3399  * Handler for more complex cases
3400  */
3401  GROW;
3402  startPosition = CUR_PTR - BASE_PTR;
3403  c = CUR_CHAR(l);
3404  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3405  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3406  return(NULL);
3407  }
3408 
3409  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3410  (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3411  if (count++ > XML_PARSER_CHUNK_SIZE) {
3412  if ((len > XML_MAX_NAME_LENGTH) &&
3413  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3414  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3415  return(NULL);
3416  }
3417  count = 0;
3418  GROW;
3419  if (ctxt->instate == XML_PARSER_EOF)
3420  return(NULL);
3421  }
3422  len += l;
3423  NEXTL(l);
3424  c = CUR_CHAR(l);
3425  if (c == 0) {
3426  count = 0;
3427  /*
3428  * when shrinking to extend the buffer we really need to preserve
3429  * the part of the name we already parsed. Hence rolling back
3430  * by current length.
3431  */
3432  ctxt->input->cur -= l;
3433  GROW;
3434  if (ctxt->instate == XML_PARSER_EOF)
3435  return(NULL);
3436  ctxt->input->cur += l;
3437  c = CUR_CHAR(l);
3438  }
3439  }
3440  if ((len > XML_MAX_NAME_LENGTH) &&
3441  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3442  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3443  return(NULL);
3444  }
3445  return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3446 }
3447 
3463 static const xmlChar *
3465  const xmlChar *in, *e;
3466  const xmlChar *ret;
3467  int count = 0;
3468 
3469 #ifdef DEBUG
3470  nbParseNCName++;
3471 #endif
3472 
3473  /*
3474  * Accelerator for simple ASCII names
3475  */
3476  in = ctxt->input->cur;
3477  e = ctxt->input->end;
3478  if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3479  ((*in >= 0x41) && (*in <= 0x5A)) ||
3480  (*in == '_')) && (in < e)) {
3481  in++;
3482  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3483  ((*in >= 0x41) && (*in <= 0x5A)) ||
3484  ((*in >= 0x30) && (*in <= 0x39)) ||
3485  (*in == '_') || (*in == '-') ||
3486  (*in == '.')) && (in < e))
3487  in++;
3488  if (in >= e)
3489  goto complex;
3490  if ((*in > 0) && (*in < 0x80)) {
3491  count = in - ctxt->input->cur;
3492  if ((count > XML_MAX_NAME_LENGTH) &&
3493  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3494  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3495  return(NULL);
3496  }
3497  ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3498  ctxt->input->cur = in;
3499  ctxt->input->col += count;
3500  if (ret == NULL) {
3501  xmlErrMemory(ctxt, NULL);
3502  }
3503  return(ret);
3504  }
3505  }
3506 complex:
3507  return(xmlParseNCNameComplex(ctxt));
3508 }
3509 
3521 static const xmlChar *
3523  register const xmlChar *cmp = other;
3524  register const xmlChar *in;
3525  const xmlChar *ret;
3526 
3527  GROW;
3528  if (ctxt->instate == XML_PARSER_EOF)
3529  return(NULL);
3530 
3531  in = ctxt->input->cur;
3532  while (*in != 0 && *in == *cmp) {
3533  ++in;
3534  ++cmp;
3535  }
3536  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3537  /* success */
3538  ctxt->input->col += in - ctxt->input->cur;
3539  ctxt->input->cur = in;
3540  return (const xmlChar*) 1;
3541  }
3542  /* failure (or end of input buffer), check with full function */
3543  ret = xmlParseName (ctxt);
3544  /* strings coming from the dictionary direct compare possible */
3545  if (ret == other) {
3546  return (const xmlChar*) 1;
3547  }
3548  return ret;
3549 }
3550 
3569 static xmlChar *
3572  const xmlChar *cur = *str;
3573  int len = 0, l;
3574  int c;
3575 
3576 #ifdef DEBUG
3577  nbParseStringName++;
3578 #endif
3579 
3580  c = CUR_SCHAR(cur, l);
3581  if (!xmlIsNameStartChar(ctxt, c)) {
3582  return(NULL);
3583  }
3584 
3585  COPY_BUF(l,buf,len,c);
3586  cur += l;
3587  c = CUR_SCHAR(cur, l);
3588  while (xmlIsNameChar(ctxt, c)) {
3589  COPY_BUF(l,buf,len,c);
3590  cur += l;
3591  c = CUR_SCHAR(cur, l);
3592  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3593  /*
3594  * Okay someone managed to make a huge name, so he's ready to pay
3595  * for the processing speed.
3596  */
3597  xmlChar *buffer;
3598  int max = len * 2;
3599 
3600  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3601  if (buffer == NULL) {
3602  xmlErrMemory(ctxt, NULL);
3603  return(NULL);
3604  }
3605  memcpy(buffer, buf, len);
3606  while (xmlIsNameChar(ctxt, c)) {
3607  if (len + 10 > max) {
3608  xmlChar *tmp;
3609 
3610  if ((len > XML_MAX_NAME_LENGTH) &&
3611  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3612  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3613  xmlFree(buffer);
3614  return(NULL);
3615  }
3616  max *= 2;
3617  tmp = (xmlChar *) xmlRealloc(buffer,
3618  max * sizeof(xmlChar));
3619  if (tmp == NULL) {
3620  xmlErrMemory(ctxt, NULL);
3621  xmlFree(buffer);
3622  return(NULL);
3623  }
3624  buffer = tmp;
3625  }
3626  COPY_BUF(l,buffer,len,c);
3627  cur += l;
3628  c = CUR_SCHAR(cur, l);
3629  }
3630  buffer[len] = 0;
3631  *str = cur;
3632  return(buffer);
3633  }
3634  }
3635  if ((len > XML_MAX_NAME_LENGTH) &&
3636  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3637  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3638  return(NULL);
3639  }
3640  *str = cur;
3641  return(xmlStrndup(buf, len));
3642 }
3643 
3657 xmlChar *
3660  int len = 0, l;
3661  int c;
3662  int count = 0;
3663 
3664 #ifdef DEBUG
3665  nbParseNmToken++;
3666 #endif
3667 
3668  GROW;
3669  if (ctxt->instate == XML_PARSER_EOF)
3670  return(NULL);
3671  c = CUR_CHAR(l);
3672 
3673  while (xmlIsNameChar(ctxt, c)) {
3674  if (count++ > XML_PARSER_CHUNK_SIZE) {
3675  count = 0;
3676  GROW;
3677  }
3678  COPY_BUF(l,buf,len,c);
3679  NEXTL(l);
3680  c = CUR_CHAR(l);
3681  if (c == 0) {
3682  count = 0;
3683  GROW;
3684  if (ctxt->instate == XML_PARSER_EOF)
3685  return(NULL);
3686  c = CUR_CHAR(l);
3687  }
3688  if (len >= XML_MAX_NAMELEN) {
3689  /*
3690  * Okay someone managed to make a huge token, so he's ready to pay
3691  * for the processing speed.
3692  */
3693  xmlChar *buffer;
3694  int max = len * 2;
3695 
3696  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3697  if (buffer == NULL) {
3698  xmlErrMemory(ctxt, NULL);
3699  return(NULL);
3700  }
3701  memcpy(buffer, buf, len);
3702  while (xmlIsNameChar(ctxt, c)) {
3703  if (count++ > XML_PARSER_CHUNK_SIZE) {
3704  count = 0;
3705  GROW;
3706  if (ctxt->instate == XML_PARSER_EOF) {
3707  xmlFree(buffer);
3708  return(NULL);
3709  }
3710  }
3711  if (len + 10 > max) {
3712  xmlChar *tmp;
3713 
3714  if ((max > XML_MAX_NAME_LENGTH) &&
3715  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3716  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3717  xmlFree(buffer);
3718  return(NULL);
3719  }
3720  max *= 2;
3721  tmp = (xmlChar *) xmlRealloc(buffer,
3722  max * sizeof(xmlChar));
3723  if (tmp == NULL) {
3724  xmlErrMemory(ctxt, NULL);
3725  xmlFree(buffer);
3726  return(NULL);
3727  }
3728  buffer = tmp;
3729  }
3730  COPY_BUF(l,buffer,len,c);
3731  NEXTL(l);
3732  c = CUR_CHAR(l);
3733  }
3734  buffer[len] = 0;
3735  return(buffer);
3736  }
3737  }
3738  if (len == 0)
3739  return(NULL);
3740  if ((len > XML_MAX_NAME_LENGTH) &&
3741  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3742  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3743  return(NULL);
3744  }
3745  return(xmlStrndup(buf, len));
3746 }
3747 
3761 xmlChar *
3763  xmlChar *buf = NULL;
3764  int len = 0;
3766  int c, l;
3767  xmlChar stop;
3768  xmlChar *ret = NULL;
3769  const xmlChar *cur = NULL;
3771 
3772  if (RAW == '"') stop = '"';
3773  else if (RAW == '\'') stop = '\'';
3774  else {
3776  return(NULL);
3777  }
3778  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3779  if (buf == NULL) {
3780  xmlErrMemory(ctxt, NULL);
3781  return(NULL);
3782  }
3783 
3784  /*
3785  * The content of the entity definition is copied in a buffer.
3786  */
3787 
3789  input = ctxt->input;
3790  GROW;
3791  if (ctxt->instate == XML_PARSER_EOF)
3792  goto error;
3793  NEXT;
3794  c = CUR_CHAR(l);
3795  /*
3796  * NOTE: 4.4.5 Included in Literal
3797  * When a parameter entity reference appears in a literal entity
3798  * value, ... a single or double quote character in the replacement
3799  * text is always treated as a normal data character and will not
3800  * terminate the literal.
3801  * In practice it means we stop the loop only when back at parsing
3802  * the initial entity and the quote is found
3803  */
3804  while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3805  (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3806  if (len + 5 >= size) {
3807  xmlChar *tmp;
3808 
3809  size *= 2;
3810  tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3811  if (tmp == NULL) {
3812  xmlErrMemory(ctxt, NULL);
3813  goto error;
3814  }
3815  buf = tmp;
3816  }
3817  COPY_BUF(l,buf,len,c);
3818  NEXTL(l);
3819 
3820  GROW;
3821  c = CUR_CHAR(l);
3822  if (c == 0) {
3823  GROW;
3824  c = CUR_CHAR(l);
3825  }
3826  }
3827  buf[len] = 0;
3828  if (ctxt->instate == XML_PARSER_EOF)
3829  goto error;
3830  if (c != stop) {
3832  goto error;
3833  }
3834  NEXT;
3835 
3836  /*
3837  * Raise problem w.r.t. '&' and '%' being used in non-entities
3838  * reference constructs. Note Charref will be handled in
3839  * xmlStringDecodeEntities()
3840  */
3841  cur = buf;
3842  while (*cur != 0) { /* non input consuming */
3843  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3844  xmlChar *name;
3845  xmlChar tmp = *cur;
3846  int nameOk = 0;
3847 
3848  cur++;
3849  name = xmlParseStringName(ctxt, &cur);
3850  if (name != NULL) {
3851  nameOk = 1;
3852  xmlFree(name);
3853  }
3854  if ((nameOk == 0) || (*cur != ';')) {
3855  xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3856  "EntityValue: '%c' forbidden except for entities references\n",
3857  tmp);
3858  goto error;
3859  }
3860  if ((tmp == '%') && (ctxt->inSubset == 1) &&
3861  (ctxt->inputNr == 1)) {
3863  goto error;
3864  }
3865  if (*cur == 0)
3866  break;
3867  }
3868  cur++;
3869  }
3870 
3871  /*
3872  * Then PEReference entities are substituted.
3873  *
3874  * NOTE: 4.4.7 Bypassed
3875  * When a general entity reference appears in the EntityValue in
3876  * an entity declaration, it is bypassed and left as is.
3877  * so XML_SUBSTITUTE_REF is not set here.
3878  */
3879  ++ctxt->depth;
3881  0, 0, 0);
3882  --ctxt->depth;
3883  if (orig != NULL) {
3884  *orig = buf;
3885  buf = NULL;
3886  }
3887 
3888 error:
3889  if (buf != NULL)
3890  xmlFree(buf);
3891  return(ret);
3892 }
3893 
3906 static xmlChar *
3908  xmlChar limit = 0;
3909  xmlChar *buf = NULL;
3910  xmlChar *rep = NULL;
3911  size_t len = 0;
3912  size_t buf_size = 0;
3913  int c, l, in_space = 0;
3914  xmlChar *current = NULL;
3915  xmlEntityPtr ent;
3916 
3917  if (NXT(0) == '"') {
3919  limit = '"';
3920  NEXT;
3921  } else if (NXT(0) == '\'') {
3922  limit = '\'';
3924  NEXT;
3925  } else {
3927  return(NULL);
3928  }
3929 
3930  /*
3931  * allocate a translation buffer.
3932  */
3933  buf_size = XML_PARSER_BUFFER_SIZE;
3934  buf = (xmlChar *) xmlMallocAtomic(buf_size);
3935  if (buf == NULL) goto mem_error;
3936 
3937  /*
3938  * OK loop until we reach one of the ending char or a size limit.
3939  */
3940  c = CUR_CHAR(l);
3941  while (((NXT(0) != limit) && /* checked */
3942  (IS_CHAR(c)) && (c != '<')) &&
3943  (ctxt->instate != XML_PARSER_EOF)) {
3944  /*
3945  * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3946  * special option is given
3947  */
3948  if ((len > XML_MAX_TEXT_LENGTH) &&
3949  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3950  xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3951  "AttValue length too long\n");
3952  goto mem_error;
3953  }
3954  if (c == '&') {
3955  in_space = 0;
3956  if (NXT(1) == '#') {
3957  int val = xmlParseCharRef(ctxt);
3958 
3959  if (val == '&') {
3960  if (ctxt->replaceEntities) {
3961  if (len + 10 > buf_size) {
3962  growBuffer(buf, 10);
3963  }
3964  buf[len++] = '&';
3965  } else {
3966  /*
3967  * The reparsing will be done in xmlStringGetNodeList()
3968  * called by the attribute() function in SAX.c
3969  */
3970  if (len + 10 > buf_size) {
3971  growBuffer(buf, 10);
3972  }
3973  buf[len++] = '&';
3974  buf[len++] = '#';
3975  buf[len++] = '3';
3976  buf[len++] = '8';
3977  buf[len++] = ';';
3978  }
3979  } else if (val != 0) {
3980  if (len + 10 > buf_size) {
3981  growBuffer(buf, 10);
3982  }
3983  len += xmlCopyChar(0, &buf[len], val);
3984  }
3985  } else {
3986  ent = xmlParseEntityRef(ctxt);
3987  ctxt->nbentities++;
3988  if (ent != NULL)
3989  ctxt->nbentities += ent->owner;
3990  if ((ent != NULL) &&
3992  if (len + 10 > buf_size) {
3993  growBuffer(buf, 10);
3994  }
3995  if ((ctxt->replaceEntities == 0) &&
3996  (ent->content[0] == '&')) {
3997  buf[len++] = '&';
3998  buf[len++] = '#';
3999  buf[len++] = '3';
4000  buf[len++] = '8';
4001  buf[len++] = ';';
4002  } else {
4003  buf[len++] = ent->content[0];
4004  }
4005  } else if ((ent != NULL) &&
4006  (ctxt->replaceEntities != 0)) {
4007  if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4008  ++ctxt->depth;
4009  rep = xmlStringDecodeEntities(ctxt, ent->content,
4011  0, 0, 0);
4012  --ctxt->depth;
4013  if (rep != NULL) {
4014  current = rep;
4015  while (*current != 0) { /* non input consuming */
4016  if ((*current == 0xD) || (*current == 0xA) ||
4017  (*current == 0x9)) {
4018  buf[len++] = 0x20;
4019  current++;
4020  } else
4021  buf[len++] = *current++;
4022  if (len + 10 > buf_size) {
4023  growBuffer(buf, 10);
4024  }
4025  }
4026  xmlFree(rep);
4027  rep = NULL;
4028  }
4029  } else {
4030  if (len + 10 > buf_size) {
4031  growBuffer(buf, 10);
4032  }
4033  if (ent->content != NULL)
4034  buf[len++] = ent->content[0];
4035  }
4036  } else if (ent != NULL) {
4037  int i = xmlStrlen(ent->name);
4038  const xmlChar *cur = ent->name;
4039 
4040  /*
4041  * This may look absurd but is needed to detect
4042  * entities problems
4043  */
4044  if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4045  (ent->content != NULL) && (ent->checked == 0)) {
4046  unsigned long oldnbent = ctxt->nbentities, diff;
4047 
4048  ++ctxt->depth;
4049  rep = xmlStringDecodeEntities(ctxt, ent->content,
4050  XML_SUBSTITUTE_REF, 0, 0, 0);
4051  --ctxt->depth;
4052 
4053  diff = ctxt->nbentities - oldnbent + 1;
4054  if (diff > INT_MAX / 2)
4055  diff = INT_MAX / 2;
4056  ent->checked = diff * 2;
4057  if (rep != NULL) {
4058  if (xmlStrchr(rep, '<'))
4059  ent->checked |= 1;
4060  xmlFree(rep);
4061  rep = NULL;
4062  } else {
4063  ent->content[0] = 0;
4064  }
4065  }
4066 
4067  /*
4068  * Just output the reference
4069  */
4070  buf[len++] = '&';
4071  while (len + i + 10 > buf_size) {
4072  growBuffer(buf, i + 10);
4073  }
4074  for (;i > 0;i--)
4075  buf[len++] = *cur++;
4076  buf[len++] = ';';
4077  }
4078  }
4079  } else {
4080  if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4081  if ((len != 0) || (!normalize)) {
4082  if ((!normalize) || (!in_space)) {
4083  COPY_BUF(l,buf,len,0x20);
4084  while (len + 10 > buf_size) {
4085  growBuffer(buf, 10);
4086  }
4087  }
4088  in_space = 1;
4089  }
4090  } else {
4091  in_space = 0;
4092  COPY_BUF(l,buf,len,c);
4093  if (len + 10 > buf_size) {
4094  growBuffer(buf, 10);
4095  }
4096  }
4097  NEXTL(l);
4098  }
4099  GROW;
4100  c = CUR_CHAR(l);
4101  }
4102  if (ctxt->instate == XML_PARSER_EOF)
4103  goto error;
4104 
4105  if ((in_space) && (normalize)) {
4106  while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4107  }
4108  buf[len] = 0;
4109  if (RAW == '<') {
4111  } else if (RAW != limit) {
4112  if ((c != 0) && (!IS_CHAR(c))) {
4113  xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4114  "invalid character in attribute value\n");
4115  } else {
4116  xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4117  "AttValue: ' expected\n");
4118  }
4119  } else
4120  NEXT;
4121 
4122  /*
4123  * There we potentially risk an overflow, don't allow attribute value of
4124  * length more than INT_MAX it is a very reasonable assumption !
4125  */
4126  if (len >= INT_MAX) {
4127  xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4128  "AttValue length too long\n");
4129  goto mem_error;
4130  }
4131 
4132  if (attlen != NULL) *attlen = (int) len;
4133  return(buf);
4134 
4135 mem_error:
4136  xmlErrMemory(ctxt, NULL);
4137 error:
4138  if (buf != NULL)
4139  xmlFree(buf);
4140  if (rep != NULL)
4141  xmlFree(rep);
4142  return(NULL);
4143 }
4144 
4179 xmlChar *
4181  if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4182  return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4183 }
4184 
4196 xmlChar *
4198  xmlChar *buf = NULL;
4199  int len = 0;
4201  int cur, l;
4202  xmlChar stop;
4203  int state = ctxt->instate;
4204  int count = 0;
4205 
4206  SHRINK;
4207  if (RAW == '"') {
4208  NEXT;
4209  stop = '"';
4210  } else if (RAW == '\'') {
4211  NEXT;
4212  stop = '\'';
4213  } else {
4215  return(NULL);
4216  }
4217 
4218  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4219  if (buf == NULL) {
4220  xmlErrMemory(ctxt, NULL);
4221  return(NULL);
4222  }
4224  cur = CUR_CHAR(l);
4225  while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4226  if (len + 5 >= size) {
4227  xmlChar *tmp;
4228 
4229  if ((size > XML_MAX_NAME_LENGTH) &&
4230  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4231  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4232  xmlFree(buf);
4233  ctxt->instate = (xmlParserInputState) state;
4234  return(NULL);
4235  }
4236  size *= 2;
4237  tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4238  if (tmp == NULL) {
4239  xmlFree(buf);
4240  xmlErrMemory(ctxt, NULL);
4241  ctxt->instate = (xmlParserInputState) state;
4242  return(NULL);
4243  }
4244  buf = tmp;
4245  }
4246  count++;
4247  if (count > 50) {
4248  SHRINK;
4249  GROW;
4250  count = 0;
4251  if (ctxt->instate == XML_PARSER_EOF) {
4252  xmlFree(buf);
4253  return(NULL);
4254  }
4255  }
4256  COPY_BUF(l,buf,len,cur);
4257  NEXTL(l);
4258  cur = CUR_CHAR(l);
4259  if (cur == 0) {
4260  GROW;
4261  SHRINK;
4262  cur = CUR_CHAR(l);
4263  }
4264  }
4265  buf[len] = 0;
4266  ctxt->instate = (xmlParserInputState) state;
4267  if (!IS_CHAR(cur)) {
4269  } else {
4270  NEXT;
4271  }
4272  return(buf);
4273 }
4274 
/*
 * Parse a quoted public identifier literal into a freshly allocated,
 * 0-terminated buffer.
 *
 * NOTE(review): the embedded numbering skips 4287, 4290, 4304, 4312 and
 * 4354, so the line carrying the function name and parameters, presumably
 * the declaration of size, and three statements are not visible here.  The
 * call to xmlParsePubidLiteral(ctxt) later in the file suggests that this
 * is that function -- confirm against the real source.
 *
 * ctxt: parser context; its input is consumed and ctxt->instate is reset
 *       to its entry value on the normal exit path.
 *
 * Returns the buffer, or NULL when the literal does not start with a quote,
 * when an allocation fails, when the buffer would have to grow past
 * XML_MAX_NAME_LENGTH without XML_PARSE_HUGE, or when the parser reaches
 * XML_PARSER_EOF while reading.  Every NULL return taken after the
 * allocation frees the buffer first.  Presumably the caller owns a non-NULL
 * result -- confirm.
 */
4286 xmlChar *
4288  xmlChar *buf = NULL;
4289  int len = 0;
4291  xmlChar cur;
4292  xmlChar stop;
4293  int count = 0;
4294  xmlParserInputState oldstate = ctxt->instate;
4295 
4296  SHRINK;
  /* The literal must open with a double or a single quote; remember which
   * one so the same character is required to close it. */
4297  if (RAW == '"') {
4298  NEXT;
4299  stop = '"';
4300  } else if (RAW == '\'') {
4301  NEXT;
4302  stop = '\'';
4303  } else {
  /* NOTE(review): the statement numbered 4304 is missing from this listing. */
4305  return(NULL);
4306  }
4307  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4308  if (buf == NULL) {
4309  xmlErrMemory(ctxt, NULL);
4310  return(NULL);
4311  }
4313  cur = CUR;
  /* Collect bytes while they satisfy IS_PUBIDCHAR_CH and the closing quote
   * has not been reached. */
4314  while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
  /* Grow before the next byte would leave no room for the terminator. */
4315  if (len + 1 >= size) {
4316  xmlChar *tmp;
4317 
  /* Refuse to grow once size already exceeds XML_MAX_NAME_LENGTH, unless
   * the XML_PARSE_HUGE option is set. */
4318  if ((size > XML_MAX_NAME_LENGTH) &&
4319  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4320  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4321  xmlFree(buf);
4322  return(NULL);
4323  }
4324  size *= 2;
  /* Reallocate through tmp so buf can still be freed when this fails. */
4325  tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4326  if (tmp == NULL) {
4327  xmlErrMemory(ctxt, NULL);
4328  xmlFree(buf);
4329  return(NULL);
4330  }
4331  buf = tmp;
4332  }
4333  buf[len++] = cur;
4334  count++;
  /* Once more than 50 bytes were taken since the last refresh, run the
   * SHRINK and GROW macros (defined elsewhere; presumably they discard
   * consumed input and fetch more -- confirm) and bail out on EOF state. */
4335  if (count > 50) {
4336  SHRINK;
4337  GROW;
4338  count = 0;
4339  if (ctxt->instate == XML_PARSER_EOF) {
4340  xmlFree(buf);
4341  return(NULL);
4342  }
4343  }
4344  NEXT;
4345  cur = CUR;
  /* A 0 byte may only mean the visible input ran out: refresh and re-read
   * once before the loop condition gets to reject it. */
4346  if (cur == 0) {
4347  GROW;
4348  SHRINK;
4349  cur = CUR;
4350  }
4351  }
4352  buf[len] = 0;
  /* If the loop stopped on anything but the closing quote the buffer is
   * still returned.  NOTE(review): the statement numbered 4354, which sat in
   * this branch, is missing from the listing. */
4353  if (cur != stop) {
4355  } else {
  /* Consume the closing quote. */
4356  NEXT;
4357  }
  /* Put back the parser state saved on entry. */
4358  ctxt->instate = oldstate;
4359  return(buf);
4360 }
4361 
/*
 * Forward declaration: the character-data routine that follows the table
 * below ends by calling xmlParseCharDataComplex(ctxt, cdata), which is only
 * defined further down in the file.
 */
4362 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4363 
4364 /*
4365  * used for the test in the inner loop of the char data testing
4366  */
/*
 * Indexed by byte value.  An entry is non-zero (it simply repeats the byte
 * value) for bytes the scanning loop while (test_char_data[*in]) may step
 * over: 0x09 and 0x20..0x7F with the exception of the ampersand (0x26),
 * the less-than sign (0x3C) and the closing square bracket (0x5D).
 * Everything else maps to 0 and stops that loop: 0x0A, 0x0D, the remaining
 * control bytes and every byte from 0x80 upwards.
 */
4367 static const unsigned char test_char_data[256] = {
4368  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4369  0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4370  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4371  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4372  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4373  0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4374  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4375  0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4376  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4377  0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4378  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4379  0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4380  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4381  0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4382  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4383  0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4384  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4385  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4386  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4387  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4388  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4389  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4390  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4391  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4392  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4393  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4394  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4395  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4396  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4397  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4398  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4399  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4400 };
4401 
/*
 * Parse a run of character data and hand it to the SAX callbacks.
 *
 * NOTE(review): the embedded numbering skips 4419 and 4489, so the line with
 * the function name and parameters and one statement are missing from this
 * listing.  The body uses ctxt and cdata; the name is presumably
 * xmlParseCharData -- confirm against the real source.
 *
 * ctxt:  parser context whose current input is scanned.
 * cdata: when non-zero the byte-wise fast path is skipped and the work is
 *        delegated straight to xmlParseCharDataComplex().
 *
 * The fast path walks the raw bytes at ctxt->input->cur without copying and
 * passes each run directly to sax->ignorableWhitespace or sax->characters.
 * It returns when it stops on the start of markup or of a reference, and
 * otherwise falls through to xmlParseCharDataComplex() once it meets a byte
 * it does not handle.
 */
4418 void
4420  const xmlChar *in;
4421  int nbchar = 0;
  /* Position snapshot; refreshed after each delivered run and written back
   * just before falling back to the complex parser. */
4422  int line = ctxt->input->line;
4423  int col = ctxt->input->col;
4424  int ccol;
4425 
4426  SHRINK;
4427  GROW;
4428  /*
4429  * Accelerated common case where input doesn't need to be
4430  * modified before passing it to the handler.
4431  */
4432  if (!cdata) {
4433  in = ctxt->input->cur;
4434  do {
4435 get_more_space:
  /* Step over leading spaces and line feeds, keeping line/col current. */
4436  while (*in == 0x20) { in++; ctxt->input->col++; }
4437  if (*in == 0xA) {
4438  do {
4439  ctxt->input->line++; ctxt->input->col = 1;
4440  in++;
4441  } while (*in == 0xA);
4442  goto get_more_space;
4443  }
  /* Markup starts right after a run made only of spaces and line feeds:
   * deliver that run (if any) and return. */
4444  if (*in == '<') {
4445  nbchar = in - ctxt->input->cur;
4446  if (nbchar > 0) {
4447  const xmlChar *tmp = ctxt->input->cur;
4448  ctxt->input->cur = in;
4449 
  /* Only ask areBlanks() when the two callbacks differ; otherwise the run
   * goes to the characters callback unconditionally. */
4450  if ((ctxt->sax != NULL) &&
4451  (ctxt->sax->ignorableWhitespace !=
4452  ctxt->sax->characters)) {
4453  if (areBlanks(ctxt, tmp, nbchar, 1)) {
4454  if (ctxt->sax->ignorableWhitespace != NULL)
4455  ctxt->sax->ignorableWhitespace(ctxt->userData,
4456  tmp, nbchar);
4457  } else {
4458  if (ctxt->sax->characters != NULL)
4459  ctxt->sax->characters(ctxt->userData,
4460  tmp, nbchar);
  /* NOTE(review): -1 and -2 are sentinel values of *ctxt->space whose
   * meaning is not visible in this part of the file. */
4461  if (*ctxt->space == -1)
4462  *ctxt->space = -2;
4463  }
4464  } else if ((ctxt->sax != NULL) &&
4465  (ctxt->sax->characters != NULL)) {
4466  ctxt->sax->characters(ctxt->userData,
4467  tmp, nbchar);
4468  }
4469  }
4470  return;
4471  }
4472 
4473 get_more:
  /* Skip every byte the test_char_data table accepts, counting columns in a
   * local and storing the result once the run ends. */
4474  ccol = ctxt->input->col;
4475  while (test_char_data[*in]) {
4476  in++;
4477  ccol++;
4478  }
4479  ctxt->input->col = ccol;
4480  if (*in == 0xA) {
4481  do {
4482  ctxt->input->line++; ctxt->input->col = 1;
4483  in++;
4484  } while (*in == 0xA);
4485  goto get_more;
4486  }
4487  if (*in == ']') {
  /* The three-byte sequence ]]> stops the fast path: cur is moved just past
   * the first bracket and the function returns.  NOTE(review): the statement
   * numbered 4489 is missing from this listing. */
4488  if ((in[1] == ']') && (in[2] == '>')) {
4490  ctxt->input->cur = in + 1;
4491  return;
4492  }
  /* A lone bracket is ordinary data. */
4493  in++;
4494  ctxt->input->col++;
4495  goto get_more;
4496  }
  /* Deliver what has been scanned so far. */
4497  nbchar = in - ctxt->input->cur;
4498  if (nbchar > 0) {
  /* areBlanks() is consulted only when the callbacks differ and the run
   * starts with a blank character. */
4499  if ((ctxt->sax != NULL) &&
4500  (ctxt->sax->ignorableWhitespace !=
4501  ctxt->sax->characters) &&
4502  (IS_BLANK_CH(*ctxt->input->cur))) {
4503  const xmlChar *tmp = ctxt->input->cur;
4504  ctxt->input->cur = in;
4505 
4506  if (areBlanks(ctxt, tmp, nbchar, 0)) {
4507  if (ctxt->sax->ignorableWhitespace != NULL)
4508  ctxt->sax->ignorableWhitespace(ctxt->userData,
4509  tmp, nbchar);
4510  } else {
4511  if (ctxt->sax->characters != NULL)
4512  ctxt->sax->characters(ctxt->userData,
4513  tmp, nbchar);
4514  if (*ctxt->space == -1)
4515  *ctxt->space = -2;
4516  }
4517  line = ctxt->input->line;
4518  col = ctxt->input->col;
4519  } else if (ctxt->sax != NULL) {
  /* In this branch cur is still at the start of the run during the callback;
   * it is advanced right after the enclosing block. */
4520  if (ctxt->sax->characters != NULL)
4521  ctxt->sax->characters(ctxt->userData,
4522  ctxt->input->cur, nbchar);
4523  line = ctxt->input->line;
4524  col = ctxt->input->col;
4525  }
4526  /* something really bad happened in the SAX callback */
4527  if (ctxt->instate != XML_PARSER_CONTENT)
4528  return;
4529  }
4530  ctxt->input->cur = in;
  /* CR followed by LF: cur is left on the LF, so the CR is dropped and the
   * LF becomes the first byte of the next run.  A CR not followed by LF is
   * left in place (in is stepped back onto it). */
4531  if (*in == 0xD) {
4532  in++;
4533  if (*in == 0xA) {
4534  ctxt->input->cur = in;
4535  in++;
4536  ctxt->input->line++; ctxt->input->col = 1;
4537  continue; /* while */
4538  }
4539  in--;
4540  }
  /* Start of markup or of a reference: nothing more to do here. */
4541  if (*in == '<') {
4542  return;
4543  }
4544  if (*in == '&') {
4545  return;
4546  }
  /* Refresh the input, then reload the scan pointer from the context.
   * NOTE(review): presumably SHRINK and GROW may move the buffer, which is
   * why in is re-read -- confirm against the macro definitions. */
4547  SHRINK;
4548  GROW;
4549  if (ctxt->instate == XML_PARSER_EOF)
4550  return;
4551  in = ctxt->input->cur;
  /* Stay on the fast path only while the next byte is in 0x20..0x7F, a tab
   * or a line feed. */
4552  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4553  nbchar = 0;
4554  }
  /* Write back the position snapshot and let the character-by-character
   * parser handle the rest. */
4555  ctxt->input->line = line;
4556  ctxt->input->col = col;
4557  xmlParseCharDataComplex(ctxt, cdata);
4558 }
4559 
/*
 * Character-by-character parser for character data, used when the byte-wise
 * fast path cannot be applied.
 *
 * NOTE(review): the embedded numbering skips 4570, 4571 and 4586, so the
 * line with the function name and parameters, presumably the declaration of
 * buf, and one statement are missing from this listing.  The forward
 * declaration earlier in the file gives the signature as
 * xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata).
 *
 * ctxt:  parser context whose current input is consumed.
 * cdata: when non-zero, meeting the sequence ]]> ends the loop.
 *
 * Characters are copied into buf with COPY_BUF and flushed to the SAX
 * callbacks whenever nbchar reaches XML_PARSER_BIG_BUFFER_SIZE, and once
 * more after the loop.  Callbacks are skipped while ctxt->disableSAX is set.
 */
4569 static void
4572  int nbchar = 0;
4573  int cur, l;
4574  int count = 0;
4575 
4576  SHRINK;
4577  GROW;
4578  cur = CUR_CHAR(l);
  /* Stop at the start of markup, at the start of a reference, or at a
   * character that fails IS_CHAR. */
4579  while ((cur != '<') && /* checked */
4580  (cur != '&') &&
4581  (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4582  if ((cur == ']') && (NXT(1) == ']') &&
4583  (NXT(2) == '>')) {
4584  if (cdata) break;
4585  else {
  /* NOTE(review): the statement numbered 4586 is missing from this listing;
   * as shown, parsing simply continues when cdata is 0. */
4587  }
4588  }
4589  COPY_BUF(l,buf,nbchar,cur);
  /* Buffer threshold reached: terminate it and flush it to the handlers. */
4590  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4591  buf[nbchar] = 0;
4592 
4593  /*
4594  * OK the segment is to be consumed as chars.
4595  */
4596  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4597  if (areBlanks(ctxt, buf, nbchar, 0)) {
4598  if (ctxt->sax->ignorableWhitespace != NULL)
4599  ctxt->sax->ignorableWhitespace(ctxt->userData,
4600  buf, nbchar);
4601  } else {
4602  if (ctxt->sax->characters != NULL)
4603  ctxt->sax->characters(ctxt->userData, buf, nbchar);
  /* NOTE(review): -1 and -2 are sentinel values of *ctxt->space whose
   * meaning is not visible in this part of the file. */
4604  if ((ctxt->sax->characters !=
4605  ctxt->sax->ignorableWhitespace) &&
4606  (*ctxt->space == -1))
4607  *ctxt->space = -2;
4608  }
4609  }
4610  nbchar = 0;
4611  /* something really bad happened in the SAX callback */
4612  if (ctxt->instate != XML_PARSER_CONTENT)
4613  return;
4614  }
4615  count++;
  /* Once more than 50 characters were taken since the last refresh, run
   * SHRINK and GROW and give up if the parser entered the EOF state. */
4616  if (count > 50) {
4617  SHRINK;
4618  GROW;
4619  count = 0;
4620  if (ctxt->instate == XML_PARSER_EOF)
4621  return;
4622  }
4623  NEXTL(l);
4624  cur = CUR_CHAR(l);
4625  }
  /* Flush whatever is left in the buffer. */
4626  if (nbchar != 0) {
4627  buf[nbchar] = 0;
4628  /*
4629  * OK the segment is to be consumed as chars.
4630  */
4631  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4632  if (areBlanks(ctxt, buf, nbchar, 0)) {
4633  if (ctxt->sax->ignorableWhitespace != NULL)
4634  ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4635  } else {
4636  if (ctxt->sax->characters != NULL)
4637  ctxt->sax->characters(ctxt->userData, buf, nbchar);
4638  if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4639  (*ctxt->space == -1))
4640  *ctxt->space = -2;
4641  }
4642  }
4643  }
  /* A non-zero value that fails IS_CHAR ended the loop: report it and step
   * over it.  A value of 0 is not reported here. */
4644  if ((cur != 0) && (!IS_CHAR(cur))) {
4645  /* Generate the error and skip the offending character */
4646  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4647  "PCDATA invalid Char value %d\n",
4648  cur);
4649  NEXTL(l);
4650  }
4651 }
4652 
/*
 * Parse an external identifier: either the keyword SYSTEM followed by a
 * system literal, or the keyword PUBLIC followed by a public identifier
 * literal and, depending on strict, a system literal.
 *
 * NOTE(review): the embedded numbering skips 4676, 4690, 4700 and 4722, so
 * the line with the function name and parameters and the bodies of three
 * if-statements are missing from this listing.  The body uses ctxt,
 * publicID and strict; the name is presumably xmlParseExternalID -- confirm
 * against the real source.
 *
 * ctxt:     parser context whose current input is consumed.
 * publicID: output; set to NULL on entry and, in the PUBLIC case, to the
 *           result of xmlParsePubidLiteral().
 * strict:   non-zero requires blanks and a system literal after the public
 *           identifier; zero allows the public identifier to stand alone.
 *
 * Returns the result of xmlParseSystemLiteral(), or NULL when neither
 * keyword is present or when, in non-strict mode, no system literal follows
 * the public identifier.
 */
4675 xmlChar *
4677  xmlChar *URI = NULL;
4678 
4679  SHRINK;
4680 
4681  *publicID = NULL;
4682  if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4683  SKIP(6);
  /* Missing blanks are reported but parsing carries on. */
4684  if (SKIP_BLANKS == 0) {
4685  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4686  "Space required after 'SYSTEM'\n");
4687  }
4688  URI = xmlParseSystemLiteral(ctxt);
  /* NOTE(review): the statement numbered 4690 is missing from this listing. */
4689  if (URI == NULL) {
4691  }
4692  } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4693  SKIP(6);
4694  if (SKIP_BLANKS == 0) {
4695  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4696  "Space required after 'PUBLIC'\n");
4697  }
4698  *publicID = xmlParsePubidLiteral(ctxt);
  /* NOTE(review): the statement numbered 4700 is missing from this listing. */
4699  if (*publicID == NULL) {
4701  }
4702  if (strict) {
4703  /*
4704  * We don't handle [83] so "S SystemLiteral" is required.
4705  */
4706  if (SKIP_BLANKS == 0) {
4707  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4708  "Space required after the Public Identifier\n");
4709  }
4710  } else {
4711  /*
4712  * We handle [83] so we return immediately, if
4713  * "S SystemLiteral" is not detected. We skip blanks if no
4714  * system literal was found, but this is harmless since we must
4715  * be at the end of a NotationDecl.
4716  */
  /* Both early returns leave *publicID set and report no system literal. */
4717  if (SKIP_BLANKS == 0) return(NULL);
4718  if ((CUR != '\'') && (CUR != '"')) return(NULL);
4719  }
4720  URI = xmlParseSystemLiteral(ctxt);
  /* NOTE(review): the statement numbered 4722 is missing from this listing. */
4721  if (URI == NULL) {
4723  }
4724  }
4725  return(URI);
4726 }
4727 
/*
 * Character-by-character parser for the body of a comment.
 *
 * NOTE(review): the embedded numbering skips 4743, 4755 and 4792, so the
 * line with the function name and the first parameters, one statement in
 * the allocation branch (presumably the one giving size its initial value)
 * and one statement in the double-hyphen branch are missing from this
 * listing.  The body uses ctxt, buf, len and size; the name is presumably
 * xmlParseCommentComplex -- confirm against the real source.
 *
 * ctxt: parser context whose current input is consumed.
 * buf:  buffer holding comment text gathered so far, or NULL to have one
 *       allocated here.  This function frees buf on every path that
 *       returns with a non-NULL buffer, so ownership passes to it.
 * len:  number of bytes already stored in buf (reset to 0 when buf is NULL).
 * size: allocated size of buf.
 *
 * Three consecutive characters are kept in q, r and cur; q is appended to
 * the buffer on each step, and the loop ends when the three of them spell
 * the comment terminator (two hyphens and a greater-than sign), on a
 * character failing IS_CHAR, or on 0.  On proper termination the text is
 * passed to sax->comment unless ctxt->disableSAX is set.
 */
4742 static void
4744  size_t len, size_t size) {
4745  int q, ql;
4746  int r, rl;
4747  int cur, l;
4748  size_t count = 0;
4749  int inputid;
4750 
  /* Remember which input the comment started in, to detect a comment that
   * ends in a different one. */
4751  inputid = ctxt->input->id;
4752 
4753  if (buf == NULL) {
4754  len = 0;
4756  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4757  if (buf == NULL) {
4758  xmlErrMemory(ctxt, NULL);
4759  return;
4760  }
4761  }
4762  GROW; /* Assure there's enough input data */
  /* Prime the three-character window: q, then r, then cur. */
4763  q = CUR_CHAR(ql);
4764  if (q == 0)
4765  goto not_terminated;
4766  if (!IS_CHAR(q)) {
4767  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4768  "xmlParseComment: invalid xmlChar value %d\n",
4769  q);
4770  xmlFree (buf);
4771  return;
4772  }
4773  NEXTL(ql);
4774  r = CUR_CHAR(rl);
4775  if (r == 0)
4776  goto not_terminated;
  /* The character that failed the check is r, so r is the value reported
   * (the message used to print q, which had already passed IS_CHAR). */
4777  if (!IS_CHAR(r)) {
4778  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4779  "xmlParseComment: invalid xmlChar value %d\n",
4780  r);
4781  xmlFree (buf);
4782  return;
4783  }
4784  NEXTL(rl);
4785  cur = CUR_CHAR(l);
4786  if (cur == 0)
4787  goto not_terminated;
  /* Continue until q, r, cur are hyphen, hyphen, greater-than. */
4788  while (IS_CHAR(cur) && /* checked */
4789  ((cur != '>') ||
4790  (r != '-') || (q != '-'))) {
  /* Two hyphens not followed by the closing sign.  NOTE(review): the
   * statement numbered 4792, which sat in this branch, is missing from the
   * listing. */
4791  if ((r == '-') && (q == '-')) {
4793  }
  /* Cap the comment length unless XML_PARSE_HUGE is set. */
4794  if ((len > XML_MAX_TEXT_LENGTH) &&
4795  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4796  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4797  "Comment too big found", NULL);
4798  xmlFree (buf);
4799  return;
4800  }
  /* Keep headroom before COPY_BUF appends q.  NOTE(review): presumably the
   * margin of 5 covers the longest encoded character plus the terminator --
   * confirm against the COPY_BUF macro. */
4801  if (len + 5 >= size) {
4802  xmlChar *new_buf;
4803  size_t new_size;
4804 
4805  new_size = size * 2;
4806  new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4807  if (new_buf == NULL) {
4808  xmlFree (buf);
4809  xmlErrMemory(ctxt, NULL);
4810  return;
4811  }
4812  buf = new_buf;
4813  size = new_size;
4814  }
  /* Store the oldest character and slide the window by one. */
4815  COPY_BUF(ql,buf,len,q);
4816  q = r;
4817  ql = rl;
4818  r = cur;
4819  rl = l;
4820 
4821  count++;
  /* Once more than 50 characters were taken since the last refresh, run
   * SHRINK and GROW and give up if the parser entered the EOF state. */
4822  if (count > 50) {
4823  SHRINK;
4824  GROW;
4825  count = 0;
4826  if (ctxt->instate == XML_PARSER_EOF) {
4827  xmlFree(buf);
4828  return;
4829  }
4830  }
4831  NEXTL(l);
4832  cur = CUR_CHAR(l);
  /* A 0 may only mean the visible input ran out: refresh and re-read once. */
4833  if (cur == 0) {
4834  SHRINK;
4835  GROW;
4836  cur = CUR_CHAR(l);
4837  }
4838  }
4839  buf[len] = 0;
  /* Work out why the loop ended: end of input, invalid character, or a
   * properly terminated comment. */
4840  if (cur == 0) {
4841  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4842  "Comment not terminated \n<!--%.50s\n", buf);
4843  } else if (!IS_CHAR(cur)) {
4844  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4845  "xmlParseComment: invalid xmlChar value %d\n",
4846  cur);
4847  } else {
4848  if (inputid != ctxt->input->id) {
4849  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4850  "Comment doesn't start and stop in the same"
4851  " entity\n");
4852  }
  /* Consume the closing greater-than sign, then deliver the text. */
4853  NEXT;
4854  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4855  (!ctxt->disableSAX))
4856  ctxt->sax->comment(ctxt->userData, buf);
4857  }
4858  xmlFree(buf);
4859  return;
  /* Input ended while the window was still being primed. */
4860 not_terminated:
4861  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4862  "Comment not terminated\n", NULL);
4863  xmlFree(buf);
4864  return;
4865 }
4866 
4877 void
4879  xmlChar *buf = NULL;
4880  size_t size = XML_PARSER_BUFFER_SIZE;
4881  size_t len = 0;
4883  const xmlChar *in;
4884  size_t nbchar = 0;
4885  int ccol;
4886  int inputid;
4887 
4888  /*
4889  * Check that there is a comment right here.
4890  */
4891  if ((RAW != '<') || (NXT(1) != '!') ||
4892  (NXT(2) != '-') || (NXT(3) != '-')) return;
4893  state = ctxt->instate;
4894  ctxt->instate = XML_PARSER_COMMENT;
4895  inputid = ctxt->input->id;
4896  SKIP(4);
4897  SHRINK;
4898  GROW;
4899 
4900  /*
4901  * Accelerated common case where input don't need to be
4902  * modified before passing it to the handler.
4903  */
4904  in = ctxt->input->cur;
4905  do {
4906  if (*in == 0xA) {
4907  do {
4908  ctxt->input->line++; ctxt->input->col = 1;
4909  in++;
4910  } while (*in == 0xA);
4911  }
4912 get_more:
4913  ccol = ctxt->input->col;
4914  while (((*in > '-') && (*in <= 0x7F)) ||
4915  ((*in >= 0x20) && (*in < '-')) ||
4916  (*in == 0x09)) {
4917  in++;
4918  ccol++;
4919  }
4920  ctxt->input->col = ccol;
4921  if (*in == 0xA) {
4922  do {
4923  ctxt->input->line++; ctxt->input->col = 1;
4924  in++;
4925  } while (*in == 0xA);
4926  goto get_more;
4927  }
4928  nbchar = in - ctxt->input->cur;
4929  /*
4930  * save current set of data
4931  */
4932  if (nbchar > 0) {
4933  if ((ctxt->sax != NULL) &&
4934  (ctxt->sax->comment != NULL)) {
4935  if (buf == NULL) {
4936  if ((*in == '-') && (in[1] == '-'))
4937  size = nbchar + 1;
4938  else
4939  size = XML_PARSER_BUFFER_SIZE + nbchar;
4940  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4941  if (buf == NULL) {
4942  xmlErrMemory(ctxt, NULL);
4943  ctxt->instate = state;
4944  return;
4945  }
4946  len = 0;
4947  } else if (len + nbchar + 1 >= size) {
4948  xmlChar *new_buf;
4949  size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4950  new_buf = (xmlChar *) xmlRealloc(buf,
4951  size * sizeof(xmlChar));
4952  if (new_buf == NULL) {
4953  xmlFree (buf);
4954  xmlErrMemory(ctxt, NULL);
4955  ctxt->instate = state;
4956  return;
4957  }
4958  buf = new_buf;
4959  }
4960  memcpy(&buf[len], ctxt->input->cur, nbchar);
4961  len += nbchar;
4962  buf[len] = 0;
4963  }
4964  }
4965  if ((len > XML_MAX_TEXT_LENGTH) &&
4966  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4967  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4968  "Comment too big found", NULL);
4969  xmlFree (buf);
4970  return;
4971  }
4972  ctxt->input->cur = in;
4973  if (*in == 0xA) {
4974  in++;
4975  ctxt->input->line++; ctxt->input->col = 1;
4976  }
4977  if (*in == 0xD) {
4978  in++;
4979  if (*in == 0xA) {
4980  ctxt->input->cur = in;
4981  in++;
4982  ctxt->input->line++; ctxt->input->col = 1;
4983  continue; /* while */
4984  }
4985  in--;
4986  }
4987  SHRINK;
4988  GROW;
4989  if (ctxt->instate == XML_PARSER_EOF) {
4990  xmlFree(buf);
4991  return;
4992  }
4993  in = ctxt->input->cur;
4994  if (*in == '-') {
4995  if (in[1] == '-') {
4996  if (in[2] == '>') {
4997  if (ctxt->input->id != inputid) {
4998  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4999  "comment doesn't start and stop in the"
5000  " same entity\n");
5001  }
5002  SKIP(3);
5003  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5004  (!ctxt->disableSAX)) {
5005  if (buf != NULL)
5006  ctxt->sax->comment(ctxt->userData, buf);
5007  else
5008  ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5009  }
5010  if (buf != NULL)
5011  xmlFree(buf);
5012  if (ctxt->instate != XML_PARSER_EOF)
5013  ctxt->instate = state;
5014  return;
5015  }
5016  if (buf != NULL) {
5017  xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5018  "Double hyphen within comment: "