ReactOS  0.4.15-dev-1070-ge1a01de
parser.c
Go to the documentation of this file.
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  * implemented on top of the SAX interfaces
4  *
5  * References:
6  * The XML specification:
7  * http://www.w3.org/TR/REC-xml
8  * Original 1.0 version:
9  * http://www.w3.org/TR/1998/REC-xml-19980210
10  * XML second edition working draft
11  * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of character are actually implemented either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86 
87 #include "buf.h"
88 #include "enc.h"
89 
90 static void
92 
93 static xmlParserCtxtPtr
95  const xmlChar *base, xmlParserCtxtPtr pctx);
96 
97 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98 
99 static int
101 
102 static void
104 
105 /************************************************************************
106  * *
107  * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
108  * *
109  ************************************************************************/
110 
111 #define XML_PARSER_BIG_ENTITY 1000
112 #define XML_PARSER_LOT_ENTITY 5000
113 
114 /*
115  * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
116  * replacement over the size in bytes of the input indicates that you have
117  * an exponential behaviour. A value of 10 corresponds to at least 3 entity
118  * replacements per byte of input.
119  */
120 #define XML_PARSER_NON_LINEAR 10
121 
122 /*
123  * xmlParserEntityCheck
124  *
125  * Function to check non-linear entity expansion behaviour.
126  * This is here to detect and stop exponential (non-linear) entity expansion.
127  * This is not a limitation of the parser but a safety
128  * boundary feature. It can be disabled with the XML_PARSE_HUGE
129  * parser option.
130  */
131 static int
133  xmlEntityPtr ent, size_t replacement)
134 {
135  size_t consumed = 0;
136 
137  if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
138  return (0);
139  if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
140  return (1);
141 
142  /*
143  * This may look absurd but is needed to detect
144  * entities problems
145  */
146  if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
147  (ent->content != NULL) && (ent->checked == 0) &&
148  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
149  unsigned long oldnbent = ctxt->nbentities, diff;
150  xmlChar *rep;
151 
152  ent->checked = 1;
153 
154  ++ctxt->depth;
155  rep = xmlStringDecodeEntities(ctxt, ent->content,
156  XML_SUBSTITUTE_REF, 0, 0, 0);
157  --ctxt->depth;
158  if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
159  ent->content[0] = 0;
160  }
161 
162  diff = ctxt->nbentities - oldnbent + 1;
163  if (diff > INT_MAX / 2)
164  diff = INT_MAX / 2;
165  ent->checked = diff * 2;
166  if (rep != NULL) {
167  if (xmlStrchr(rep, '<'))
168  ent->checked |= 1;
169  xmlFree(rep);
170  rep = NULL;
171  }
172  }
173  if (replacement != 0) {
174  if (replacement < XML_MAX_TEXT_LENGTH)
175  return(0);
176 
177  /*
178  * If the volume of entity copy reaches 10 times the
179  * amount of parsed data and over the large text threshold
180  * then that's very likely to be an abuse.
181  */
182  if (ctxt->input != NULL) {
183  consumed = ctxt->input->consumed +
184  (ctxt->input->cur - ctxt->input->base);
185  }
186  consumed += ctxt->sizeentities;
187 
188  if (replacement < XML_PARSER_NON_LINEAR * consumed)
189  return(0);
190  } else if (size != 0) {
191  /*
192  * Do the check based on the replacement size of the entity
193  */
195  return(0);
196 
197  /*
198  * A limit on the amount of text data reasonably used
199  */
200  if (ctxt->input != NULL) {
201  consumed = ctxt->input->consumed +
202  (ctxt->input->cur - ctxt->input->base);
203  }
204  consumed += ctxt->sizeentities;
205 
206  if ((size < XML_PARSER_NON_LINEAR * consumed) &&
207  (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
208  return (0);
209  } else if (ent != NULL) {
210  /*
211  * use the number of parsed entities in the replacement
212  */
213  size = ent->checked / 2;
214 
215  /*
216  * The amount of data parsed counting entities size only once
217  */
218  if (ctxt->input != NULL) {
219  consumed = ctxt->input->consumed +
220  (ctxt->input->cur - ctxt->input->base);
221  }
222  consumed += ctxt->sizeentities;
223 
224  /*
225  * Check the density of entities for the amount of data
226  * knowing an entity reference will take at least 3 bytes
227  */
228  if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
229  return (0);
230  } else {
231  /*
232  * strange we got no data for checking
233  */
234  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
236  (ctxt->nbentities <= 10000))
237  return (0);
238  }
240  return (1);
241 }
242 
251 unsigned int xmlParserMaxDepth = 256;
252 
253 
254 
255 #define SAX2 1
256 #define XML_PARSER_BIG_BUFFER_SIZE 300
257 #define XML_PARSER_BUFFER_SIZE 100
258 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
259 
269 #define XML_PARSER_CHUNK_SIZE 100
270 
/*
 * Names of the XML-prefixed processing instructions allowed by the
 * W3C specs. The table ends with a NULL entry.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet", "xml-model",
    NULL
};
280 
281 
282 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
284  const xmlChar **str);
285 
286 static xmlParserErrors
289  void *user_data, int depth, const xmlChar *URL,
290  const xmlChar *ID, xmlNodePtr *list);
291 
292 static int
294  const char *encoding);
295 #ifdef LIBXML_LEGACY_ENABLED
296 static void
297 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
298  xmlNodePtr lastNode);
299 #endif /* LIBXML_LEGACY_ENABLED */
300 
301 static xmlParserErrors
303  const xmlChar *string, void *user_data, xmlNodePtr *lst);
304 
305 static int
307 
308 /************************************************************************
309  * *
310  * Some factorized error routines *
311  * *
312  ************************************************************************/
313 
322 static void
324  const xmlChar * localname)
325 {
326  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
327  (ctxt->instate == XML_PARSER_EOF))
328  return;
329  if (ctxt != NULL)
331 
332  if (prefix == NULL)
333  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
335  (const char *) localname, NULL, NULL, 0, 0,
336  "Attribute %s redefined\n", localname);
337  else
338  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
340  (const char *) prefix, (const char *) localname,
341  NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
342  localname);
343  if (ctxt != NULL) {
344  ctxt->wellFormed = 0;
345  if (ctxt->recovery == 0)
346  ctxt->disableSAX = 1;
347  }
348 }
349 
358 static void
360 {
361  const char *errmsg;
362 
363  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
364  (ctxt->instate == XML_PARSER_EOF))
365  return;
366  switch (error) {
368  errmsg = "CharRef: invalid hexadecimal value";
369  break;
371  errmsg = "CharRef: invalid decimal value";
372  break;
374  errmsg = "CharRef: invalid value";
375  break;
377  errmsg = "internal error";
378  break;
380  errmsg = "PEReference at end of document";
381  break;
383  errmsg = "PEReference in prolog";
384  break;
386  errmsg = "PEReference in epilog";
387  break;
389  errmsg = "PEReference: no name";
390  break;
392  errmsg = "PEReference: expecting ';'";
393  break;
394  case XML_ERR_ENTITY_LOOP:
395  errmsg = "Detected an entity reference loop";
396  break;
398  errmsg = "EntityValue: \" or ' expected";
399  break;
401  errmsg = "PEReferences forbidden in internal subset";
402  break;
404  errmsg = "EntityValue: \" or ' expected";
405  break;
407  errmsg = "AttValue: \" or ' expected";
408  break;
410  errmsg = "Unescaped '<' not allowed in attributes values";
411  break;
413  errmsg = "SystemLiteral \" or ' expected";
414  break;
416  errmsg = "Unfinished System or Public ID \" or ' expected";
417  break;
419  errmsg = "Sequence ']]>' not allowed in content";
420  break;
422  errmsg = "SYSTEM or PUBLIC, the URI is missing";
423  break;
425  errmsg = "PUBLIC, the Public Identifier is missing";
426  break;
428  errmsg = "Comment must not contain '--' (double-hyphen)";
429  break;
431  errmsg = "xmlParsePI : no target name";
432  break;
434  errmsg = "Invalid PI name";
435  break;
437  errmsg = "NOTATION: Name expected here";
438  break;
440  errmsg = "'>' required to close NOTATION declaration";
441  break;
443  errmsg = "Entity value required";
444  break;
446  errmsg = "Fragment not allowed";
447  break;
449  errmsg = "'(' required to start ATTLIST enumeration";
450  break;
452  errmsg = "NmToken expected in ATTLIST enumeration";
453  break;
455  errmsg = "')' required to finish ATTLIST enumeration";
456  break;
458  errmsg = "MixedContentDecl : '|' or ')*' expected";
459  break;
461  errmsg = "MixedContentDecl : '#PCDATA' expected";
462  break;
464  errmsg = "ContentDecl : Name or '(' expected";
465  break;
467  errmsg = "ContentDecl : ',' '|' or ')' expected";
468  break;
470  errmsg =
471  "PEReference: forbidden within markup decl in internal subset";
472  break;
473  case XML_ERR_GT_REQUIRED:
474  errmsg = "expected '>'";
475  break;
477  errmsg = "XML conditional section '[' expected";
478  break;
480  errmsg = "Content error in the external subset";
481  break;
483  errmsg =
484  "conditional section INCLUDE or IGNORE keyword expected";
485  break;
487  errmsg = "XML conditional section not closed";
488  break;
490  errmsg = "Text declaration '<?xml' required";
491  break;
493  errmsg = "parsing XML declaration: '?>' expected";
494  break;
496  errmsg = "external parsed entities cannot be standalone";
497  break;
499  errmsg = "EntityRef: expecting ';'";
500  break;
502  errmsg = "DOCTYPE improperly terminated";
503  break;
505  errmsg = "EndTag: '</' not found";
506  break;
508  errmsg = "expected '='";
509  break;
511  errmsg = "String not closed expecting \" or '";
512  break;
514  errmsg = "String not started expecting ' or \"";
515  break;
517  errmsg = "Invalid XML encoding name";
518  break;
520  errmsg = "standalone accepts only 'yes' or 'no'";
521  break;
523  errmsg = "Document is empty";
524  break;
526  errmsg = "Extra content at the end of the document";
527  break;
529  errmsg = "chunk is not well balanced";
530  break;
532  errmsg = "extra content at the end of well balanced chunk";
533  break;
535  errmsg = "Malformed declaration expecting version";
536  break;
538  errmsg = "Name too long use XML_PARSE_HUGE option";
539  break;
540 #if 0
541  case:
542  errmsg = "";
543  break;
544 #endif
545  default:
546  errmsg = "Unregistered error message";
547  }
548  if (ctxt != NULL)
549  ctxt->errNo = error;
550  if (info == NULL) {
551  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
552  XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
553  errmsg);
554  } else {
555  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
556  XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
557  errmsg, info);
558  }
559  if (ctxt != NULL) {
560  ctxt->wellFormed = 0;
561  if (ctxt->recovery == 0)
562  ctxt->disableSAX = 1;
563  }
564 }
565 
574 static void LIBXML_ATTR_FORMAT(3,0)
575 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
576  const char *msg)
577 {
578  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
579  (ctxt->instate == XML_PARSER_EOF))
580  return;
581  if (ctxt != NULL)
582  ctxt->errNo = error;
583  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
584  XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
585  if (ctxt != NULL) {
586  ctxt->wellFormed = 0;
587  if (ctxt->recovery == 0)
588  ctxt->disableSAX = 1;
589  }
590 }
591 
602 static void LIBXML_ATTR_FORMAT(3,0)
603 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
604  const char *msg, const xmlChar *str1, const xmlChar *str2)
605 {
606  xmlStructuredErrorFunc schannel = NULL;
607 
608  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
609  (ctxt->instate == XML_PARSER_EOF))
610  return;
611  if ((ctxt != NULL) && (ctxt->sax != NULL) &&
612  (ctxt->sax->initialized == XML_SAX2_MAGIC))
613  schannel = ctxt->sax->serror;
614  if (ctxt != NULL) {
615  __xmlRaiseError(schannel,
616  (ctxt->sax) ? ctxt->sax->warning : NULL,
617  ctxt->userData,
618  ctxt, NULL, XML_FROM_PARSER, error,
619  XML_ERR_WARNING, NULL, 0,
620  (const char *) str1, (const char *) str2, NULL, 0, 0,
621  msg, (const char *) str1, (const char *) str2);
622  } else {
623  __xmlRaiseError(schannel, NULL, NULL,
624  ctxt, NULL, XML_FROM_PARSER, error,
625  XML_ERR_WARNING, NULL, 0,
626  (const char *) str1, (const char *) str2, NULL, 0, 0,
627  msg, (const char *) str1, (const char *) str2);
628  }
629 }
630 
640 static void LIBXML_ATTR_FORMAT(3,0)
641 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
642  const char *msg, const xmlChar *str1, const xmlChar *str2)
643 {
644  xmlStructuredErrorFunc schannel = NULL;
645 
646  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
647  (ctxt->instate == XML_PARSER_EOF))
648  return;
649  if (ctxt != NULL) {
650  ctxt->errNo = error;
651  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
652  schannel = ctxt->sax->serror;
653  }
654  if (ctxt != NULL) {
655  __xmlRaiseError(schannel,
656  ctxt->vctxt.error, ctxt->vctxt.userData,
657  ctxt, NULL, XML_FROM_DTD, error,
658  XML_ERR_ERROR, NULL, 0, (const char *) str1,
659  (const char *) str2, NULL, 0, 0,
660  msg, (const char *) str1, (const char *) str2);
661  ctxt->valid = 0;
662  } else {
663  __xmlRaiseError(schannel, NULL, NULL,
664  ctxt, NULL, XML_FROM_DTD, error,
665  XML_ERR_ERROR, NULL, 0, (const char *) str1,
666  (const char *) str2, NULL, 0, 0,
667  msg, (const char *) str1, (const char *) str2);
668  }
669 }
670 
/*
 * xmlFatalErrMsgInt:
 * @ctxt:  the parser context, may be NULL
 * @error:  the error number
 * @msg:  the message format
 * @val:  an integer, passed to __xmlRaiseError and as the format argument
 *
 * Report an error whose message takes one integer argument.
 * Nothing is reported when the context has SAX disabled and is in the
 * XML_PARSER_EOF state. With a non-NULL context the error number is
 * stored in errNo, wellFormed is cleared and, unless recovery is set,
 * SAX is disabled.
 */
680 static void LIBXML_ATTR_FORMAT(3,0)
681 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
682  const char *msg, int val)
683 {
684  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
685  (ctxt->instate == XML_PARSER_EOF))
686  return;
687  if (ctxt != NULL)
688  ctxt->errNo = error;
689  __xmlRaiseError(NULL, NULL, NULL,
 /* NOTE(review): listing line 690 is missing here, so the arguments
  * between the third NULL and "NULL, 0" (presumably context, domain,
  * code and level) are not visible -- restore from the real file. */
691  NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
692  if (ctxt != NULL) {
693  ctxt->wellFormed = 0;
 /* keep delivering SAX events only when recovery was requested */
694  if (ctxt->recovery == 0)
695  ctxt->disableSAX = 1;
696  }
697 }
698 
/*
 * xmlFatalErrMsgStrIntStr:
 * @ctxt:  the parser context, may be NULL
 * @error:  the error number
 * @msg:  the message format
 * @str1:  first string, used as extra info and as format argument
 * @val:  an integer, passed to __xmlRaiseError and as format argument
 * @str2:  second string, used as extra info and as format argument
 *
 * Report an error whose message takes a string, an integer and a
 * string, in that order.
 * Nothing is reported when the context has SAX disabled and is in the
 * XML_PARSER_EOF state. With a non-NULL context the error number is
 * stored in errNo, wellFormed is cleared and, unless recovery is set,
 * SAX is disabled.
 */
710 static void LIBXML_ATTR_FORMAT(3,0)
711 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
712  const char *msg, const xmlChar *str1, int val,
713  const xmlChar *str2)
714 {
715  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
716  (ctxt->instate == XML_PARSER_EOF))
717  return;
718  if (ctxt != NULL)
719  ctxt->errNo = error;
720  __xmlRaiseError(NULL, NULL, NULL,
 /* NOTE(review): listing line 721 is missing here, so the arguments
  * between the third NULL and "NULL, 0" (presumably context, domain,
  * code and level) are not visible -- restore from the real file. */
722  NULL, 0, (const char *) str1, (const char *) str2,
723  NULL, val, 0, msg, str1, val, str2);
724  if (ctxt != NULL) {
725  ctxt->wellFormed = 0;
 /* keep delivering SAX events only when recovery was requested */
726  if (ctxt->recovery == 0)
727  ctxt->disableSAX = 1;
728  }
729 }
730 
/*
 * xmlFatalErrMsgStr:
 * @ctxt:  the parser context, may be NULL
 * @error:  the error number
 * @msg:  the message format
 * @val:  a string, used as extra info and as the format argument
 *
 * Report an error whose message takes one string argument.
 * Nothing is reported when the context has SAX disabled and is in the
 * XML_PARSER_EOF state. With a non-NULL context the error number is
 * stored in errNo, wellFormed is cleared and, unless recovery is set,
 * SAX is disabled.
 */
740 static void LIBXML_ATTR_FORMAT(3,0)
741 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
742  const char *msg, const xmlChar * val)
743 {
744  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
745  (ctxt->instate == XML_PARSER_EOF))
746  return;
747  if (ctxt != NULL)
748  ctxt->errNo = error;
749  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
 /* NOTE(review): listing line 750 is missing here, so the arguments
  * between the node (NULL) and "NULL, 0" (presumably domain, code
  * and level) are not visible -- restore from the real file. */
751  NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
752  val);
753  if (ctxt != NULL) {
754  ctxt->wellFormed = 0;
 /* keep delivering SAX events only when recovery was requested */
755  if (ctxt->recovery == 0)
756  ctxt->disableSAX = 1;
757  }
758 }
759 
/*
 * xmlErrMsgStr:
 * @ctxt:  the parser context, may be NULL
 * @error:  the error number
 * @msg:  the message format
 * @val:  a string, used as extra info and as the format argument
 *
 * Report an error whose message takes one string argument.
 * Nothing is reported when the context has SAX disabled and is in the
 * XML_PARSER_EOF state. With a non-NULL context the error number is
 * stored in errNo. Unlike the xmlFatalErr* helpers in this file, this
 * function touches neither wellFormed nor disableSAX.
 */
769 static void LIBXML_ATTR_FORMAT(3,0)
770 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
771  const char *msg, const xmlChar * val)
772 {
773  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
774  (ctxt->instate == XML_PARSER_EOF))
775  return;
776  if (ctxt != NULL)
777  ctxt->errNo = error;
778  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
 /* NOTE(review): listing line 779 is missing here, so the arguments
  * between the node (NULL) and "NULL, 0" (presumably domain, code
  * and level) are not visible -- restore from the real file. */
780  NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
781  val);
782 }
783 
794 static void LIBXML_ATTR_FORMAT(3,0)
795 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
796  const char *msg,
797  const xmlChar * info1, const xmlChar * info2,
798  const xmlChar * info3)
799 {
800  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
801  (ctxt->instate == XML_PARSER_EOF))
802  return;
803  if (ctxt != NULL)
804  ctxt->errNo = error;
805  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
806  XML_ERR_ERROR, NULL, 0, (const char *) info1,
807  (const char *) info2, (const char *) info3, 0, 0, msg,
808  info1, info2, info3);
809  if (ctxt != NULL)
810  ctxt->nsWellFormed = 0;
811 }
812 
823 static void LIBXML_ATTR_FORMAT(3,0)
824 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
825  const char *msg,
826  const xmlChar * info1, const xmlChar * info2,
827  const xmlChar * info3)
828 {
829  if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
830  (ctxt->instate == XML_PARSER_EOF))
831  return;
832  __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
833  XML_ERR_WARNING, NULL, 0, (const char *) info1,
834  (const char *) info2, (const char *) info3, 0, 0, msg,
835  info1, info2, info3);
836 }
837 
838 /************************************************************************
839  * *
840  * Library wide options *
841  * *
842  ************************************************************************/
843 
854 int
856 {
857  switch (feature) {
858  case XML_WITH_THREAD:
859 #ifdef LIBXML_THREAD_ENABLED
860  return(1);
861 #else
862  return(0);
863 #endif
864  case XML_WITH_TREE:
865 #ifdef LIBXML_TREE_ENABLED
866  return(1);
867 #else
868  return(0);
869 #endif
870  case XML_WITH_OUTPUT:
871 #ifdef LIBXML_OUTPUT_ENABLED
872  return(1);
873 #else
874  return(0);
875 #endif
876  case XML_WITH_PUSH:
877 #ifdef LIBXML_PUSH_ENABLED
878  return(1);
879 #else
880  return(0);
881 #endif
882  case XML_WITH_READER:
883 #ifdef LIBXML_READER_ENABLED
884  return(1);
885 #else
886  return(0);
887 #endif
888  case XML_WITH_PATTERN:
889 #ifdef LIBXML_PATTERN_ENABLED
890  return(1);
891 #else
892  return(0);
893 #endif
894  case XML_WITH_WRITER:
895 #ifdef LIBXML_WRITER_ENABLED
896  return(1);
897 #else
898  return(0);
899 #endif
900  case XML_WITH_SAX1:
901 #ifdef LIBXML_SAX1_ENABLED
902  return(1);
903 #else
904  return(0);
905 #endif
906  case XML_WITH_FTP:
907 #ifdef LIBXML_FTP_ENABLED
908  return(1);
909 #else
910  return(0);
911 #endif
912  case XML_WITH_HTTP:
913 #ifdef LIBXML_HTTP_ENABLED
914  return(1);
915 #else
916  return(0);
917 #endif
918  case XML_WITH_VALID:
919 #ifdef LIBXML_VALID_ENABLED
920  return(1);
921 #else
922  return(0);
923 #endif
924  case XML_WITH_HTML:
925 #ifdef LIBXML_HTML_ENABLED
926  return(1);
927 #else
928  return(0);
929 #endif
930  case XML_WITH_LEGACY:
931 #ifdef LIBXML_LEGACY_ENABLED
932  return(1);
933 #else
934  return(0);
935 #endif
936  case XML_WITH_C14N:
937 #ifdef LIBXML_C14N_ENABLED
938  return(1);
939 #else
940  return(0);
941 #endif
942  case XML_WITH_CATALOG:
943 #ifdef LIBXML_CATALOG_ENABLED
944  return(1);
945 #else
946  return(0);
947 #endif
948  case XML_WITH_XPATH:
949 #ifdef LIBXML_XPATH_ENABLED
950  return(1);
951 #else
952  return(0);
953 #endif
954  case XML_WITH_XPTR:
955 #ifdef LIBXML_XPTR_ENABLED
956  return(1);
957 #else
958  return(0);
959 #endif
960  case XML_WITH_XINCLUDE:
961 #ifdef LIBXML_XINCLUDE_ENABLED
962  return(1);
963 #else
964  return(0);
965 #endif
966  case XML_WITH_ICONV:
967 #ifdef LIBXML_ICONV_ENABLED
968  return(1);
969 #else
970  return(0);
971 #endif
972  case XML_WITH_ISO8859X:
973 #ifdef LIBXML_ISO8859X_ENABLED
974  return(1);
975 #else
976  return(0);
977 #endif
978  case XML_WITH_UNICODE:
979 #ifdef LIBXML_UNICODE_ENABLED
980  return(1);
981 #else
982  return(0);
983 #endif
984  case XML_WITH_REGEXP:
985 #ifdef LIBXML_REGEXP_ENABLED
986  return(1);
987 #else
988  return(0);
989 #endif
990  case XML_WITH_AUTOMATA:
991 #ifdef LIBXML_AUTOMATA_ENABLED
992  return(1);
993 #else
994  return(0);
995 #endif
996  case XML_WITH_EXPR:
997 #ifdef LIBXML_EXPR_ENABLED
998  return(1);
999 #else
1000  return(0);
1001 #endif
1002  case XML_WITH_SCHEMAS:
1003 #ifdef LIBXML_SCHEMAS_ENABLED
1004  return(1);
1005 #else
1006  return(0);
1007 #endif
1008  case XML_WITH_SCHEMATRON:
1009 #ifdef LIBXML_SCHEMATRON_ENABLED
1010  return(1);
1011 #else
1012  return(0);
1013 #endif
1014  case XML_WITH_MODULES:
1015 #ifdef LIBXML_MODULES_ENABLED
1016  return(1);
1017 #else
1018  return(0);
1019 #endif
1020  case XML_WITH_DEBUG:
1021 #ifdef LIBXML_DEBUG_ENABLED
1022  return(1);
1023 #else
1024  return(0);
1025 #endif
1026  case XML_WITH_DEBUG_MEM:
1027 #ifdef DEBUG_MEMORY_LOCATION
1028  return(1);
1029 #else
1030  return(0);
1031 #endif
1032  case XML_WITH_DEBUG_RUN:
1033 #ifdef LIBXML_DEBUG_RUNTIME
1034  return(1);
1035 #else
1036  return(0);
1037 #endif
1038  case XML_WITH_ZLIB:
1039 #ifdef LIBXML_ZLIB_ENABLED
1040  return(1);
1041 #else
1042  return(0);
1043 #endif
1044  case XML_WITH_LZMA:
1045 #ifdef LIBXML_LZMA_ENABLED
1046  return(1);
1047 #else
1048  return(0);
1049 #endif
1050  case XML_WITH_ICU:
1051 #ifdef LIBXML_ICU_ENABLED
1052  return(1);
1053 #else
1054  return(0);
1055 #endif
1056  default:
1057  break;
1058  }
1059  return(0);
1060 }
1061 
1062 /************************************************************************
1063  * *
1064  * SAX2 defaulted attributes handling *
1065  * *
1066  ************************************************************************/
1067 
1074 static void
1076  if (ctxt == NULL) return;
1077 #ifdef LIBXML_SAX1_ENABLED
1078  if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1079  ((ctxt->sax->startElementNs != NULL) ||
1080  (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1081 #else
1082  ctxt->sax2 = 1;
1083 #endif /* LIBXML_SAX1_ENABLED */
1084 
1085  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1086  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1087  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1088  if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1089  (ctxt->str_xml_ns == NULL)) {
1090  xmlErrMemory(ctxt, NULL);
1091  }
1092 }
1093 
1094 typedef struct _xmlDefAttrs xmlDefAttrs;
1097  int nbAttrs; /* number of defaulted attributes on that element */
1098  int maxAttrs; /* the size of the array */
1099 #if __STDC_VERSION__ >= 199901L
1100  /* Using a C99 flexible array member avoids UBSan errors. */
1101  const xmlChar *values[]; /* array of localname/prefix/values/external */
1102 #else
1103  const xmlChar *values[5];
1104 #endif
1105 };
1106 
1124 static xmlChar *
1126 {
1127  if ((src == NULL) || (dst == NULL))
1128  return(NULL);
1129 
1130  while (*src == 0x20) src++;
1131  while (*src != 0) {
1132  if (*src == 0x20) {
1133  while (*src == 0x20) src++;
1134  if (*src != 0)
1135  *dst++ = 0x20;
1136  } else {
1137  *dst++ = *src++;
1138  }
1139  }
1140  *dst = 0;
1141  if (dst == src)
1142  return(NULL);
1143  return(dst);
1144 }
1145 
1157 static const xmlChar *
1159 {
1160  int i;
1161  int remove_head = 0;
1162  int need_realloc = 0;
1163  const xmlChar *cur;
1164 
1165  if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1166  return(NULL);
1167  i = *len;
1168  if (i <= 0)
1169  return(NULL);
1170 
1171  cur = src;
1172  while (*cur == 0x20) {
1173  cur++;
1174  remove_head++;
1175  }
1176  while (*cur != 0) {
1177  if (*cur == 0x20) {
1178  cur++;
1179  if ((*cur == 0x20) || (*cur == 0)) {
1180  need_realloc = 1;
1181  break;
1182  }
1183  } else
1184  cur++;
1185  }
1186  if (need_realloc) {
1187  xmlChar *ret;
1188 
1189  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1190  if (ret == NULL) {
1191  xmlErrMemory(ctxt, NULL);
1192  return(NULL);
1193  }
1195  *len = (int) strlen((const char *)ret);
1196  return(ret);
1197  } else if (remove_head) {
1198  *len -= remove_head;
1199  memmove(src, src + remove_head, 1 + *len);
1200  return(src);
1201  }
1202  return(NULL);
1203 }
1204 
1214 static void
1216  const xmlChar *fullname,
1217  const xmlChar *fullattr,
1218  const xmlChar *value) {
1220  int len;
1221  const xmlChar *name;
1222  const xmlChar *prefix;
1223 
1224  /*
1225  * Allows to detect attribute redefinitions
1226  */
1227  if (ctxt->attsSpecial != NULL) {
1228  if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1229  return;
1230  }
1231 
1232  if (ctxt->attsDefault == NULL) {
1233  ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1234  if (ctxt->attsDefault == NULL)
1235  goto mem_error;
1236  }
1237 
1238  /*
1239  * split the element name into prefix:localname , the string found
1240  * are within the DTD and then not associated to namespace names.
1241  */
1243  if (name == NULL) {
1244  name = xmlDictLookup(ctxt->dict, fullname, -1);
1245  prefix = NULL;
1246  } else {
1247  name = xmlDictLookup(ctxt->dict, name, -1);
1248  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1249  }
1250 
1251  /*
1252  * make sure there is some storage
1253  */
1254  defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1255  if (defaults == NULL) {
1257  (4 * 5) * sizeof(const xmlChar *));
1258  if (defaults == NULL)
1259  goto mem_error;
1260  defaults->nbAttrs = 0;
1261  defaults->maxAttrs = 4;
1262  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1263  defaults, NULL) < 0) {
1264  xmlFree(defaults);
1265  goto mem_error;
1266  }
1267  } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1269 
1271  (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1272  if (temp == NULL)
1273  goto mem_error;
1274  defaults = temp;
1275  defaults->maxAttrs *= 2;
1276  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1277  defaults, NULL) < 0) {
1278  xmlFree(defaults);
1279  goto mem_error;
1280  }
1281  }
1282 
1283  /*
1284  * Split the element name into prefix:localname , the string found
1285  * are within the DTD and then not associated to namespace names.
1286  */
1287  name = xmlSplitQName3(fullattr, &len);
1288  if (name == NULL) {
1289  name = xmlDictLookup(ctxt->dict, fullattr, -1);
1290  prefix = NULL;
1291  } else {
1292  name = xmlDictLookup(ctxt->dict, name, -1);
1293  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1294  }
1295 
1296  defaults->values[5 * defaults->nbAttrs] = name;
1297  defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1298  /* intern the string and precompute the end */
1299  len = xmlStrlen(value);
1300  value = xmlDictLookup(ctxt->dict, value, len);
1301  defaults->values[5 * defaults->nbAttrs + 2] = value;
1302  defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1303  if (ctxt->external)
1304  defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1305  else
1306  defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1307  defaults->nbAttrs++;
1308 
1309  return;
1310 
1311 mem_error:
1312  xmlErrMemory(ctxt, NULL);
1313  return;
1314 }
1315 
1325 static void
1327  const xmlChar *fullname,
1328  const xmlChar *fullattr,
1329  int type)
1330 {
1331  if (ctxt->attsSpecial == NULL) {
1332  ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1333  if (ctxt->attsSpecial == NULL)
1334  goto mem_error;
1335  }
1336 
1337  if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1338  return;
1339 
1340  xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1341  (void *) (ptrdiff_t) type);
1342  return;
1343 
1344 mem_error:
1345  xmlErrMemory(ctxt, NULL);
1346  return;
1347 }
1348 
/*
 * xmlCleanSpecialAttrCallback:
 * @payload:  the entry's stored value, compared as an integer
 * @data:  user data supplied by the caller
 * @fullname:  first key of the entry
 * @fullattr:  second key of the entry
 * @unused:  ignored
 *
 * Remove the (fullname, fullattr) entry from ctxt->attsSpecial when
 * its payload equals XML_ATTRIBUTE_CDATA; other entries are kept.
 */
1354 static void
1355 xmlCleanSpecialAttrCallback(void *payload, void *data,
1356  const xmlChar *fullname, const xmlChar *fullattr,
1357  const xmlChar *unused ATTRIBUTE_UNUSED) {
 /* NOTE(review): listing line 1358 is missing here; it presumably
  * declares `ctxt` from `data` -- confirm against the real file. */
1359 
1360  if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1361  xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1362  }
1363 }
1364 
1373 static void
1375 {
1376  if (ctxt->attsSpecial == NULL)
1377  return;
1378 
1380 
1381  if (xmlHashSize(ctxt->attsSpecial) == 0) {
1382  xmlHashFree(ctxt->attsSpecial, NULL);
1383  ctxt->attsSpecial = NULL;
1384  }
1385  return;
1386 }
1387 
/*
 * Checks that the language tag in `lang` has a recognised shape.
 *
 * Shapes accepted by the code below:
 *   - "i-" or "x-" (either case) followed only by ASCII letters;
 *   - one run of 4 to 8 ASCII letters and nothing else;
 *   - a primary code of 2 or 3 ASCII letters, optionally followed by
 *     '-' separated subtags, in this order: extlang (3 letters),
 *     script (4 letters), region (2 letters or 3 digits),
 *     variant (5 to 8 letters).
 * Whatever follows the '-' after a variant is accepted unchecked.
 *
 * Returns 1 when the tag is accepted, 0 otherwise (including lang == NULL).
 */
1446 int
1448 {
1449  const xmlChar *cur = lang, *nxt;
1450 
1451  if (cur == NULL)
1452  return (0);
1453  if (((cur[0] == 'i') && (cur[1] == '-')) ||
1454  ((cur[0] == 'I') && (cur[1] == '-')) ||
1455  ((cur[0] == 'x') && (cur[1] == '-')) ||
1456  ((cur[0] == 'X') && (cur[1] == '-'))) {
1457  /*
1458  * Still allow IANA code and user code which were coming
1459  * from the previous version of the XML-1.0 specification
1460  * it's deprecated but we should not fail
1461  */
1462  cur += 2;
1463  while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1464  ((cur[0] >= 'a') && (cur[0] <= 'z')))
1465  cur++;
1466  return(cur[0] == 0);
1467  }
/* Measure the leading run of letters: cur marks its start, nxt its end. */
1468  nxt = cur;
1469  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1470  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1471  nxt++;
1472  if (nxt - cur >= 4) {
1473  /*
1474  * Reserved
1475  */
/* A run of 4+ letters must be the whole tag and at most 8 letters long. */
1476  if ((nxt - cur > 8) || (nxt[0] != 0))
1477  return(0);
1478  return(1);
1479  }
1480  if (nxt - cur < 2)
1481  return(0);
1482  /* we got an ISO 639 code */
1483  if (nxt[0] == 0)
1484  return(1);
1485  if (nxt[0] != '-')
1486  return(0);
1487 
1488  nxt++;
1489  cur = nxt;
1490  /* now we can have extlang or script or region or variant */
1491  if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1492  goto region_m49;
1493 
/* The length of the next letter run decides which kind of subtag it is. */
1494  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1495  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1496  nxt++;
1497  if (nxt - cur == 4)
1498  goto script;
1499  if (nxt - cur == 2)
1500  goto region;
1501  if ((nxt - cur >= 5) && (nxt - cur <= 8))
1502  goto variant;
1503  if (nxt - cur != 3)
1504  return(0);
1505  /* we parsed an extlang */
1506  if (nxt[0] == 0)
1507  return(1);
1508  if (nxt[0] != '-')
1509  return(0);
1510 
1511  nxt++;
1512  cur = nxt;
1513  /* now we can have script or region or variant */
1514  if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515  goto region_m49;
1516 
1517  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519  nxt++;
1520  if (nxt - cur == 2)
1521  goto region;
1522  if ((nxt - cur >= 5) && (nxt - cur <= 8))
1523  goto variant;
1524  if (nxt - cur != 4)
1525  return(0);
1526  /* we parsed a script */
1527 script:
1528  if (nxt[0] == 0)
1529  return(1);
1530  if (nxt[0] != '-')
1531  return(0);
1532 
1533  nxt++;
1534  cur = nxt;
1535  /* now we can have region or variant */
1536  if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1537  goto region_m49;
1538 
1539  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1540  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1541  nxt++;
1542 
1543  if ((nxt - cur >= 5) && (nxt - cur <= 8))
1544  goto variant;
1545  if (nxt - cur != 2)
1546  return(0);
1547  /* we parsed a region */
1548 region:
1549  if (nxt[0] == 0)
1550  return(1);
1551  if (nxt[0] != '-')
1552  return(0);
1553 
1554  nxt++;
1555  cur = nxt;
1556  /* now we can just have a variant */
1557  while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1558  ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1559  nxt++;
1560 
1561  if ((nxt - cur < 5) || (nxt - cur > 8))
1562  return(0);
1563 
1564  /* we parsed a variant */
1565 variant:
1566  if (nxt[0] == 0)
1567  return(1);
1568  if (nxt[0] != '-')
1569  return(0);
1570  /* extensions and private use subtags not checked */
1571  return (1);
1572 
/*
 * Numeric region: every jump here has already seen a digit in nxt[0], so
 * two more digits make three. The && short-circuit keeps nxt[2] from being
 * read when nxt[1] is the terminating NUL.
 */
1573 region_m49:
1574  if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1575  ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1576  nxt += 3;
1577  goto region;
1578  }
1579  return(0);
1580 }
1581 
1582 /************************************************************************
1583  * *
1584  * Parser stacks related functions and macros *
1585  * *
1586  ************************************************************************/
1587 
1589  const xmlChar ** str);
1590 
1591 #ifdef SAX2
1592 
1603 static int
1604 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1605 {
1606  if (ctxt->options & XML_PARSE_NSCLEAN) {
1607  int i;
1608  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1609  if (ctxt->nsTab[i] == prefix) {
1610  /* in scope */
1611  if (ctxt->nsTab[i + 1] == URL)
1612  return(-2);
1613  /* out of scope keep it */
1614  break;
1615  }
1616  }
1617  }
1618  if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1619  ctxt->nsMax = 10;
1620  ctxt->nsNr = 0;
1621  ctxt->nsTab = (const xmlChar **)
1622  xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1623  if (ctxt->nsTab == NULL) {
1624  xmlErrMemory(ctxt, NULL);
1625  ctxt->nsMax = 0;
1626  return (-1);
1627  }
1628  } else if (ctxt->nsNr >= ctxt->nsMax) {
1629  const xmlChar ** tmp;
1630  ctxt->nsMax *= 2;
1631  tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1632  ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1633  if (tmp == NULL) {
1634  xmlErrMemory(ctxt, NULL);
1635  ctxt->nsMax /= 2;
1636  return (-1);
1637  }
1638  ctxt->nsTab = tmp;
1639  }
1640  ctxt->nsTab[ctxt->nsNr++] = prefix;
1641  ctxt->nsTab[ctxt->nsNr++] = URL;
1642  return (ctxt->nsNr);
1643 }
/*
 * Releases slots from the top of the namespace table (ctxt->nsTab),
 * setting each released slot to NULL.
 *
 * nr is the number of slots to release. When it exceeds the number of
 * slots in use, a diagnostic is sent through xmlGenericError and nr is
 * clamped to ctxt->nsNr.
 *
 * Returns nr (after clamping), or 0 when the table is unallocated or empty.
 */
1653 static int
1655 {
1656  int i;
1657 
1658  if (ctxt->nsTab == NULL) return(0);
1659  if (ctxt->nsNr < nr) {
1660  xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1661  nr = ctxt->nsNr;
1662  }
1663  if (ctxt->nsNr <= 0)
1664  return (0);
1665 
1666  for (i = 0;i < nr;i++) {
1667  ctxt->nsNr--;
1668  ctxt->nsTab[ctxt->nsNr] = NULL;
1669  }
1670  return(nr);
1671 }
1672 #endif
1673 
/*
 * Makes sure the attribute arrays of the context are allocated and large
 * enough for index nr plus five further slots.
 *
 * ctxt->atts holds maxatts pointers; ctxt->attallocs holds one int per
 * five atts slots (maxatts / 5).
 *
 * Returns ctxt->maxatts on success, -1 after reporting an allocation
 * failure through xmlErrMemory.
 */
1674 static int
1676  const xmlChar **atts;
1677  int *attallocs;
1678  int maxatts;
1679 
1680  if (ctxt->atts == NULL) {
1681  maxatts = 55; /* allow for 10 attrs by default */
1682  atts = (const xmlChar **)
1683  xmlMalloc(maxatts * sizeof(xmlChar *));
1684  if (atts == NULL) goto mem_error;
1685  ctxt->atts = atts;
1686  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1687  if (attallocs == NULL) goto mem_error;
1688  ctxt->attallocs = attallocs;
/* maxatts is published only once both arrays exist. */
1689  ctxt->maxatts = maxatts;
1690  } else if (nr + 5 > ctxt->maxatts) {
/* Grow to twice what is needed so repeated calls reallocate less often. */
1691  maxatts = (nr + 5) * 2;
1692  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1693  maxatts * sizeof(const xmlChar *));
1694  if (atts == NULL) goto mem_error;
1695  ctxt->atts = atts;
1696  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1697  (maxatts / 5) * sizeof(int));
1698  if (attallocs == NULL) goto mem_error;
1699  ctxt->attallocs = attallocs;
1700  ctxt->maxatts = maxatts;
1701  }
1702  return(ctxt->maxatts);
1703 mem_error:
1704  xmlErrMemory(ctxt, NULL);
1705  return(-1);
1706 }
1707 
/*
 * Pushes `value` on the input stack (ctxt->inputTab) and makes it the
 * current input (ctxt->input). The table capacity is doubled when full.
 *
 * Returns the index at which `value` was stored, or -1 when either
 * argument is NULL or the table could not be enlarged.
 */
1717 int
1719 {
1720  if ((ctxt == NULL) || (value == NULL))
1721  return(-1);
1722  if (ctxt->inputNr >= ctxt->inputMax) {
1723  ctxt->inputMax *= 2;
/*
 * NOTE(review): the allocation result is stored straight into
 * ctxt->inputTab. If this is a realloc-style call, the previous table is
 * no longer reachable from the context when it fails - confirm.
 */
1724  ctxt->inputTab =
1726  ctxt->inputMax *
1727  sizeof(ctxt->inputTab[0]));
1728  if (ctxt->inputTab == NULL) {
1729  xmlErrMemory(ctxt, NULL);
/* Undo the doubling so inputMax matches the failed state. */
1731  ctxt->inputMax /= 2;
1732  value = NULL;
1733  return (-1);
1734  }
1735  }
1736  ctxt->inputTab[ctxt->inputNr] = value;
1737  ctxt->input = value;
1738  return (ctxt->inputNr++);
1739 }
1750 {
/*
 * Removes the top entry of the input stack and returns it.
 * ctxt->input is updated to the new top, or NULL when the stack becomes
 * empty, and the vacated table slot is set to NULL.
 * Returns NULL when ctxt is NULL or the stack is already empty.
 */
1752 
1753  if (ctxt == NULL)
1754  return(NULL);
1755  if (ctxt->inputNr <= 0)
1756  return (NULL);
1757  ctxt->inputNr--;
1758  if (ctxt->inputNr > 0)
1759  ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1760  else
1761  ctxt->input = NULL;
1762  ret = ctxt->inputTab[ctxt->inputNr];
1763  ctxt->inputTab[ctxt->inputNr] = NULL;
1764  return (ret);
1765 }
/*
 * Pushes `value` on the node stack (ctxt->nodeTab) and makes it the
 * current node (ctxt->node). The table capacity is doubled when full.
 *
 * Unless XML_PARSE_HUGE is set, a stack depth above xmlParserMaxDepth is
 * reported as a fatal error and the parser is halted.
 *
 * Returns the index at which `value` was stored, -1 on allocation failure
 * or excessive depth. A NULL ctxt returns 0, which is also the value
 * returned by a successful first push.
 */
1775 int
1777 {
1778  if (ctxt == NULL) return(0);
1779  if (ctxt->nodeNr >= ctxt->nodeMax) {
1780  xmlNodePtr *tmp;
1781 
1782  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1783  ctxt->nodeMax * 2 *
1784  sizeof(ctxt->nodeTab[0]));
1785  if (tmp == NULL) {
1786  xmlErrMemory(ctxt, NULL);
1787  return (-1);
1788  }
/* nodeMax is only updated once the larger table is in place. */
1789  ctxt->nodeTab = tmp;
1790  ctxt->nodeMax *= 2;
1791  }
1792  if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1793  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1794  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1795  "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1797  xmlHaltParser(ctxt);
1798  return(-1);
1799  }
1800  ctxt->nodeTab[ctxt->nodeNr] = value;
1801  ctxt->node = value;
1802  return (ctxt->nodeNr++);
1803 }
1804 
/*
 * Removes the top entry of the node stack and returns it.
 * ctxt->node is updated to the new top, or NULL when the stack becomes
 * empty, and the vacated table slot is set to NULL.
 * Returns NULL when ctxt is NULL or the stack is already empty.
 */
1813 xmlNodePtr
1815 {
1816  xmlNodePtr ret;
1817 
1818  if (ctxt == NULL) return(NULL);
1819  if (ctxt->nodeNr <= 0)
1820  return (NULL);
1821  ctxt->nodeNr--;
1822  if (ctxt->nodeNr > 0)
1823  ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1824  else
1825  ctxt->node = NULL;
1826  ret = ctxt->nodeTab[ctxt->nodeNr];
1827  ctxt->nodeTab[ctxt->nodeNr] = NULL;
1828  return (ret);
1829 }
1830 
/*
 * Pushes an element name on the name stack (ctxt->nameTab) together with
 * its namespace data. For the name stored at index i, ctxt->pushTab holds
 * three parallel slots: [3*i] = prefix, [3*i + 1] = URI and
 * [3*i + 2] = nsNr (stored as a pointer-sized integer).
 *
 * Both tables are doubled when the name table is full; pushTab is
 * allocated on first use otherwise.
 *
 * Returns the index at which the name was stored, or -1 after reporting
 * an allocation failure through xmlErrMemory.
 */
1842 static int
1844  const xmlChar *prefix, const xmlChar *URI, int nsNr)
1845 {
1846  if (ctxt->nameNr >= ctxt->nameMax) {
1847  const xmlChar * *tmp;
1848  void **tmp2;
1849  ctxt->nameMax *= 2;
1850  tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1851  ctxt->nameMax *
1852  sizeof(ctxt->nameTab[0]));
1853  if (tmp == NULL) {
1854  ctxt->nameMax /= 2;
1855  goto mem_error;
1856  }
1857  ctxt->nameTab = tmp;
1858  tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1859  ctxt->nameMax * 3 *
1860  sizeof(ctxt->pushTab[0]));
1861  if (tmp2 == NULL) {
/*
 * nameTab has already been replaced by the larger block at this point;
 * only the recorded capacity is rolled back.
 */
1862  ctxt->nameMax /= 2;
1863  goto mem_error;
1864  }
1865  ctxt->pushTab = tmp2;
1866  } else if (ctxt->pushTab == NULL) {
1867  ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
1868  sizeof(ctxt->pushTab[0]));
1869  if (ctxt->pushTab == NULL)
1870  goto mem_error;
1871  }
1872  ctxt->nameTab[ctxt->nameNr] = value;
1873  ctxt->name = value;
1874  ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1875  ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1876  ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
1877  return (ctxt->nameNr++);
1878 mem_error:
1879  xmlErrMemory(ctxt, NULL);
1880  return (-1);
1881 }
1882 #ifdef LIBXML_PUSH_ENABLED
1883 
1891 static const xmlChar *
1892 nameNsPop(xmlParserCtxtPtr ctxt)
1893 {
1894  const xmlChar *ret;
1895 
1896  if (ctxt->nameNr <= 0)
1897  return (NULL);
1898  ctxt->nameNr--;
1899  if (ctxt->nameNr > 0)
1900  ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1901  else
1902  ctxt->name = NULL;
1903  ret = ctxt->nameTab[ctxt->nameNr];
1904  ctxt->nameTab[ctxt->nameNr] = NULL;
1905  return (ret);
1906 }
1907 #endif /* LIBXML_PUSH_ENABLED */
1908 
/*
 * Pushes `value` on the element name stack (ctxt->nameTab) and makes it
 * the current name (ctxt->name). The table capacity is doubled when full.
 *
 * Returns the index at which `value` was stored, or -1 when ctxt is NULL
 * or the table could not be enlarged.
 */
1918 int
1920 {
1921  if (ctxt == NULL) return (-1);
1922 
1923  if (ctxt->nameNr >= ctxt->nameMax) {
1924  const xmlChar * *tmp;
1925  tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1926  ctxt->nameMax * 2 *
1927  sizeof(ctxt->nameTab[0]));
1928  if (tmp == NULL) {
1929  goto mem_error;
1930  }
/* nameMax is only updated once the larger table is in place. */
1931  ctxt->nameTab = tmp;
1932  ctxt->nameMax *= 2;
1933  }
1934  ctxt->nameTab[ctxt->nameNr] = value;
1935  ctxt->name = value;
1936  return (ctxt->nameNr++);
1937 mem_error:
1938  xmlErrMemory(ctxt, NULL);
1939  return (-1);
1940 }
/*
 * Removes the top entry of the element name stack and returns it.
 * ctxt->name is updated to the new top, or NULL when the stack becomes
 * empty, and the vacated table slot is set to NULL.
 * Returns NULL when ctxt is NULL or the stack is already empty.
 */
1949 const xmlChar *
1951 {
1952  const xmlChar *ret;
1953 
1954  if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1955  return (NULL);
1956  ctxt->nameNr--;
1957  if (ctxt->nameNr > 0)
1958  ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1959  else
1960  ctxt->name = NULL;
1961  ret = ctxt->nameTab[ctxt->nameNr];
1962  ctxt->nameTab[ctxt->nameNr] = NULL;
1963  return (ret);
1964 }
1965 
1966 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1967  if (ctxt->spaceNr >= ctxt->spaceMax) {
1968  int *tmp;
1969 
1970  ctxt->spaceMax *= 2;
1971  tmp = (int *) xmlRealloc(ctxt->spaceTab,
1972  ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1973  if (tmp == NULL) {
1974  xmlErrMemory(ctxt, NULL);
1975  ctxt->spaceMax /=2;
1976  return(-1);
1977  }
1978  ctxt->spaceTab = tmp;
1979  }
1980  ctxt->spaceTab[ctxt->spaceNr] = val;
1981  ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1982  return(ctxt->spaceNr++);
1983 }
1984 
1985 static int spacePop(xmlParserCtxtPtr ctxt) {
1986  int ret;
1987  if (ctxt->spaceNr <= 0) return(0);
1988  ctxt->spaceNr--;
1989  if (ctxt->spaceNr > 0)
1990  ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1991  else
1992  ctxt->space = &ctxt->spaceTab[0];
1993  ret = ctxt->spaceTab[ctxt->spaceNr];
1994  ctxt->spaceTab[ctxt->spaceNr] = -1;
1995  return(ret);
1996 }
1997 
1998 /*
1999  * Macros for accessing the content. Those should be used only by the parser,
2000  * and not exported.
2001  *
2002  * Dirty macros, i.e. one often need to make assumption on the context to
2003  * use them
2004  *
2005  * CUR_PTR return the current pointer to the xmlChar to be parsed.
2006  * To be used with extreme caution since operations consuming
2007  * characters may move the input buffer to a different location !
2008  * CUR returns the current xmlChar value, i.e. an 8 bit value.
2009  * This should be used internally by the parser
2010  * only to compare to ASCII values; otherwise it would break when
2011  * running with UTF-8 encoding.
2012  * RAW same as CUR but in the input buffer, bypass any token
2013  * extraction that may have been done
2014  * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
2015  * to compare against ASCII based substrings.
2016  * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2017  * strings without newlines within the parser.
2018  * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2019  * defined char within the parser.
2020  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2021  *
2022  * NEXT Skip to the next character, this does the proper decoding
2023  * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2024  * NEXTL(l) Skip the current unicode character of l xmlChars long.
2025  * CUR_CHAR(l) returns the current unicode character (int), set l
2026  * to the number of xmlChars used for the encoding [0-5].
2027  * CUR_SCHAR same but operate on a string instead of the context
2028  * COPY_BUF copy the current unicode char to the target buffer, increment
2029  * the index
2030  * GROW, SHRINK handling of input buffers
2031  */
2032 
/*
 * Raw accessors for the current input. All of them expand to expressions
 * on a variable named ctxt that must be in scope at the point of use.
 * RAW and CUR expand to the same expression; BASE_PTR is
 * ctxt->input->base, the pointer cur is measured against in SHRINK/GROW.
 */
2033 #define RAW (*ctxt->input->cur)
2034 #define CUR (*ctxt->input->cur)
2035 #define NXT(val) ctxt->input->cur[(val)]
2036 #define CUR_PTR ctxt->input->cur
2037 #define BASE_PTR ctxt->input->base
2038 
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 *
 * Bytes are read as unsigned char and the comparison stops at the first
 * mismatch, so nothing past a mismatching byte is read. Every parameter
 * is parenthesized, so an argument may be any expression (pointer
 * arithmetic, a conditional, ...). s is expanded once per compared byte
 * and must therefore be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2056 
/*
 * SKIP(val): advance the current input by val bytes in one step, adding
 * val to ctxt->nbChars and to the column counter. The line counter is not
 * touched. If the new position holds a 0 byte, more input is requested
 * with xmlParserInputGrow. val is expanded three times.
 */
2057 #define SKIP(val) do { \
2058  ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
2059  if (*ctxt->input->cur == 0) \
2060  xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2061  } while (0)
2062 
/*
 * SKIPL(val): advance the current input by val bytes, one byte at a time.
 *
 * Unlike SKIP, each byte is inspected: a '\n' bumps the line counter and
 * resets the column to 1, any other byte bumps the column. ctxt->nbChars
 * is incremented once per byte. If the final position holds a 0 byte,
 * more input is requested with xmlParserInputGrow.
 *
 * val is parenthesized so that any expression can be passed; it is
 * re-evaluated on every loop iteration and must be free of side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2075 
/*
 * SHRINK: when not in progressive mode, and the read position is more
 * than 2 * INPUT_CHUNK bytes past the buffer base while fewer than
 * 2 * INPUT_CHUNK bytes remain before the end, call xmlSHRINK().
 *
 * The macro expands to a bare `if` statement that already ends in ';'.
 * Used as the unbraced body of an outer if/else, it would capture the
 * `else`.
 */
2076 #define SHRINK if ((ctxt->progressive == 0) && \
2077  (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2078  (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2079  xmlSHRINK (ctxt);
2080 
/*
 * Out-of-line part of SHRINK: shrinks the current input through
 * xmlParserInputShrink, then acts on the case where the read position
 * holds a 0 byte afterwards.
 */
2081 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2082  xmlParserInputShrink(ctxt->input);
2083  if (*ctxt->input->cur == 0)
2085 }
2086 
/*
 * GROW: when not in progressive mode and fewer than INPUT_CHUNK bytes
 * remain between the read position and the end of the buffer, call
 * xmlGROW().
 *
 * The macro expands to a bare `if` statement that already ends in ';'.
 * Used as the unbraced body of an outer if/else, it would capture the
 * `else`.
 */
2087 #define GROW if ((ctxt->progressive == 0) && \
2088  (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2089  xmlGROW (ctxt);
2090 
/*
 * Out-of-line part of GROW. Besides asking for more input it enforces two
 * safety checks, each of which halts the parser with a fatal error:
 *   - unless XML_PARSE_HUGE is set, neither the consumed part (cur - base)
 *     nor the pending part (end - cur) of a buffer-backed input may exceed
 *     XML_MAX_LOOKUP_LIMIT;
 *   - cur must lie within [base, end].
 */
2091 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2092  ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2093  ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2094 
2095  if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2096  (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2097  ((ctxt->input->buf) &&
2099  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2100  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2101  xmlHaltParser(ctxt);
2102  return;
2103  }
2105  if ((ctxt->input->cur > ctxt->input->end) ||
2106  (ctxt->input->cur < ctxt->input->base)) {
/* Here the parser is halted before the error is reported; the check above does it the other way round. */
2107  xmlHaltParser(ctxt);
2108  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2109  return;
2110  }
2111  if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2113 }
2114 
/* SKIP_BLANKS / NEXT: thin wrappers passing the in-scope ctxt to the parser helpers. */
2115 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2116 
2117 #define NEXT xmlNextChar(ctxt)
2118 
/*
 * NEXT1: advance exactly one byte, bumping the column and nbChars but not
 * the line counter; asks for more input when the new position holds a
 * 0 byte. Expands to a brace block, not to a do { } while (0) statement.
 */
2119 #define NEXT1 { \
2120  ctxt->input->col++; \
2121  ctxt->input->cur++; \
2122  ctxt->nbChars++; \
2123  if (*ctxt->input->cur == 0) \
2124  xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2125  }
2126 
/*
 * NEXTL(l): update line/col for the character at the read position, then
 * advance cur by l bytes. nbChars is not updated and no input growth is
 * requested. l is expanded without parentheses.
 */
2127 #define NEXTL(l) do { \
2128  if (*(ctxt->input->cur) == '\n') { \
2129  ctxt->input->line++; ctxt->input->col = 1; \
2130  } else ctxt->input->col++; \
2131  ctxt->input->cur += l; \
2132  } while (0)
2133 
/* CUR_CHAR / CUR_SCHAR: decode one character; l must be an lvalue, its address is passed on. */
2134 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2135 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2136 
/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i, advancing
 * i. When l == 1 a single byte is stored, otherwise
 * xmlCopyCharMultiByte writes the bytes and its return value is added to i.
 * Expands to an unbraced if/else without a trailing ';': it must not be
 * used as the unbraced body of another `if` that has its own `else`.
 */
2137 #define COPY_BUF(l,b,i,v) \
2138  if (l == 1) b[i++] = (xmlChar) v; \
2139  else i += xmlCopyCharMultiByte(&b[i],v)
2140 
/*
 * Skips blank characters at the current input position.
 *
 * Two paths are used:
 *   - exactly one input on the stack and the parser not in the
 *     XML_PARSER_DTD state: a tight loop over the bytes of the current
 *     input that keeps the line/column counters up to date;
 *   - otherwise: a loop that, besides blanks, also steps over '%'
 *     parameter-entity references (when expansion is allowed) and pops
 *     inputs that are exhausted.
 *
 * Returns the number of steps taken; in the second path an entity
 * reference and an input pop each count as one step.
 */
2151 int
2153  int res = 0;
2154 
2155  /*
2156  * It's Okay to use CUR/NEXT here since all the blanks are on
2157  * the ASCII range.
2158  */
2159  if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2160  const xmlChar *cur;
2161  /*
2162  * if we are in the document content, go really fast
2163  */
2164  cur = ctxt->input->cur;
2165  while (IS_BLANK_CH(*cur)) {
2166  if (*cur == '\n') {
2167  ctxt->input->line++; ctxt->input->col = 1;
2168  } else {
2169  ctxt->input->col++;
2170  }
2171  cur++;
2172  res++;
2173  if (*cur == 0) {
/*
 * A 0 byte was reached: the local position is written back to the input,
 * and re-read afterwards because ctxt->input->cur may have changed in
 * between.
 */
2174  ctxt->input->cur = cur;
2176  cur = ctxt->input->cur;
2177  }
2178  }
2179  ctxt->input->cur = cur;
2180  } else {
/* PE references are expanded only in external content or inside a nested input. */
2181  int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2182 
2183  while (1) {
2184  if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2185  NEXT;
2186  } else if (CUR == '%') {
2187  /*
2188  * Need to handle support of entities branching here
2189  */
/* A '%' followed by a blank or the end of input is not a reference. */
2190  if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2191  break;
2192  xmlParsePEReference(ctxt);
2193  } else if (CUR == 0) {
/* End of a nested input: resume in the enclosing one; the outermost input is never popped. */
2194  if (ctxt->inputNr <= 1)
2195  break;
2196  xmlPopInput(ctxt);
2197  } else {
2198  break;
2199  }
2200 
2201  /*
2202  * Also increase the counter when entering or exiting a PERef.
2203  * The spec says: "When a parameter-entity reference is recognized
2204  * in the DTD and included, its replacement text MUST be enlarged
2205  * by the attachment of one leading and one following space (#x20)
2206  * character."
2207  */
2208  res++;
2209  }
2210  }
2211  return(res);
2212 }
2213 
2214 /************************************************************************
2215  * *
2216  * Commodity functions to handle entities *
2217  * *
2218  ************************************************************************/
2219 
/*
 * Leaves the current (nested) input and resumes in the enclosing one.
 *
 * Returns 0 without doing anything when ctxt is NULL or when at most one
 * input is on the stack; otherwise returns the byte at the read position
 * of the input that is current on exit (CUR).
 *
 * When the parser is outside the DTD subset and not in the EOF state, an
 * "Unfinished entity outside the DTD" condition is raised.
 * NOTE(review): presumably this is reported as a fatal error - confirm
 * against the reporting call.
 */
2229 xmlChar
2231  if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2234  "Popping input %d\n", ctxt->inputNr);
2235  if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2236  (ctxt->instate != XML_PARSER_EOF))
2238  "Unfinished entity outside the DTD");
2240  if (*ctxt->input->cur == 0)
2242  return(CUR);
2243 }
2244 
/*
 * Switches the parser to a new input by pushing `input` on the input
 * stack with inputPush().
 *
 * The push is refused when more than 40 inputs are already stacked and
 * XML_PARSE_HUGE is not set, or when more than 1024 are stacked in any
 * case; the nested inputs are then unwound down to the outermost one.
 *
 * Returns the value of inputPush() on success, or -1 when input is NULL,
 * the nesting limit is exceeded, or the parser is in the XML_PARSER_EOF
 * state after the push.
 *
 * NOTE(review): ctxt is dereferenced without a NULL check, unlike input.
 */
2254 int
2256  int ret;
2257  if (input == NULL) return(-1);
2258 
2259  if (xmlParserDebugEntities) {
2260  if ((ctxt->input != NULL) && (ctxt->input->filename))
2262  "%s(%d): ", ctxt->input->filename,
2263  ctxt->input->line);
2265  "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2266  }
2267  if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2268  (ctxt->inputNr > 1024)) {
2270  while (ctxt->inputNr > 1)
2272  return(-1);
2273  }
2274  ret = inputPush(ctxt, input);
2275  if (ctxt->instate == XML_PARSER_EOF)
2276  return(-1);
2277  GROW;
2278  return(ret);
2279 }
2280 
/*
 * Parses a numeric character reference at the current input position:
 *   "&#x" hex-digits ";"   or   "&#" decimal-digits ";"
 * and consumes it from the input.
 *
 * Returns the referenced code point when it satisfies IS_CHAR. Returns 0
 * when the input is not a character reference, when a digit is malformed,
 * when the value is 0x110000 or above, when the value is not a legal
 * character, or when the parser reaches the XML_PARSER_EOF state while
 * reading.
 */
2296 int
2298  int val = 0;
2299  int count = 0;
2300 
2301  /*
2302  * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2303  */
2304  if ((RAW == '&') && (NXT(1) == '#') &&
2305  (NXT(2) == 'x')) {
2306  SKIP(3);
2307  GROW;
2308  while (RAW != ';') { /* loop blocked by count */
/* count is bumped here and again at the end of each iteration; above 20 it is reset and more input is requested. */
2309  if (count++ > 20) {
2310  count = 0;
2311  GROW;
2312  if (ctxt->instate == XML_PARSER_EOF)
2313  return(0);
2314  }
2315  if ((RAW >= '0') && (RAW <= '9'))
2316  val = val * 16 + (CUR - '0');
2317  else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2318  val = val * 16 + (CUR - 'a') + 10;
2319  else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2320  val = val * 16 + (CUR - 'A') + 10;
2321  else {
2323  val = 0;
2324  break;
2325  }
/* Clamp so that a long digit string cannot overflow val; the clamped value is rejected below. */
2326  if (val > 0x110000)
2327  val = 0x110000;
2328 
2329  NEXT;
2330  count++;
2331  }
2332  if (RAW == ';') {
2333  /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2334  ctxt->input->col++;
2335  ctxt->nbChars ++;
2336  ctxt->input->cur++;
2337  }
2338  } else if ((RAW == '&') && (NXT(1) == '#')) {
2339  SKIP(2);
2340  GROW;
2341  while (RAW != ';') { /* loop blocked by count */
2342  if (count++ > 20) {
2343  count = 0;
2344  GROW;
2345  if (ctxt->instate == XML_PARSER_EOF)
2346  return(0);
2347  }
2348  if ((RAW >= '0') && (RAW <= '9'))
2349  val = val * 10 + (CUR - '0');
2350  else {
2352  val = 0;
2353  break;
2354  }
/* Same clamp as in the hexadecimal branch. */
2355  if (val > 0x110000)
2356  val = 0x110000;
2357 
2358  NEXT;
2359  count++;
2360  }
2361  if (RAW == ';') {
2362  /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2363  ctxt->input->col++;
2364  ctxt->nbChars ++;
2365  ctxt->input->cur++;
2366  }
2367  } else {
2369  }
2370 
2371  /*
2372  * [ WFC: Legal Character ]
2373  * Characters referred to using character references must match the
2374  * production for Char.
2375  */
2376  if (val >= 0x110000) {
2377  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2378  "xmlParseCharRef: character reference out of bounds\n",
2379  val);
2380  } else if (IS_CHAR(val)) {
2381  return(val);
2382  } else {
2383  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2384  "xmlParseCharRef: invalid xmlChar value %d\n",
2385  val);
2386  }
2387  return(0);
2388 }
2389 
/*
 * String counterpart of the character-reference parser: reads
 *   "&#x" hex-digits ";"   or   "&#" decimal-digits ";"
 * from the string *str instead of the parser input.
 *
 * When *str starts with "&#", *str is advanced to where scanning stopped
 * (past the ';' if one was found), even when the value turns out to be
 * invalid. When it does not start with "&#", *str is left untouched.
 *
 * Returns the referenced code point when it satisfies IS_CHAR, otherwise
 * 0 (also when str or *str is NULL).
 */
2408 static int
2410  const xmlChar *ptr;
2411  xmlChar cur;
2412  int val = 0;
2413 
2414  if ((str == NULL) || (*str == NULL)) return(0);
2415  ptr = *str;
2416  cur = *ptr;
2417  if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2418  ptr += 3;
2419  cur = *ptr;
2420  while (cur != ';') { /* Non input consuming loop */
2421  if ((cur >= '0') && (cur <= '9'))
2422  val = val * 16 + (cur - '0');
2423  else if ((cur >= 'a') && (cur <= 'f'))
2424  val = val * 16 + (cur - 'a') + 10;
2425  else if ((cur >= 'A') && (cur <= 'F'))
2426  val = val * 16 + (cur - 'A') + 10;
2427  else {
/* Any other byte, including the terminating NUL, ends the loop with val reset to 0. */
2429  val = 0;
2430  break;
2431  }
/* Clamp so that a long digit string cannot overflow val; the clamped value is rejected below. */
2432  if (val > 0x110000)
2433  val = 0x110000;
2434 
2435  ptr++;
2436  cur = *ptr;
2437  }
2438  if (cur == ';')
2439  ptr++;
2440  } else if ((cur == '&') && (ptr[1] == '#')){
2441  ptr += 2;
2442  cur = *ptr;
2443  while (cur != ';') { /* Non input consuming loops */
2444  if ((cur >= '0') && (cur <= '9'))
2445  val = val * 10 + (cur - '0');
2446  else {
2448  val = 0;
2449  break;
2450  }
2451  if (val > 0x110000)
2452  val = 0x110000;
2453 
2454  ptr++;
2455  cur = *ptr;
2456  }
2457  if (cur == ';')
2458  ptr++;
2459  } else {
2461  return(0);
2462  }
2463  *str = ptr;
2464 
2465  /*
2466  * [ WFC: Legal Character ]
2467  * Characters referred to using character references must match the
2468  * production for Char.
2469  */
2470  if (val >= 0x110000) {
2471  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2472  "xmlParseStringCharRef: character reference out of bounds\n",
2473  val);
2474  } else if (IS_CHAR(val)) {
2475  return(val);
2476  } else {
2477  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2478  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2479  val);
2480  }
2481  return(0);
2482 }
2483 
/*
 * Decides, from the current parser state (ctxt->instate), whether a
 * parameter-entity reference at the read position should be expanded now.
 *
 * Every state handled in the switch returns without expanding anything,
 * except XML_PARSER_DTD: there the reference is passed to
 * xmlParsePEReference() unless the parser is in the internal subset at
 * the outermost input, or the '%' is followed by a blank or the end of
 * input.
 *
 * NOTE(review): several states presumably report an error before
 * returning - confirm against the individual cases.
 */
2516 void
2518  switch(ctxt->instate) {
2520  return;
2521  case XML_PARSER_COMMENT:
2522  return;
2523  case XML_PARSER_START_TAG:
2524  return;
2525  case XML_PARSER_END_TAG:
2526  return;
2527  case XML_PARSER_EOF:
2529  return;
2530  case XML_PARSER_PROLOG:
2531  case XML_PARSER_START:
2532  case XML_PARSER_MISC:
2534  return;
2536  case XML_PARSER_CONTENT:
2538  case XML_PARSER_PI:
2541  /* we just ignore it there */
2542  return;
2543  case XML_PARSER_EPILOG:
2545  return;
2547  /*
2548  * NOTE: in the case of entity values, we don't do the
2549  * substitution here since we need the literal
2550  * entity value to be able to save the internal
2551  * subset of the document.
2552  * This will be handled by xmlStringDecodeEntities
2553  */
2554  return;
2555  case XML_PARSER_DTD:
2556  /*
2557  * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2558  * In the internal DTD subset, parameter-entity references
2559  * can occur only where markup declarations can occur, not
2560  * within markup declarations.
2561  * In that case this is handled in xmlParseMarkupDecl
2562  */
2563  if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2564  return;
2565  if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2566  return;
/* Only this path leaves the switch and reaches the expansion below. */
2567  break;
2568  case XML_PARSER_IGNORE:
2569  return;
2570  }
2571 
2572  xmlParsePEReference(ctxt);
2573 }
2574 
/*
 * growBuffer(buffer, n):
 * Macro used to grow the current buffer to (buffer##_size * 2 + n) bytes.
 *
 * Requirements at the point of use:
 *   - `buffer` is a plain identifier naming an xmlChar pointer, and a
 *     variable called buffer##_size of type size_t holds its current size;
 *   - a `mem_error:` label is in scope; control jumps there when the new
 *     size wraps around or when xmlRealloc fails. In both cases the buffer
 *     and its recorded size are left untouched.
 *
 * n is parenthesized so that any expression can be passed; it is
 * evaluated exactly once.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2589 
/*
 * Builds a new string from the first `len` bytes of `str` in which
 * references are replaced by what they stand for:
 *   - "&#...;" character references are always decoded;
 *   - "&name;" entity references are substituted when `what` contains
 *     XML_SUBSTITUTE_REF;
 *   - "%name;" parameter-entity references are substituted when `what`
 *     contains XML_SUBSTITUTE_PEREF.
 * Scanning stops at the end of the range, at a NUL character, or at the
 * first occurrence of end, end2 or end3.
 *
 * Substituted entity content is itself decoded recursively; ctxt->depth
 * tracks the nesting and the call fails once it exceeds 40 (without
 * XML_PARSE_HUGE) or 1024.
 *
 * Returns the NUL-terminated result (released with xmlFree, as done for
 * the recursive results below), or NULL on invalid arguments, excessive
 * nesting, allocation failure or a decoding error.
 */
2609 xmlChar *
2611  int what, xmlChar end, xmlChar end2, xmlChar end3) {
2612  xmlChar *buffer = NULL;
2613  size_t buffer_size = 0;
2614  size_t nbchars = 0;
2615 
2616  xmlChar *current = NULL;
2617  xmlChar *rep = NULL;
2618  const xmlChar *last;
2619  xmlEntityPtr ent;
2620  int c,l;
2621 
2622  if ((ctxt == NULL) || (str == NULL) || (len < 0))
2623  return(NULL);
2624  last = str + len;
2625 
2626  if (((ctxt->depth > 40) &&
2627  ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2628  (ctxt->depth > 1024)) {
2630  return(NULL);
2631  }
2632 
2633  /*
2634  * allocate a translation buffer.
2635  */
2638  if (buffer == NULL) goto mem_error;
2639 
2640  /*
2641  * OK loop until we reach one of the ending char or a size limit.
2642  * we are operating on already parsed values.
2643  */
2644  if (str < last)
2645  c = CUR_SCHAR(str, l);
2646  else
2647  c = 0;
2648  while ((c != 0) && (c != end) && /* non input consuming loop */
2649  (c != end2) && (c != end3)) {
2650 
2651  if (c == 0) break;
2652  if ((c == '&') && (str[1] == '#')) {
2653  int val = xmlParseStringCharRef(ctxt, &str);
2654  if (val == 0)
2655  goto int_error;
/* l is passed as 0, so COPY_BUF always takes its xmlCopyCharMultiByte branch. */
2656  COPY_BUF(0,buffer,nbchars,val);
2657  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2659  }
2660  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2663  "String decoding Entity Reference: %.30s\n",
2664  str);
2665  ent = xmlParseStringEntityRef(ctxt, &str);
2666  xmlParserEntityCheck(ctxt, 0, ent, 0);
2667  if (ent != NULL)
2668  ctxt->nbentities += ent->checked / 2;
2669  if ((ent != NULL) &&
2671  if (ent->content != NULL) {
/* Only the first byte of the entity content is copied in this branch. */
2672  COPY_BUF(0,buffer,nbchars,ent->content[0]);
2673  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2675  }
2676  } else {
2677  xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2678  "predefined entity has no content\n");
2679  goto int_error;
2680  }
2681  } else if ((ent != NULL) && (ent->content != NULL)) {
/* Decode the entity content recursively, then append the result byte by byte. */
2682  ctxt->depth++;
2683  rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2684  0, 0, 0);
2685  ctxt->depth--;
2686  if (rep == NULL)
2687  goto int_error;
2688 
2689  current = rep;
2690  while (*current != 0) { /* non input consuming loop */
2691  buffer[nbchars++] = *current++;
2692  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2693  if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2694  goto int_error;
2696  }
2697  }
2698  xmlFree(rep);
2699  rep = NULL;
2700  } else if (ent != NULL) {
/* Entity known but without content: the reference is written back as "&name;". */
2701  int i = xmlStrlen(ent->name);
2702  const xmlChar *cur = ent->name;
2703 
2704  buffer[nbchars++] = '&';
2705  if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2707  }
2708  for (;i > 0;i--)
2709  buffer[nbchars++] = *cur++;
2710  buffer[nbchars++] = ';';
2711  }
2712  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2715  "String decoding PE Reference: %.30s\n", str);
2716  ent = xmlParseStringPEReference(ctxt, &str);
2717  xmlParserEntityCheck(ctxt, 0, ent, 0);
2718  if (ent != NULL)
2719  ctxt->nbentities += ent->checked / 2;
2720  if (ent != NULL) {
2721  if (ent->content == NULL) {
2722  /*
2723  * Note: external parsed entities will not be loaded,
2724  * it is not required for a non-validating parser to
2725  * complete external PEReferences coming from the
2726  * internal subset
2727  */
2728  if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2729  ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2730  (ctxt->validate != 0)) {
2731  xmlLoadEntityContent(ctxt, ent);
2732  } else {
2733  xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2734  "not validating will not read content for PE entity %s\n",
2735  ent->name, NULL);
2736  }
2737  }
2738  ctxt->depth++;
2739  rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2740  0, 0, 0);
2741  ctxt->depth--;
2742  if (rep == NULL)
2743  goto int_error;
2744  current = rep;
2745  while (*current != 0) { /* non input consuming loop */
2746  buffer[nbchars++] = *current++;
2747  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2748  if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2749  goto int_error;
2751  }
2752  }
2753  xmlFree(rep);
2754  rep = NULL;
2755  }
2756  } else {
/* Ordinary character: copy it through and step over its l bytes. */
2757  COPY_BUF(l,buffer,nbchars,c);
2758  str += l;
2759  if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2761  }
2762  }
2763  if (str < last)
2764  c = CUR_SCHAR(str, l);
2765  else
2766  c = 0;
2767  }
2768  buffer[nbchars] = 0;
2769  return(buffer);
2770 
/* mem_error reports the failure, then falls through to the shared cleanup in int_error. */
2771 mem_error:
2772  xmlErrMemory(ctxt, NULL);
2773 int_error:
2774  if (rep != NULL)
2775  xmlFree(rep);
2776  if (buffer != NULL)
2777  xmlFree(buffer);
2778  return(NULL);
2779 }
2780 
/*
 * Convenience wrapper around xmlStringLenDecodeEntities() for a
 * NUL-terminated string: the length is taken with xmlStrlen(str) and all
 * other arguments are passed through unchanged.
 *
 * Returns whatever xmlStringLenDecodeEntities() returns, or NULL when
 * ctxt or str is NULL.
 */
2799 xmlChar *
2801  xmlChar end, xmlChar end2, xmlChar end3) {
2802  if ((ctxt == NULL) || (str == NULL)) return(NULL);
2803  return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2804  end, end2, end3));
2805 }
2806 
2807 /************************************************************************
2808  * *
2809  * Commodity functions, cleanup needed ? *
2810  * *
2811  ************************************************************************/
2812 
2825 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2826  int blank_chars) {
2827  int i, ret;
2828  xmlNodePtr lastChild;
2829 
2830  /*
2831  * Don't spend time trying to differentiate them, the same callback is
2832  * used !
2833  */
2834  if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2835  return(0);
2836 
2837  /*
2838  * Check for xml:space value.
2839  */
2840  if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2841  (*(ctxt->space) == -2))
2842  return(0);
2843 
2844  /*
2845  * Check that the string is made of blanks
2846  */
2847  if (blank_chars == 0) {
2848  for (i = 0;i < len;i++)
2849  if (!(IS_BLANK_CH(str[i]))) return(0);
2850  }
2851 
2852  /*
2853  * Look if the element is mixed content in the DTD if available
2854  */
2855  if (ctxt->node == NULL) return(0);
2856  if (ctxt->myDoc != NULL) {
2857  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2858  if (ret == 0) return(1);
2859  if (ret == 1) return(0);
2860  }
2861 
2862  /*
2863  * Otherwise, heuristic :-\
2864  */
2865  if ((RAW != '<') && (RAW != 0xD)) return(0);
2866  if ((ctxt->node->children == NULL) &&
2867  (RAW == '<') && (NXT(1) == '/')) return(0);
2868 
2869  lastChild = xmlGetLastChild(ctxt->node);
2870  if (lastChild == NULL) {
2871  if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2872  (ctxt->node->content != NULL)) return(0);
2873  } else if (xmlNodeIsText(lastChild))
2874  return(0);
2875  else if ((ctxt->node->children != NULL) &&
2876  (xmlNodeIsText(ctxt->node->children)))
2877  return(0);
2878  return(1);
2879 }
2880 
2881 /************************************************************************
2882  * *
2883  * Extra stuff for namespace support *
2884  * Relates to http://www.w3.org/TR/WD-xml-names *
2885  * *
2886  ************************************************************************/
2887 
/*
 * Split a qualified name of the form "prefix:local" into its two parts.
 *
 * Returns a newly allocated string: the local part when a prefix was found,
 * otherwise a copy of the whole name. When a prefix is found, a newly
 * allocated copy of it is stored in *prefix; otherwise *prefix is NULL.
 * Returns NULL when prefix or name is NULL, or when an allocation fails.
 *
 * NOTE(review): the line carrying the function name/parameters and the
 * declaration of the fixed-size scratch array `buf` are missing from this
 * extract; `ctxt`, `name`, `prefix` and `buf` are presumably declared
 * there -- confirm against the upstream file.
 */
2906 xmlChar *
2909  xmlChar *buffer = NULL;
2910  int len = 0;
2911  int max = XML_MAX_NAMELEN;
2912  xmlChar *ret = NULL;
2913  const xmlChar *cur = name;
2914  int c;
2915 
2916  if (prefix == NULL) return(NULL);
2917  *prefix = NULL;
2918 
2919  if (cur == NULL) return(NULL);
2920 
2921 #ifndef XML_XML_NAMESPACE
2922  /* xml: prefix is not really a namespace */
2923  if ((cur[0] == 'x') && (cur[1] == 'm') &&
2924  (cur[2] == 'l') && (cur[3] == ':'))
2925  return(xmlStrdup(name));
2926 #endif
2927 
2928  /* nasty but well-formed */
2929  if (cur[0] == ':')
2930  return(xmlStrdup(name));
2931 
/* First pass: copy bytes up to the first ':' (or end of string) into buf. */
2932  c = *cur++;
2933  while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2934  buf[len++] = c;
2935  c = *cur++;
2936  }
2937  if (len >= max) {
2938  /*
2939  * Okay someone managed to make a huge name, so he's ready to pay
2940  * for the processing speed.
2941  */
2942  max = len * 2;
2943 
2944  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2945  if (buffer == NULL) {
2946  xmlErrMemory(ctxt, NULL);
2947  return(NULL);
2948  }
2949  memcpy(buffer, buf, len);
2950  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2951  if (len + 10 > max) {
2952  xmlChar *tmp;
2953 
2954  max *= 2;
2955  tmp = (xmlChar *) xmlRealloc(buffer,
2956  max * sizeof(xmlChar));
2957  if (tmp == NULL) {
2958  xmlFree(buffer);
2959  xmlErrMemory(ctxt, NULL);
2960  return(NULL);
2961  }
2962  buffer = tmp;
2963  }
2964  buffer[len++] = c;
2965  c = *cur++;
2966  }
2967  buffer[len] = 0;
2968  }
2969 
/* A ':' that is the last character: treat the whole string as unprefixed. */
2970  if ((c == ':') && (*cur == 0)) {
2971  if (buffer != NULL)
2972  xmlFree(buffer);
2973  *prefix = NULL;
2974  return(xmlStrdup(name));
2975  }
2976 
/* ret now holds what was scanned so far: the prefix, or the whole name. */
2977  if (buffer == NULL)
2978  ret = xmlStrndup(buf, len);
2979  else {
2980  ret = buffer;
2981  buffer = NULL;
2982  max = XML_MAX_NAMELEN;
2983  }
2984 
2985 
2986  if (c == ':') {
2987  c = *cur;
2988  *prefix = ret;
/*
 * NOTE(review): this branch looks unreachable, because the "':' followed by
 * end of string" case already returned above -- confirm.
 */
2989  if (c == 0) {
2990  return(xmlStrndup(BAD_CAST "", 0));
2991  }
2992  len = 0;
2993 
2994  /*
2995  * Check that the first character is proper to start
2996  * a new name
2997  */
/* Cheap ASCII test first; only decode a full character when it fails. */
2998  if (!(((c >= 0x61) && (c <= 0x7A)) ||
2999  ((c >= 0x41) && (c <= 0x5A)) ||
3000  (c == '_') || (c == ':'))) {
3001  int l;
3002  int first = CUR_SCHAR(cur, l);
3003 
3004  if (!IS_LETTER(first) && (first != '_')) {
3005  xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3006  "Name %s is not XML Namespace compliant\n",
3007  name);
3008  }
3009  }
3010  cur++;
3011 
/* Second pass: copy the local part, i.e. everything after the first ':'. */
3012  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3013  buf[len++] = c;
3014  c = *cur++;
3015  }
3016  if (len >= max) {
3017  /*
3018  * Okay someone managed to make a huge name, so he's ready to pay
3019  * for the processing speed.
3020  */
3021  max = len * 2;
3022 
3023  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3024  if (buffer == NULL) {
3025  xmlErrMemory(ctxt, NULL);
3026  return(NULL);
3027  }
3028  memcpy(buffer, buf, len);
3029  while (c != 0) { /* tested bigname2.xml */
3030  if (len + 10 > max) {
3031  xmlChar *tmp;
3032 
3033  max *= 2;
3034  tmp = (xmlChar *) xmlRealloc(buffer,
3035  max * sizeof(xmlChar));
3036  if (tmp == NULL) {
3037  xmlErrMemory(ctxt, NULL);
3038  xmlFree(buffer);
3039  return(NULL);
3040  }
3041  buffer = tmp;
3042  }
3043  buffer[len++] = c;
3044  c = *cur++;
3045  }
3046  buffer[len] = 0;
3047  }
3048 
3049  if (buffer == NULL)
3050  ret = xmlStrndup(buf, len);
3051  else {
3052  ret = buffer;
3053  }
3054  }
3055 
3056  return(ret);
3057 }
3058 
3059 /************************************************************************
3060  * *
3061  * The parser itself *
3062  * Relates to http://www.w3.org/TR/REC-xml *
3063  * *
3064  ************************************************************************/
3065 
3066 /************************************************************************
3067  * *
3068  * Routines to parse Name, NCName and NmToken *
3069  * *
3070  ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, one per name-parsing routine; each routine bumps its own
 * counter on entry. Only compiled in when DEBUG is defined.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3079 
3080 /*
3081  * The two following functions are related to the change of accepted
3082  * characters for Name and NmToken in the Revision 5 of XML-1.0
3083  * They correspond to the modified production [4] and the new production [4a]
3084  * changes in that revision. Also note that the macros used for the
3085  * productions Letter, Digit, CombiningChar and Extender are not needed
3086  * anymore.
3087  * We still keep compatibility with the pre-revision-5 parsing semantics
3088  * if the XML_PARSE_OLD10 option is given to the parser.
3089  */
3090 static int
3092  if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3093  /*
3094  * Use the new checks of production [4] [4a] amd [5] of the
3095  * Update 5 of XML-1.0
3096  */
3097  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3098  (((c >= 'a') && (c <= 'z')) ||
3099  ((c >= 'A') && (c <= 'Z')) ||
3100  (c == '_') || (c == ':') ||
3101  ((c >= 0xC0) && (c <= 0xD6)) ||
3102  ((c >= 0xD8) && (c <= 0xF6)) ||
3103  ((c >= 0xF8) && (c <= 0x2FF)) ||
3104  ((c >= 0x370) && (c <= 0x37D)) ||
3105  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3106  ((c >= 0x200C) && (c <= 0x200D)) ||
3107  ((c >= 0x2070) && (c <= 0x218F)) ||
3108  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3109  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3110  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3111  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3112  ((c >= 0x10000) && (c <= 0xEFFFF))))
3113  return(1);
3114  } else {
3115  if (IS_LETTER(c) || (c == '_') || (c == ':'))
3116  return(1);
3117  }
3118  return(0);
3119 }
3120 
3121 static int
3123  if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3124  /*
3125  * Use the new checks of production [4] [4a] amd [5] of the
3126  * Update 5 of XML-1.0
3127  */
3128  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3129  (((c >= 'a') && (c <= 'z')) ||
3130  ((c >= 'A') && (c <= 'Z')) ||
3131  ((c >= '0') && (c <= '9')) || /* !start */
3132  (c == '_') || (c == ':') ||
3133  (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3134  ((c >= 0xC0) && (c <= 0xD6)) ||
3135  ((c >= 0xD8) && (c <= 0xF6)) ||
3136  ((c >= 0xF8) && (c <= 0x2FF)) ||
3137  ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3138  ((c >= 0x370) && (c <= 0x37D)) ||
3139  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3140  ((c >= 0x200C) && (c <= 0x200D)) ||
3141  ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3142  ((c >= 0x2070) && (c <= 0x218F)) ||
3143  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3144  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3145  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3146  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3147  ((c >= 0x10000) && (c <= 0xEFFFF))))
3148  return(1);
3149  } else {
3150  if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3151  (c == '.') || (c == '-') ||
3152  (c == '_') || (c == ':') ||
3153  (IS_COMBINING(c)) ||
3154  (IS_EXTENDER(c)))
3155  return(1);
3156  }
3157  return(0);
3158 }
3159 
/*
 * NOTE(review): tail of a forward declaration whose first line is missing
 * from this extract. Judging from the four-argument call
 * xmlParseAttValueInternal(ctxt, NULL, NULL, 0) further down, it presumably
 * declares xmlParseAttValueInternal -- confirm against the upstream file.
 */
3161  int *len, int *alloc, int normalize);
3162 
/*
 * Slow path for parsing a Name from the parser input, one decoded
 * character at a time (CUR_CHAR / NEXTL). xmlParseName falls back to it
 * when its ASCII accelerator does not apply.
 *
 * The accepted characters depend on XML_PARSE_OLD10 in ctxt->options.
 * Returns the dictionary entry for the name, or NULL when the first
 * character cannot start a Name, when the parser reaches XML_PARSER_EOF,
 * when the name is longer than XML_MAX_NAME_LENGTH without XML_PARSE_HUGE,
 * or when fewer than `len` bytes lie between the buffer base and the cursor.
 *
 * NOTE(review): the signature line and the first line of the error call
 * before "unexpected change of input buffer" are missing from this extract.
 */
3163 static const xmlChar *
3165  int len = 0, l;
3166  int c;
3167  int count = 0;
3168 
3169 #ifdef DEBUG
3170  nbParseNameComplex++;
3171 #endif
3172 
3173  /*
3174  * Handler for more complex cases
3175  */
3176  GROW;
3177  if (ctxt->instate == XML_PARSER_EOF)
3178  return(NULL);
3179  c = CUR_CHAR(l);
3180  if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3181  /*
3182  * Use the new checks of production [4] [4a] and [5] of the
3183  * Update 5 of XML-1.0
3184  */
3185  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3186  (!(((c >= 'a') && (c <= 'z')) ||
3187  ((c >= 'A') && (c <= 'Z')) ||
3188  (c == '_') || (c == ':') ||
3189  ((c >= 0xC0) && (c <= 0xD6)) ||
3190  ((c >= 0xD8) && (c <= 0xF6)) ||
3191  ((c >= 0xF8) && (c <= 0x2FF)) ||
3192  ((c >= 0x370) && (c <= 0x37D)) ||
3193  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3194  ((c >= 0x200C) && (c <= 0x200D)) ||
3195  ((c >= 0x2070) && (c <= 0x218F)) ||
3196  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3197  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3198  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3199  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3200  ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3201  return(NULL);
3202  }
3203  len += l;
3204  NEXTL(l);
3205  c = CUR_CHAR(l);
3206  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3207  (((c >= 'a') && (c <= 'z')) ||
3208  ((c >= 'A') && (c <= 'Z')) ||
3209  ((c >= '0') && (c <= '9')) || /* !start */
3210  (c == '_') || (c == ':') ||
3211  (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3212  ((c >= 0xC0) && (c <= 0xD6)) ||
3213  ((c >= 0xD8) && (c <= 0xF6)) ||
3214  ((c >= 0xF8) && (c <= 0x2FF)) ||
3215  ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3216  ((c >= 0x370) && (c <= 0x37D)) ||
3217  ((c >= 0x37F) && (c <= 0x1FFF)) ||
3218  ((c >= 0x200C) && (c <= 0x200D)) ||
3219  ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3220  ((c >= 0x2070) && (c <= 0x218F)) ||
3221  ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3222  ((c >= 0x3001) && (c <= 0xD7FF)) ||
3223  ((c >= 0xF900) && (c <= 0xFDCF)) ||
3224  ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3225  ((c >= 0x10000) && (c <= 0xEFFFF))
3226  )) {
/* Refill the input every XML_PARSER_CHUNK_SIZE characters. */
3227  if (count++ > XML_PARSER_CHUNK_SIZE) {
3228  count = 0;
3229  GROW;
3230  if (ctxt->instate == XML_PARSER_EOF)
3231  return(NULL);
3232  }
3233  len += l;
3234  NEXTL(l);
3235  c = CUR_CHAR(l);
3236  }
3237  } else {
/* XML_PARSE_OLD10: pre-revision-5 character classes. */
3238  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3239  (!IS_LETTER(c) && (c != '_') &&
3240  (c != ':'))) {
3241  return(NULL);
3242  }
3243  len += l;
3244  NEXTL(l);
3245  c = CUR_CHAR(l);
3246 
3247  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3248  ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3249  (c == '.') || (c == '-') ||
3250  (c == '_') || (c == ':') ||
3251  (IS_COMBINING(c)) ||
3252  (IS_EXTENDER(c)))) {
3253  if (count++ > XML_PARSER_CHUNK_SIZE) {
3254  count = 0;
3255  GROW;
3256  if (ctxt->instate == XML_PARSER_EOF)
3257  return(NULL);
3258  }
3259  len += l;
3260  NEXTL(l);
3261  c = CUR_CHAR(l);
3262  }
3263  }
3264  if ((len > XML_MAX_NAME_LENGTH) &&
3265  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3266  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3267  return(NULL);
3268  }
3269  if (ctxt->input->cur - ctxt->input->base < len) {
3270  /*
3271  * There were a couple of bugs where PERefs led to a change
3272  * of the buffer. Check the buffer size to avoid passing an invalid
3273  * pointer to xmlDictLookup.
3274  */
3276  "unexpected change of input buffer");
3277  return (NULL);
3278  }
/*
 * The name is the `len` bytes just before the cursor, or one byte earlier
 * when the cursor sits on the '\n' of a "\r\n" pair.
 * NOTE(review): presumably the '\r' was stepped over without being counted
 * in len -- confirm against CUR_CHAR/NEXTL.
 */
3279  if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3280  return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3281  return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3282 }
3283 
3300 const xmlChar *
3302  const xmlChar *in;
3303  const xmlChar *ret;
3304  int count = 0;
3305 
3306  GROW;
3307 
3308 #ifdef DEBUG
3309  nbParseName++;
3310 #endif
3311 
3312  /*
3313  * Accelerator for simple ASCII names
3314  */
3315  in = ctxt->input->cur;
3316  if (((*in >= 0x61) && (*in <= 0x7A)) ||
3317  ((*in >= 0x41) && (*in <= 0x5A)) ||
3318  (*in == '_') || (*in == ':')) {
3319  in++;
3320  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3321  ((*in >= 0x41) && (*in <= 0x5A)) ||
3322  ((*in >= 0x30) && (*in <= 0x39)) ||
3323  (*in == '_') || (*in == '-') ||
3324  (*in == ':') || (*in == '.'))
3325  in++;
3326  if ((*in > 0) && (*in < 0x80)) {
3327  count = in - ctxt->input->cur;
3328  if ((count > XML_MAX_NAME_LENGTH) &&
3329  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3330  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3331  return(NULL);
3332  }
3333  ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3334  ctxt->input->cur = in;
3335  ctxt->nbChars += count;
3336  ctxt->input->col += count;
3337  if (ret == NULL)
3338  xmlErrMemory(ctxt, NULL);
3339  return(ret);
3340  }
3341  }
3342  /* accelerator for special cases */
3343  return(xmlParseNameComplex(ctxt));
3344 }
3345 
/*
 * Slow path for parsing an NCName (a Name without ':') from the parser
 * input, one decoded character at a time.
 *
 * The start of the name is remembered as an offset from BASE_PTR rather
 * than as a pointer (presumably because GROW may move the buffer), and the
 * result is the dictionary entry for the `len` bytes at that offset.
 * Returns NULL when the first character cannot start an NCName, when the
 * parser reaches XML_PARSER_EOF, or when the name is longer than
 * XML_MAX_NAME_LENGTH without XML_PARSE_HUGE.
 *
 * NOTE(review): the signature line is missing from this extract; the body
 * only uses a parser context named `ctxt`.
 */
3346 static const xmlChar *
3348  int len = 0, l;
3349  int c;
3350  int count = 0;
3351  size_t startPosition = 0;
3352 
3353 #ifdef DEBUG
3354  nbParseNCNameComplex++;
3355 #endif
3356 
3357  /*
3358  * Handler for more complex cases
3359  */
3360  GROW;
3361  startPosition = CUR_PTR - BASE_PTR;
3362  c = CUR_CHAR(l);
3363  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3364  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3365  return(NULL);
3366  }
3367 
3368  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3369  (xmlIsNameChar(ctxt, c) && (c != ':'))) {
/* Every XML_PARSER_CHUNK_SIZE characters: enforce the limit and refill. */
3370  if (count++ > XML_PARSER_CHUNK_SIZE) {
3371  if ((len > XML_MAX_NAME_LENGTH) &&
3372  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3373  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3374  return(NULL);
3375  }
3376  count = 0;
3377  GROW;
3378  if (ctxt->instate == XML_PARSER_EOF)
3379  return(NULL);
3380  }
3381  len += l;
3382  NEXTL(l);
3383  c = CUR_CHAR(l);
3384  if (c == 0) {
3385  count = 0;
3386  /*
3387  * when shrinking to extend the buffer we really need to preserve
3388  * the part of the name we already parsed. Hence rolling back
3389  * by current length.
3390  */
3391  ctxt->input->cur -= l;
3392  GROW;
3393  if (ctxt->instate == XML_PARSER_EOF)
3394  return(NULL);
3395  ctxt->input->cur += l;
3396  c = CUR_CHAR(l);
3397  }
3398  }
3399  if ((len > XML_MAX_NAME_LENGTH) &&
3400  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3401  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3402  return(NULL);
3403  }
3404  return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3405 }
3406 
3422 static const xmlChar *
3424  const xmlChar *in, *e;
3425  const xmlChar *ret;
3426  int count = 0;
3427 
3428 #ifdef DEBUG
3429  nbParseNCName++;
3430 #endif
3431 
3432  /*
3433  * Accelerator for simple ASCII names
3434  */
3435  in = ctxt->input->cur;
3436  e = ctxt->input->end;
3437  if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3438  ((*in >= 0x41) && (*in <= 0x5A)) ||
3439  (*in == '_')) && (in < e)) {
3440  in++;
3441  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3442  ((*in >= 0x41) && (*in <= 0x5A)) ||
3443  ((*in >= 0x30) && (*in <= 0x39)) ||
3444  (*in == '_') || (*in == '-') ||
3445  (*in == '.')) && (in < e))
3446  in++;
3447  if (in >= e)
3448  goto complex;
3449  if ((*in > 0) && (*in < 0x80)) {
3450  count = in - ctxt->input->cur;
3451  if ((count > XML_MAX_NAME_LENGTH) &&
3452  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3453  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3454  return(NULL);
3455  }
3456  ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3457  ctxt->input->cur = in;
3458  ctxt->nbChars += count;
3459  ctxt->input->col += count;
3460  if (ret == NULL) {
3461  xmlErrMemory(ctxt, NULL);
3462  }
3463  return(ret);
3464  }
3465  }
3466 complex:
3467  return(xmlParseNCNameComplex(ctxt));
3468 }
3469 
3481 static const xmlChar *
3483  register const xmlChar *cmp = other;
3484  register const xmlChar *in;
3485  const xmlChar *ret;
3486 
3487  GROW;
3488  if (ctxt->instate == XML_PARSER_EOF)
3489  return(NULL);
3490 
3491  in = ctxt->input->cur;
3492  while (*in != 0 && *in == *cmp) {
3493  ++in;
3494  ++cmp;
3495  ctxt->input->col++;
3496  }
3497  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3498  /* success */
3499  ctxt->input->cur = in;
3500  return (const xmlChar*) 1;
3501  }
3502  /* failure (or end of input buffer), check with full function */
3503  ret = xmlParseName (ctxt);
3504  /* strings coming from the dictionary direct compare possible */
3505  if (ret == other) {
3506  return (const xmlChar*) 1;
3507  }
3508  return ret;
3509 }
3510 
/*
 * Parse a Name from the string *str (not from the parser input).
 *
 * On success *str is advanced past the name and a newly allocated copy of
 * the name is returned. Returns NULL, leaving *str untouched, when the
 * string does not start with a name-start character, when an allocation
 * fails, or when the name is longer than XML_MAX_NAME_LENGTH without
 * XML_PARSE_HUGE.
 *
 * NOTE(review): the signature line and the declaration of the fixed-size
 * scratch array `buf` are missing from this extract.
 */
3529 static xmlChar *
3532  const xmlChar *cur = *str;
3533  int len = 0, l;
3534  int c;
3535 
3536 #ifdef DEBUG
3537  nbParseStringName++;
3538 #endif
3539 
3540  c = CUR_SCHAR(cur, l);
3541  if (!xmlIsNameStartChar(ctxt, c)) {
3542  return(NULL);
3543  }
3544 
3545  COPY_BUF(l,buf,len,c);
3546  cur += l;
3547  c = CUR_SCHAR(cur, l);
3548  while (xmlIsNameChar(ctxt, c)) {
3549  COPY_BUF(l,buf,len,c);
3550  cur += l;
3551  c = CUR_SCHAR(cur, l);
3552  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3553  /*
3554  * Okay someone managed to make a huge name, so he's ready to pay
3555  * for the processing speed.
3556  */
3557  xmlChar *buffer;
3558  int max = len * 2;
3559 
3560  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3561  if (buffer == NULL) {
3562  xmlErrMemory(ctxt, NULL);
3563  return(NULL);
3564  }
3565  memcpy(buffer, buf, len);
/* From here on the name is accumulated in the heap buffer only. */
3566  while (xmlIsNameChar(ctxt, c)) {
3567  if (len + 10 > max) {
3568  xmlChar *tmp;
3569 
3570  if ((len > XML_MAX_NAME_LENGTH) &&
3571  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3572  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3573  xmlFree(buffer);
3574  return(NULL);
3575  }
3576  max *= 2;
3577  tmp = (xmlChar *) xmlRealloc(buffer,
3578  max * sizeof(xmlChar));
3579  if (tmp == NULL) {
3580  xmlErrMemory(ctxt, NULL);
3581  xmlFree(buffer);
3582  return(NULL);
3583  }
3584  buffer = tmp;
3585  }
3586  COPY_BUF(l,buffer,len,c);
3587  cur += l;
3588  c = CUR_SCHAR(cur, l);
3589  }
3590  buffer[len] = 0;
3591  *str = cur;
3592  return(buffer);
3593  }
3594  }
3595  if ((len > XML_MAX_NAME_LENGTH) &&
3596  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3597  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3598  return(NULL);
3599  }
3600  *str = cur;
3601  return(xmlStrndup(buf, len));
3602 }
3603 
/*
 * Parse an Nmtoken from the parser input: a run of name characters, with
 * no restriction on the first one.
 *
 * Returns a newly allocated copy of the token, or NULL when no name
 * character is found, when the parser reaches XML_PARSER_EOF, when an
 * allocation fails, or when the length limit is exceeded without
 * XML_PARSE_HUGE.
 *
 * NOTE(review): the signature line and the declaration of the fixed-size
 * scratch array `buf` are missing from this extract.
 */
3617 xmlChar *
3620  int len = 0, l;
3621  int c;
3622  int count = 0;
3623 
3624 #ifdef DEBUG
3625  nbParseNmToken++;
3626 #endif
3627 
3628  GROW;
3629  if (ctxt->instate == XML_PARSER_EOF)
3630  return(NULL);
3631  c = CUR_CHAR(l);
3632 
3633  while (xmlIsNameChar(ctxt, c)) {
/*
 * NOTE(review): unlike the other GROW sites in this function, no
 * XML_PARSER_EOF check follows this one.
 */
3634  if (count++ > XML_PARSER_CHUNK_SIZE) {
3635  count = 0;
3636  GROW;
3637  }
3638  COPY_BUF(l,buf,len,c);
3639  NEXTL(l);
3640  c = CUR_CHAR(l);
3641  if (c == 0) {
3642  count = 0;
3643  GROW;
3644  if (ctxt->instate == XML_PARSER_EOF)
3645  return(NULL);
3646  c = CUR_CHAR(l);
3647  }
3648  if (len >= XML_MAX_NAMELEN) {
3649  /*
3650  * Okay someone managed to make a huge token, so he's ready to pay
3651  * for the processing speed.
3652  */
3653  xmlChar *buffer;
3654  int max = len * 2;
3655 
3656  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3657  if (buffer == NULL) {
3658  xmlErrMemory(ctxt, NULL);
3659  return(NULL);
3660  }
3661  memcpy(buffer, buf, len);
/* From here on the token is accumulated in the heap buffer only. */
3662  while (xmlIsNameChar(ctxt, c)) {
3663  if (count++ > XML_PARSER_CHUNK_SIZE) {
3664  count = 0;
3665  GROW;
3666  if (ctxt->instate == XML_PARSER_EOF) {
3667  xmlFree(buffer);
3668  return(NULL);
3669  }
3670  }
3671  if (len + 10 > max) {
3672  xmlChar *tmp;
3673 
/* The limit is tested against the buffer capacity, not against len. */
3674  if ((max > XML_MAX_NAME_LENGTH) &&
3675  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3676  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3677  xmlFree(buffer);
3678  return(NULL);
3679  }
3680  max *= 2;
3681  tmp = (xmlChar *) xmlRealloc(buffer,
3682  max * sizeof(xmlChar));
3683  if (tmp == NULL) {
3684  xmlErrMemory(ctxt, NULL);
3685  xmlFree(buffer);
3686  return(NULL);
3687  }
3688  buffer = tmp;
3689  }
3690  COPY_BUF(l,buffer,len,c);
3691  NEXTL(l);
3692  c = CUR_CHAR(l);
3693  }
3694  buffer[len] = 0;
3695  return(buffer);
3696  }
3697  }
3698  if (len == 0)
3699  return(NULL);
3700  if ((len > XML_MAX_NAME_LENGTH) &&
3701  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3702  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3703  return(NULL);
3704  }
3705  return(xmlStrndup(buf, len));
3706 }
3707 
/*
 * Parse a quoted EntityValue literal from the parser input.
 *
 * Steps visible here:
 *  1. pick the closing quote from the opening one;
 *  2. copy the raw literal into a growable heap buffer until the closing
 *     quote is seen while reading from the same input the literal started in;
 *  3. check that every '%' and every '&' not followed by '#' starts a
 *     "name;" reference;
 *  4. produce the return value `ret`, and either hand the raw buffer to the
 *     caller through *orig (when orig is non-NULL) or free it.
 *
 * Returns `ret`, which stays NULL on every error path.
 *
 * NOTE(review): several lines are missing from this extract: the signature,
 * the declarations of `size` and `input`, the error calls of three failure
 * branches, a statement just before `input = ctxt->input`, and the first
 * line of the call that assigns `ret` (presumably an entity-decoding call,
 * given the "0, 0, 0);" tail and the comment above it) -- confirm against
 * the upstream file.
 */
3721 xmlChar *
3723  xmlChar *buf = NULL;
3724  int len = 0;
3726  int c, l;
3727  xmlChar stop;
3728  xmlChar *ret = NULL;
3729  const xmlChar *cur = NULL;
3731 
3732  if (RAW == '"') stop = '"';
3733  else if (RAW == '\'') stop = '\'';
3734  else {
3736  return(NULL);
3737  }
3738  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3739  if (buf == NULL) {
3740  xmlErrMemory(ctxt, NULL);
3741  return(NULL);
3742  }
3743 
3744  /*
3745  * The content of the entity definition is copied in a buffer.
3746  */
3747 
3749  input = ctxt->input;
3750  GROW;
3751  if (ctxt->instate == XML_PARSER_EOF)
3752  goto error;
3753  NEXT;
3754  c = CUR_CHAR(l);
3755  /*
3756  * NOTE: 4.4.5 Included in Literal
3757  * When a parameter entity reference appears in a literal entity
3758  * value, ... a single or double quote character in the replacement
3759  * text is always treated as a normal data character and will not
3760  * terminate the literal.
3761  * In practice it means we stop the loop only when back at parsing
3762  * the initial entity and the quote is found
3763  */
3764  while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3765  (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
/* Keep at least 5 spare bytes before COPY_BUF stores the next character. */
3766  if (len + 5 >= size) {
3767  xmlChar *tmp;
3768 
3769  size *= 2;
3770  tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3771  if (tmp == NULL) {
3772  xmlErrMemory(ctxt, NULL);
3773  goto error;
3774  }
3775  buf = tmp;
3776  }
3777  COPY_BUF(l,buf,len,c);
3778  NEXTL(l);
3779 
3780  GROW;
3781  c = CUR_CHAR(l);
3782  if (c == 0) {
3783  GROW;
3784  c = CUR_CHAR(l);
3785  }
3786  }
3787  buf[len] = 0;
3788  if (ctxt->instate == XML_PARSER_EOF)
3789  goto error;
3790  if (c != stop) {
3792  goto error;
3793  }
3794  NEXT;
3795 
3796  /*
3797  * Raise problem w.r.t. '&' and '%' being used in non-entities
3798  * reference constructs. Note Charref will be handled in
3799  * xmlStringDecodeEntities()
3800  */
3801  cur = buf;
3802  while (*cur != 0) { /* non input consuming */
3803  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3804  xmlChar *name;
3805  xmlChar tmp = *cur;
3806  int nameOk = 0;
3807 
3808  cur++;
/* Only the syntax is checked; the parsed name itself is discarded. */
3809  name = xmlParseStringName(ctxt, &cur);
3810  if (name != NULL) {
3811  nameOk = 1;
3812  xmlFree(name);
3813  }
3814  if ((nameOk == 0) || (*cur != ';')) {
3815  xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3816  "EntityValue: '%c' forbidden except for entities references\n",
3817  tmp);
3818  goto error;
3819  }
3820  if ((tmp == '%') && (ctxt->inSubset == 1) &&
3821  (ctxt->inputNr == 1)) {
3823  goto error;
3824  }
3825  if (*cur == 0)
3826  break;
3827  }
3828  cur++;
3829  }
3830 
3831  /*
3832  * Then PEReference entities are substituted.
3833  *
3834  * NOTE: 4.4.7 Bypassed
3835  * When a general entity reference appears in the EntityValue in
3836  * an entity declaration, it is bypassed and left as is.
3837  * so XML_SUBSTITUTE_REF is not set here.
3838  */
3839  ++ctxt->depth;
3841  0, 0, 0);
3842  --ctxt->depth;
/* Hand the raw literal to the caller; clearing buf keeps it from being freed below. */
3843  if (orig != NULL) {
3844  *orig = buf;
3845  buf = NULL;
3846  }
3847 
3848 error:
3849  if (buf != NULL)
3850  xmlFree(buf);
3851  return(ret);
3852 }
3853 
/*
 * Parse a quoted attribute value from the parser input into a newly
 * allocated buffer, handling character references, entity references and
 * white-space folding.
 *
 * - Each of #x20, #xD, #xA, #x9 is stored as a single #x20. When `normalize`
 *   is non-zero, leading blanks are dropped, runs of blanks collapse to one
 *   and trailing blanks are trimmed.
 * - "&#...;" is decoded through xmlParseCharRef(). A decoded '&' is stored
 *   as "&#38;" when ctxt->replaceEntities is 0.
 * - Other "&name;" references are either replaced by their decoded content
 *   (ctxt->replaceEntities != 0) or copied back as "&name;".
 *
 * Returns the buffer and, when attlen is non-NULL, stores its length in
 * *attlen. Returns NULL when no opening quote is found, on XML_PARSER_EOF,
 * on allocation failure, or when the value is too long.
 *
 * NOTE(review): the signature line is missing from this extract (the
 * function is presumably xmlParseAttValueComplex, taking `ctxt`, `attlen`
 * and `normalize`). Also missing: one statement in each opening-quote
 * branch, the error call for the no-quote case, the second half of the
 * first `ent` condition, one argument line of the first
 * xmlStringDecodeEntities() call, and the error call for '<' -- confirm
 * against the upstream file.
 */
3866 static xmlChar *
3868  xmlChar limit = 0;
3869  xmlChar *buf = NULL;
3870  xmlChar *rep = NULL;
3871  size_t len = 0;
3872  size_t buf_size = 0;
3873  int c, l, in_space = 0;
3874  xmlChar *current = NULL;
3875  xmlEntityPtr ent;
3876 
3877  if (NXT(0) == '"') {
3879  limit = '"';
3880  NEXT;
3881  } else if (NXT(0) == '\'') {
3882  limit = '\'';
3884  NEXT;
3885  } else {
3887  return(NULL);
3888  }
3889 
3890  /*
3891  * allocate a translation buffer.
3892  */
3893  buf_size = XML_PARSER_BUFFER_SIZE;
3894  buf = (xmlChar *) xmlMallocAtomic(buf_size);
3895  if (buf == NULL) goto mem_error;
3896 
3897  /*
3898  * OK loop until we reach one of the ending char or a size limit.
3899  */
3900  c = CUR_CHAR(l);
3901  while (((NXT(0) != limit) && /* checked */
3902  (IS_CHAR(c)) && (c != '<')) &&
3903  (ctxt->instate != XML_PARSER_EOF)) {
3904  /*
3905  * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3906  * special option is given
3907  */
3908  if ((len > XML_MAX_TEXT_LENGTH) &&
3909  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3910  xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3911  "AttValue length too long\n");
3912  goto mem_error;
3913  }
3914  if (c == 0) break;
3915  if (c == '&') {
3916  in_space = 0;
3917  if (NXT(1) == '#') {
/* Character reference: xmlParseCharRef() consumes it from the input. */
3918  int val = xmlParseCharRef(ctxt);
3919 
3920  if (val == '&') {
3921  if (ctxt->replaceEntities) {
3922  if (len + 10 > buf_size) {
3923  growBuffer(buf, 10);
3924  }
3925  buf[len++] = '&';
3926  } else {
3927  /*
3928  * The reparsing will be done in xmlStringGetNodeList()
3929  * called by the attribute() function in SAX.c
3930  */
3931  if (len + 10 > buf_size) {
3932  growBuffer(buf, 10);
3933  }
3934  buf[len++] = '&';
3935  buf[len++] = '#';
3936  buf[len++] = '3';
3937  buf[len++] = '8';
3938  buf[len++] = ';';
3939  }
3940  } else if (val != 0) {
3941  if (len + 10 > buf_size) {
3942  growBuffer(buf, 10);
3943  }
3944  len += xmlCopyChar(0, &buf[len], val);
3945  }
3946  } else {
/* General entity reference. */
3947  ent = xmlParseEntityRef(ctxt);
3948  ctxt->nbentities++;
3949  if (ent != NULL)
3950  ctxt->nbentities += ent->owner;
3951  if ((ent != NULL) &&
3953  if (len + 10 > buf_size) {
3954  growBuffer(buf, 10);
3955  }
3956  if ((ctxt->replaceEntities == 0) &&
3957  (ent->content[0] == '&')) {
3958  buf[len++] = '&';
3959  buf[len++] = '#';
3960  buf[len++] = '3';
3961  buf[len++] = '8';
3962  buf[len++] = ';';
3963  } else {
3964  buf[len++] = ent->content[0];
3965  }
3966  } else if ((ent != NULL) &&
3967  (ctxt->replaceEntities != 0)) {
3968  if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3969  ++ctxt->depth;
3970  rep = xmlStringDecodeEntities(ctxt, ent->content,
3972  0, 0, 0);
3973  --ctxt->depth;
3974  if (rep != NULL) {
/* Copy the replacement text, folding CR, LF and TAB to a space. */
3975  current = rep;
3976  while (*current != 0) { /* non input consuming */
3977  if ((*current == 0xD) || (*current == 0xA) ||
3978  (*current == 0x9)) {
3979  buf[len++] = 0x20;
3980  current++;
3981  } else
3982  buf[len++] = *current++;
3983  if (len + 10 > buf_size) {
3984  growBuffer(buf, 10);
3985  }
3986  }
3987  xmlFree(rep);
3988  rep = NULL;
3989  }
3990  } else {
3991  if (len + 10 > buf_size) {
3992  growBuffer(buf, 10);
3993  }
3994  if (ent->content != NULL)
3995  buf[len++] = ent->content[0];
3996  }
3997  } else if (ent != NULL) {
3998  int i = xmlStrlen(ent->name);
3999  const xmlChar *cur = ent->name;
4000 
4001  /*
4002  * This may look absurd but is needed to detect
4003  * entities problems
4004  */
4005  if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4006  (ent->content != NULL) && (ent->checked == 0)) {
4007  unsigned long oldnbent = ctxt->nbentities, diff;
4008 
4009  ++ctxt->depth;
4010  rep = xmlStringDecodeEntities(ctxt, ent->content,
4011  XML_SUBSTITUTE_REF, 0, 0, 0);
4012  --ctxt->depth;
4013 
/*
 * ent->checked holds twice the entity count seen while decoding (capped at
 * INT_MAX / 2); bit 0 is set when the replacement text contains '<'.
 */
4014  diff = ctxt->nbentities - oldnbent + 1;
4015  if (diff > INT_MAX / 2)
4016  diff = INT_MAX / 2;
4017  ent->checked = diff * 2;
4018  if (rep != NULL) {
4019  if (xmlStrchr(rep, '<'))
4020  ent->checked |= 1;
4021  xmlFree(rep);
4022  rep = NULL;
4023  } else {
4024  ent->content[0] = 0;
4025  }
4026  }
4027 
4028  /*
4029  * Just output the reference
4030  */
/* The '&' is stored before the capacity check below runs. */
4031  buf[len++] = '&';
4032  while (len + i + 10 > buf_size) {
4033  growBuffer(buf, i + 10);
4034  }
4035  for (;i > 0;i--)
4036  buf[len++] = *cur++;
4037  buf[len++] = ';';
4038  }
4039  }
4040  } else {
4041  if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
/* Blank: stored as #x20, or skipped when normalizing a leading/repeated blank. */
4042  if ((len != 0) || (!normalize)) {
4043  if ((!normalize) || (!in_space)) {
4044  COPY_BUF(l,buf,len,0x20);
4045  while (len + 10 > buf_size) {
4046  growBuffer(buf, 10);
4047  }
4048  }
4049  in_space = 1;
4050  }
4051  } else {
4052  in_space = 0;
4053  COPY_BUF(l,buf,len,c);
4054  if (len + 10 > buf_size) {
4055  growBuffer(buf, 10);
4056  }
4057  }
4058  NEXTL(l);
4059  }
4060  GROW;
4061  c = CUR_CHAR(l);
4062  }
4063  if (ctxt->instate == XML_PARSER_EOF)
4064  goto error;
4065 
/* When normalizing, drop the trailing blanks. */
4066  if ((in_space) && (normalize)) {
4067  while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4068  }
4069  buf[len] = 0;
4070  if (RAW == '<') {
4072  } else if (RAW != limit) {
4073  if ((c != 0) && (!IS_CHAR(c))) {
4074  xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4075  "invalid character in attribute value\n");
4076  } else {
4077  xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4078  "AttValue: ' expected\n");
4079  }
4080  } else
4081  NEXT;
4082 
4083  /*
4084  * There we potentially risk an overflow, don't allow attribute value of
4085  * length more than INT_MAX it is a very reasonable assumption !
4086  */
4087  if (len >= INT_MAX) {
4088  xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4089  "AttValue length too long\n");
4090  goto mem_error;
4091  }
4092 
4093  if (attlen != NULL) *attlen = (int) len;
4094  return(buf);
4095 
/* mem_error reports a memory error and then falls through to the cleanup. */
4096 mem_error:
4097  xmlErrMemory(ctxt, NULL);
4098 error:
4099  if (buf != NULL)
4100  xmlFree(buf);
4101  if (rep != NULL)
4102  xmlFree(rep);
4103  return(NULL);
4104 }
4105 
4140 xmlChar *
4142  if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4143  return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4144 }
4145 
/*
 * Parse a quoted SystemLiteral from the parser input.
 *
 * Returns a newly allocated, NUL-terminated copy of the text between the
 * quotes. The buffer is returned even when the loop stopped on an invalid
 * character rather than on the closing quote. Returns NULL when no opening
 * quote is found, on allocation failure, on XML_PARSER_EOF, or when the
 * buffer grows past XML_MAX_NAME_LENGTH without XML_PARSE_HUGE.
 *
 * ctxt->instate is saved on entry and written back on every return path
 * after the allocation, except the XML_PARSER_EOF one.
 *
 * NOTE(review): the signature, the declaration of `size`, the error calls
 * of two failure branches and a statement just before the loop (presumably
 * the one that changes ctxt->instate) are missing from this extract.
 */
4157 xmlChar *
4159  xmlChar *buf = NULL;
4160  int len = 0;
4162  int cur, l;
4163  xmlChar stop;
4164  int state = ctxt->instate;
4165  int count = 0;
4166 
4167  SHRINK;
4168  if (RAW == '"') {
4169  NEXT;
4170  stop = '"';
4171  } else if (RAW == '\'') {
4172  NEXT;
4173  stop = '\'';
4174  } else {
4176  return(NULL);
4177  }
4178 
4179  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4180  if (buf == NULL) {
4181  xmlErrMemory(ctxt, NULL);
4182  return(NULL);
4183  }
4185  cur = CUR_CHAR(l);
4186  while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
/* Keep at least 5 spare bytes before COPY_BUF stores the next character. */
4187  if (len + 5 >= size) {
4188  xmlChar *tmp;
4189 
4190  if ((size > XML_MAX_NAME_LENGTH) &&
4191  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4192  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4193  xmlFree(buf);
4194  ctxt->instate = (xmlParserInputState) state;
4195  return(NULL);
4196  }
4197  size *= 2;
4198  tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4199  if (tmp == NULL) {
4200  xmlFree(buf);
4201  xmlErrMemory(ctxt, NULL);
4202  ctxt->instate = (xmlParserInputState) state;
4203  return(NULL);
4204  }
4205  buf = tmp;
4206  }
/* Refill the input every 50 characters. */
4207  count++;
4208  if (count > 50) {
4209  GROW;
4210  count = 0;
4211  if (ctxt->instate == XML_PARSER_EOF) {
4212  xmlFree(buf);
4213  return(NULL);
4214  }
4215  }
4216  COPY_BUF(l,buf,len,cur);
4217  NEXTL(l);
4218  cur = CUR_CHAR(l);
4219  if (cur == 0) {
4220  GROW;
4221  SHRINK;
4222  cur = CUR_CHAR(l);
4223  }
4224  }
4225  buf[len] = 0;
4226  ctxt->instate = (xmlParserInputState) state;
/* An invalid character ended the loop: no closing quote to consume. */
4227  if (!IS_CHAR(cur)) {
4229  } else {
4230  NEXT;
4231  }
4232  return(buf);
4233 }
4234 
/*
 * Parse a quoted PubidLiteral from the parser input, one byte at a time.
 *
 * Returns a newly allocated, NUL-terminated copy of the text between the
 * quotes. The buffer is returned even when the loop stopped on a byte that
 * is neither a PubidChar nor the closing quote. Returns NULL when no
 * opening quote is found, on allocation failure, on XML_PARSER_EOF, or when
 * the buffer grows past XML_MAX_NAME_LENGTH without XML_PARSE_HUGE.
 *
 * ctxt->instate is saved on entry and written back only on the final return
 * path. The early error returns inside the loop do not restore it.
 *
 * NOTE(review): the signature, the declaration of `size`, the error calls
 * of two failure branches and a statement just before the loop (presumably
 * the one that changes ctxt->instate) are missing from this extract.
 */
4246 xmlChar *
4248  xmlChar *buf = NULL;
4249  int len = 0;
4251  xmlChar cur;
4252  xmlChar stop;
4253  int count = 0;
4254  xmlParserInputState oldstate = ctxt->instate;
4255 
4256  SHRINK;
4257  if (RAW == '"') {
4258  NEXT;
4259  stop = '"';
4260  } else if (RAW == '\'') {
4261  NEXT;
4262  stop = '\'';
4263  } else {
4265  return(NULL);
4266  }
4267  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4268  if (buf == NULL) {
4269  xmlErrMemory(ctxt, NULL);
4270  return(NULL);
4271  }
4273  cur = CUR;
4274  while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
/* Keep room for the byte stored below plus the terminating NUL. */
4275  if (len + 1 >= size) {
4276  xmlChar *tmp;
4277 
4278  if ((size > XML_MAX_NAME_LENGTH) &&
4279  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4280  xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4281  xmlFree(buf);
4282  return(NULL);
4283  }
4284  size *= 2;
4285  tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4286  if (tmp == NULL) {
4287  xmlErrMemory(ctxt, NULL);
4288  xmlFree(buf);
4289  return(NULL);
4290  }
4291  buf = tmp;
4292  }
4293  buf[len++] = cur;
/* Refill the input every 50 characters. */
4294  count++;
4295  if (count > 50) {
4296  GROW;
4297  count = 0;
4298  if (ctxt->instate == XML_PARSER_EOF) {
4299  xmlFree(buf);
4300  return(NULL);
4301  }
4302  }
4303  NEXT;
4304  cur = CUR;
4305  if (cur == 0) {
4306  GROW;
4307  SHRINK;
4308  cur = CUR;
4309  }
4310  }
4311  buf[len] = 0;
4312  if (cur != stop) {
4314  } else {
4315  NEXT;
4316  }
4317  ctxt->instate = oldstate;
4318  return(buf);
4319 }
4320 
/*
 * Forward declaration of a file-local routine taking the parser context and
 * a `cdata` flag; its definition is not visible in this part of the file.
 */
4321 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4322 
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. Indexed by byte value: an entry equal to its own index marks a
 * byte accepted by that test, 0x00 marks a byte that is not. The zero
 * entries are the control characters other than TAB (so CR and LF are
 * excluded), '&', '<', ']' and every byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09) is kept */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is excluded */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is excluded */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, all excluded */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4360 
/*
 * Character-data parser, fast path.
 *
 * NOTE(review): this listing omits source line 4378 (function name and
 * parameter list) and line 4448.  The body uses a parser context `ctxt`
 * and an int flag `cdata`; presumably this is xmlParseCharData - confirm
 * against the full source.
 *
 * When cdata is zero the text is scanned in place in ctxt->input and
 * handed to the SAX callbacks straight from the input buffer:
 *   - a run of spaces/LFs directly followed by '<' is delivered through
 *     ignorableWhitespace or characters, depending on areBlanks();
 *   - otherwise bytes accepted by test_char_data[] are stepped over, LF
 *     runs update line/col, a ']' that does not start "]]>" is kept as
 *     text, and the span is delivered through the SAX callbacks;
 *   - the function returns at '<' or '&', at "]]>", when the parser state
 *     is no longer XML_PARSER_CONTENT after a callback, or when the state
 *     is XML_PARSER_EOF after refilling the input.
 * If the loop ends on a byte outside 0x20..0x7F / TAB, or if cdata is
 * nonzero, input->line and input->col are reset to the values last saved
 * in `line`/`col` and the work is passed to xmlParseCharDataComplex().
 */
4377 void
4379  const xmlChar *in;
4380  int nbchar = 0;
4381  int line = ctxt->input->line;
4382  int col = ctxt->input->col;
4383  int ccol;
4384 
4385  SHRINK;
4386  GROW;
4387  /*
4388  * Accelerated common case where the input doesn't need to be
4389  * modified before passing it to the handler.
4390  */
4391  if (!cdata) {
4392  in = ctxt->input->cur;
4393  do {
4394 get_more_space:
/* step over leading spaces, then over LF runs (each LF starts a new line) */
4395  while (*in == 0x20) { in++; ctxt->input->col++; }
4396  if (*in == 0xA) {
4397  do {
4398  ctxt->input->line++; ctxt->input->col = 1;
4399  in++;
4400  } while (*in == 0xA);
4401  goto get_more_space;
4402  }
/* nothing but spaces/LFs before the next '<': deliver them and return */
4403  if (*in == '<') {
4404  nbchar = in - ctxt->input->cur;
4405  if (nbchar > 0) {
4406  const xmlChar *tmp = ctxt->input->cur;
4407  ctxt->input->cur = in;
4408 
4409  if ((ctxt->sax != NULL) &&
4410  (ctxt->sax->ignorableWhitespace !=
4411  ctxt->sax->characters)) {
4412  if (areBlanks(ctxt, tmp, nbchar, 1)) {
4413  if (ctxt->sax->ignorableWhitespace != NULL)
4414  ctxt->sax->ignorableWhitespace(ctxt->userData,
4415  tmp, nbchar);
4416  } else {
4417  if (ctxt->sax->characters != NULL)
4418  ctxt->sax->characters(ctxt->userData,
4419  tmp, nbchar);
4420  if (*ctxt->space == -1)
4421  *ctxt->space = -2;
4422  }
4423  } else if ((ctxt->sax != NULL) &&
4424  (ctxt->sax->characters != NULL)) {
4425  ctxt->sax->characters(ctxt->userData,
4426  tmp, nbchar);
4427  }
4428  }
4429  return;
4430  }
4431 
4432 get_more:
/* step over every byte the table accepts, counting columns locally */
4433  ccol = ctxt->input->col;
4434  while (test_char_data[*in]) {
4435  in++;
4436  ccol++;
4437  }
4438  ctxt->input->col = ccol;
4439  if (*in == 0xA) {
4440  do {
4441  ctxt->input->line++; ctxt->input->col = 1;
4442  in++;
4443  } while (*in == 0xA);
4444  goto get_more;
4445  }
4446  if (*in == ']') {
/* "]]>" inside text: advance cur past the first ']' and return
 * (source line 4448 is missing from this listing) */
4447  if ((in[1] == ']') && (in[2] == '>')) {
4449  ctxt->input->cur = in + 1;
4450  return;
4451  }
4452  in++;
4453  ctxt->input->col++;
4454  goto get_more;
4455  }
/* deliver the span [input->cur, in) to the SAX handler */
4456  nbchar = in - ctxt->input->cur;
4457  if (nbchar > 0) {
4458  if ((ctxt->sax != NULL) &&
4459  (ctxt->sax->ignorableWhitespace !=
4460  ctxt->sax->characters) &&
4461  (IS_BLANK_CH(*ctxt->input->cur))) {
4462  const xmlChar *tmp = ctxt->input->cur;
4463  ctxt->input->cur = in;
4464 
4465  if (areBlanks(ctxt, tmp, nbchar, 0)) {
4466  if (ctxt->sax->ignorableWhitespace != NULL)
4467  ctxt->sax->ignorableWhitespace(ctxt->userData,
4468  tmp, nbchar);
4469  } else {
4470  if (ctxt->sax->characters != NULL)
4471  ctxt->sax->characters(ctxt->userData,
4472  tmp, nbchar);
4473  if (*ctxt->space == -1)
4474  *ctxt->space = -2;
4475  }
/* remember the position reached after this delivery */
4476  line = ctxt->input->line;
4477  col = ctxt->input->col;
4478  } else if (ctxt->sax != NULL) {
4479  if (ctxt->sax->characters != NULL)
4480  ctxt->sax->characters(ctxt->userData,
4481  ctxt->input->cur, nbchar);
4482  line = ctxt->input->line;
4483  col = ctxt->input->col;
4484  }
4485  /* something really bad happened in the SAX callback */
4486  if (ctxt->instate != XML_PARSER_CONTENT)
4487  return;
4488  }
4489  ctxt->input->cur = in;
/* CR LF pair: move cur onto the LF so the CR is left out of the next span */
4490  if (*in == 0xD) {
4491  in++;
4492  if (*in == 0xA) {
4493  ctxt->input->cur = in;
4494  in++;
4495  ctxt->input->line++; ctxt->input->col = 1;
4496  continue; /* while */
4497  }
4498  in--;
4499  }
4500  if (*in == '<') {
4501  return;
4502  }
4503  if (*in == '&') {
4504  return;
4505  }
/* refill the input; `in` must be re-read from input->cur afterwards */
4506  SHRINK;
4507  GROW;
4508  if (ctxt->instate == XML_PARSER_EOF)
4509  return;
4510  in = ctxt->input->cur;
4511  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4512  nbchar = 0;
4513  }
/* fall back to the general parser from the last saved line/col */
4514  ctxt->input->line = line;
4515  ctxt->input->col = col;
4516  xmlParseCharDataComplex(ctxt, cdata);
4517 }
4518 
/*
 * Character-data parser, general path.
 *
 * NOTE(review): this listing omits source lines 4529-4530 (function name,
 * parameter list and the declaration of `buf`) and line 4545.  Going by
 * the forward declaration `xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,
 * int cdata)` earlier in the file this is presumably that function -
 * confirm against the full source.
 *
 * Reads one decoded character at a time with CUR_CHAR, appends it to
 * `buf` with COPY_BUF and flushes the buffer to the SAX callbacks each
 * time nbchar reaches XML_PARSER_BIG_BUFFER_SIZE.  The loop stops at
 * '<', at '&', or at a value that fails IS_CHAR; on "]]>" it stops only
 * when cdata is nonzero.  Whatever remains in `buf` is flushed after the
 * loop, and a nonzero character failing IS_CHAR is reported as
 * XML_ERR_INVALID_CHAR and skipped.
 */
4528 static void
4531  int nbchar = 0;
4532  int cur, l;
4533  int count = 0;
4534 
4535  SHRINK;
4536  GROW;
4537  cur = CUR_CHAR(l);
4538  while ((cur != '<') && /* checked */
4539  (cur != '&') &&
4540  (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4541  if ((cur == ']') && (NXT(1) == ']') &&
4542  (NXT(2) == '>')) {
4543  if (cdata) break;
4544  else {
/* source line 4545 is missing from this listing */
4546  }
4547  }
4548  COPY_BUF(l,buf,nbchar,cur);
4549  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4550  buf[nbchar] = 0;
4551 
4552  /*
4553  * OK the segment is to be consumed as chars.
4554  */
4555  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4556  if (areBlanks(ctxt, buf, nbchar, 0)) {
4557  if (ctxt->sax->ignorableWhitespace != NULL)
4558  ctxt->sax->ignorableWhitespace(ctxt->userData,
4559  buf, nbchar);
4560  } else {
4561  if (ctxt->sax->characters != NULL)
4562  ctxt->sax->characters(ctxt->userData, buf, nbchar);
4563  if ((ctxt->sax->characters !=
4564  ctxt->sax->ignorableWhitespace) &&
4565  (*ctxt->space == -1))
4566  *ctxt->space = -2;
4567  }
4568  }
4569  nbchar = 0;
4570  /* something really bad happened in the SAX callback */
4571  if (ctxt->instate != XML_PARSER_CONTENT)
4572  return;
4573  }
/* refill the input once every 50-odd characters */
4574  count++;
4575  if (count > 50) {
4576  GROW;
4577  count = 0;
4578  if (ctxt->instate == XML_PARSER_EOF)
4579  return;
4580  }
4581  NEXTL(l);
4582  cur = CUR_CHAR(l);
4583  }
/* flush what is left in the buffer */
4584  if (nbchar != 0) {
4585  buf[nbchar] = 0;
4586  /*
4587  * OK the segment is to be consumed as chars.
4588  */
4589  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4590  if (areBlanks(ctxt, buf, nbchar, 0)) {
4591  if (ctxt->sax->ignorableWhitespace != NULL)
4592  ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4593  } else {
4594  if (ctxt->sax->characters != NULL)
4595  ctxt->sax->characters(ctxt->userData, buf, nbchar);
4596  if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4597  (*ctxt->space == -1))
4598  *ctxt->space = -2;
4599  }
4600  }
4601  }
4602  if ((cur != 0) && (!IS_CHAR(cur))) {
4603  /* Generate the error and skip the offending character */
4604  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4605  "PCDATA invalid Char value %d\n",
4606  cur);
4607  NEXTL(l);
4608  }
4609 }
4610 
/*
 * Parse an external identifier: either
 *     "SYSTEM" S SystemLiteral
 * or
 *     "PUBLIC" S PubidLiteral [ S SystemLiteral ]
 *
 * NOTE(review): this listing omits source line 4634 (function name and
 * parameter list) and lines 4648, 4658 and 4680, so the three
 * `if (... == NULL) { }` bodies below appear empty here.  The body uses
 * `ctxt`, an output pointer `publicID` and a flag `strict`; presumably
 * this is xmlParseExternalID - confirm against the full source.
 *
 * *publicID is set to NULL first and, for the PUBLIC form, to the result
 * of xmlParsePubidLiteral().  When strict is nonzero a blank is required
 * after the public identifier and a system literal is always parsed.
 * When strict is zero the function returns NULL as soon as no blank, or
 * no quote character, follows the public identifier.
 *
 * Returns the result of xmlParseSystemLiteral(), or NULL when neither
 * keyword is present or the system literal is absent.
 */
4633 xmlChar *
4635  xmlChar *URI = NULL;
4636 
4637  SHRINK;
4638 
4639  *publicID = NULL;
4640  if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4641  SKIP(6);
4642  if (SKIP_BLANKS == 0) {
4643  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4644  "Space required after 'SYSTEM'\n");
4645  }
4646  URI = xmlParseSystemLiteral(ctxt);
4647  if (URI == NULL) {
4649  }
4650  } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4651  SKIP(6);
4652  if (SKIP_BLANKS == 0) {
4653  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4654  "Space required after 'PUBLIC'\n");
4655  }
4656  *publicID = xmlParsePubidLiteral(ctxt);
4657  if (*publicID == NULL) {
4659  }
4660  if (strict) {
4661  /*
4662  * We don't handle [83] so "S SystemLiteral" is required.
4663  */
4664  if (SKIP_BLANKS == 0) {
4665  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4666  "Space required after the Public Identifier\n");
4667  }
4668  } else {
4669  /*
4670  * We handle [83] so we return immediately, if
4671  * "S SystemLiteral" is not detected. We skip blanks if no
4672  * system literal was found, but this is harmless since we must
4673  * be at the end of a NotationDecl.
4674  */
4675  if (SKIP_BLANKS == 0) return(NULL);
4676  if ((CUR != '\'') && (CUR != '"')) return(NULL);
4677  }
4678  URI = xmlParseSystemLiteral(ctxt);
4679  if (URI == NULL) {
4681  }
4682  }
4683  return(URI);
4684 }
4685 
/*
 * Slow-path comment parser: consumes the comment text one decoded
 * character at a time up to the closing "-->" and reports it through the
 * SAX comment callback.
 *
 * NOTE(review): this listing omits source line 4701 (function name and
 * leading parameters; the body uses `ctxt` and `buf`), line 4713 and
 * line 4750.  Presumably this is the helper the fast comment parser falls
 * back to (xmlParseCommentComplex) - confirm against the full source.
 *
 * buf:  text collected so far, or NULL (a buffer is then allocated here
 *       and len restarts at 0); freed on every path that returns after a
 *       successful allocation
 * len:  number of bytes already stored in buf
 * size: allocated size of buf
 *
 * q and r always hold the two characters preceding cur, so the loop ends
 * when q == '-', r == '-' and cur == '>'.  Only q is copied into buf on
 * each iteration, which keeps the closing "--" out of the stored text.
 */
4700 static void
4702  size_t len, size_t size) {
4703  int q, ql;
4704  int r, rl;
4705  int cur, l;
4706  size_t count = 0;
4707  int inputid;
4708 
4709  inputid = ctxt->input->id;
4710 
4711  if (buf == NULL) {
4712  len = 0;
4714  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4715  if (buf == NULL) {
4716  xmlErrMemory(ctxt, NULL);
4717  return;
4718  }
4719  }
4720  GROW; /* Assure there's enough input data */
4721  q = CUR_CHAR(ql);
4722  if (q == 0)
4723  goto not_terminated;
4724  if (!IS_CHAR(q)) {
4725  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4726  "xmlParseComment: invalid xmlChar value %d\n",
4727  q);
4728  xmlFree (buf);
4729  return;
4730  }
4731  NEXTL(ql);
4732  r = CUR_CHAR(rl);
4733  if (r == 0)
4734  goto not_terminated;
4735  if (!IS_CHAR(r)) {
/* report r, the character that actually failed the check (was q) */
4736  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4737  "xmlParseComment: invalid xmlChar value %d\n",
4738  r);
4739  xmlFree (buf);
4740  return;
4741  }
4742  NEXTL(rl);
4743  cur = CUR_CHAR(l);
4744  if (cur == 0)
4745  goto not_terminated;
4746  while (IS_CHAR(cur) && /* checked */
4747  ((cur != '>') ||
4748  (r != '-') || (q != '-'))) {
4749  if ((r == '-') && (q == '-')) {
/* "--" not followed by '>' (source line 4750 is missing from this listing) */
4751  }
4752  if ((len > XML_MAX_TEXT_LENGTH) &&
4753  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4754  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4755  "Comment too big found", NULL);
4756  xmlFree (buf);
4757  return;
4758  }
/* grow before copying: keep 5 spare bytes for one character plus the final 0 */
4759  if (len + 5 >= size) {
4760  xmlChar *new_buf;
4761  size_t new_size;
4762 
4763  new_size = size * 2;
4764  new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4765  if (new_buf == NULL) {
4766  xmlFree (buf);
4767  xmlErrMemory(ctxt, NULL);
4768  return;
4769  }
4770  buf = new_buf;
4771  size = new_size;
4772  }
/* store q, then slide the three-character window forward */
4773  COPY_BUF(ql,buf,len,q);
4774  q = r;
4775  ql = rl;
4776  r = cur;
4777  rl = l;
4778 
4779  count++;
4780  if (count > 50) {
4781  GROW;
4782  count = 0;
4783  if (ctxt->instate == XML_PARSER_EOF) {
4784  xmlFree(buf);
4785  return;
4786  }
4787  }
4788  NEXTL(l);
4789  cur = CUR_CHAR(l);
4790  if (cur == 0) {
4791  SHRINK;
4792  GROW;
4793  cur = CUR_CHAR(l);
4794  }
4795  }
4796  buf[len] = 0;
4797  if (cur == 0) {
4798  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4799  "Comment not terminated \n<!--%.50s\n", buf);
4800  } else if (!IS_CHAR(cur)) {
4801  xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4802  "xmlParseComment: invalid xmlChar value %d\n",
4803  cur);
4804  } else {
/* cur is the closing '>': check the entity boundary, consume it, report */
4805  if (inputid != ctxt->input->id) {
4806  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4807  "Comment doesn't start and stop in the same"
4808  " entity\n");
4809  }
4810  NEXT;
4811  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4812  (!ctxt->disableSAX))
4813  ctxt->sax->comment(ctxt->userData, buf);
4814  }
4815  xmlFree(buf);
4816  return;
4817 not_terminated:
4818  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4819  "Comment not terminated\n", NULL);
4820  xmlFree(buf);
4821  return;
4822 }
4823 
/*
 * Parse an XML comment "<!-- ... -->" and report its text through the SAX
 * comment callback.
 *
 * NOTE(review): this listing omits source line 4835 (function name and
 * parameter list), line 4839 (presumably the declaration of `state`) and
 * line 4993 (the statement that runs when the fast loop below gives up -
 * presumably the hand-over to the slow-path comment parser).  Presumably
 * this is xmlParseComment - confirm against the full source.
 *
 * Does nothing unless the input is positioned on "<!--".  The parser
 * state is switched to XML_PARSER_COMMENT while parsing; it is restored
 * on the normal exits unless it has become XML_PARSER_EOF.
 *
 * The loop scans in place over TAB and the bytes 0x20..0x7F other than
 * '-', counting lines on LF, and appends each scanned span to `buf` when
 * a SAX comment handler is set.  At "-->" the comment is delivered (an
 * empty string if nothing was buffered).  A "--" not followed by '>' is
 * reported as XML_ERR_HYPHEN_IN_COMMENT and scanning continues.
 */
4834 void
4836  xmlChar *buf = NULL;
4837  size_t size = XML_PARSER_BUFFER_SIZE;
4838  size_t len = 0;
4840  const xmlChar *in;
4841  size_t nbchar = 0;
4842  int ccol;
4843  int inputid;
4844 
4845  /*
4846  * Check that there is a comment right here.
4847  */
4848  if ((RAW != '<') || (NXT(1) != '!') ||
4849  (NXT(2) != '-') || (NXT(3) != '-')) return;
4850  state = ctxt->instate;
4851  ctxt->instate = XML_PARSER_COMMENT;
4852  inputid = ctxt->input->id;
4853  SKIP(4);
4854  SHRINK;
4855  GROW;
4856 
4857  /*
4858  * Accelerated common case where the input doesn't need to be
4859  * modified before passing it to the handler.
4860  */
4861  in = ctxt->input->cur;
4862  do {
4863  if (*in == 0xA) {
4864  do {
4865  ctxt->input->line++; ctxt->input->col = 1;
4866  in++;
4867  } while (*in == 0xA);
4868  }
4869 get_more:
/* step over TAB and every byte in 0x20..0x7F except '-' */
4870  ccol = ctxt->input->col;
4871  while (((*in > '-') && (*in <= 0x7F)) ||
4872  ((*in >= 0x20) && (*in < '-')) ||
4873  (*in == 0x09)) {
4874  in++;
4875  ccol++;
4876  }
4877  ctxt->input->col = ccol;
4878  if (*in == 0xA) {
4879  do {
4880  ctxt->input->line++; ctxt->input->col = 1;
4881  in++;
4882  } while (*in == 0xA);
4883  goto get_more;
4884  }
4885  nbchar = in - ctxt->input->cur;
4886  /*
4887  * save current set of data
4888  */
4889  if (nbchar > 0) {
4890  if ((ctxt->sax != NULL) &&
4891  (ctxt->sax->comment != NULL)) {
4892  if (buf == NULL) {
/* first span: exact fit when "--" follows immediately, else leave headroom */
4893  if ((*in == '-') && (in[1] == '-'))
4894  size = nbchar + 1;
4895  else
4896  size = XML_PARSER_BUFFER_SIZE + nbchar;
4897  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4898  if (buf == NULL) {
4899  xmlErrMemory(ctxt, NULL);
4900  ctxt->instate = state;
4901  return;
4902  }
4903  len = 0;
4904  } else if (len + nbchar + 1 >= size) {
4905  xmlChar *new_buf;
4906  size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4907  new_buf = (xmlChar *) xmlRealloc(buf,
4908  size * sizeof(xmlChar));
4909  if (new_buf == NULL) {
4910  xmlFree (buf);
4911  xmlErrMemory(ctxt, NULL);
4912  ctxt->instate = state;
4913  return;
4914  }
4915  buf = new_buf;
4916  }
4917  memcpy(&buf[len], ctxt->input->cur, nbchar);
4918  len += nbchar;
4919  buf[len] = 0;
4920  }
4921  }
/* size limit applies unless XML_PARSE_HUGE is set */
4922  if ((len > XML_MAX_TEXT_LENGTH) &&
4923  ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4924  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4925  "Comment too big found", NULL);
4926  xmlFree (buf);
4927  return;
4928  }
4929  ctxt->input->cur = in;
4930  if (*in == 0xA) {
4931  in++;
4932  ctxt->input->line++; ctxt->input->col = 1;
4933  }
/* CR LF pair: move cur onto the LF so the CR is left out of the next span */
4934  if (*in == 0xD) {
4935  in++;
4936  if (*in == 0xA) {
4937  ctxt->input->cur = in;
4938  in++;
4939  ctxt->input->line++; ctxt->input->col = 1;
4940  continue; /* while */
4941  }
4942  in--;
4943  }
/* refill the input; `in` must be re-read from input->cur afterwards */
4944  SHRINK;
4945  GROW;
4946  if (ctxt->instate == XML_PARSER_EOF) {
4947  xmlFree(buf);
4948  return;
4949  }
4950  in = ctxt->input->cur;
4951  if (*in == '-') {
4952  if (in[1] == '-') {
/* "-->": end of comment - check the entity boundary, consume it, report */
4953  if (in[2] == '>') {
4954  if (ctxt->input->id != inputid) {
4955  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4956  "comment doesn't start and stop in the"
4957  " same entity\n");
4958  }
4959  SKIP(3);
4960  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4961  (!ctxt->disableSAX)) {
4962  if (buf != NULL)
4963  ctxt->sax->comment(ctxt->userData, buf);
4964  else
4965  ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4966  }
4967  if (buf != NULL)
4968  xmlFree(buf);
4969  if (ctxt->instate != XML_PARSER_EOF)
4970  ctxt->instate = state;
4971  return;
4972  }
/* "--" not followed by '>': report it and keep scanning */
4973  if (buf != NULL) {
4974  xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4975  "Double hyphen within comment: "
4976  "<!--%.50s\n",
4977  buf);
4978  } else
4979  xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4980  "Double hyphen within comment\n", NULL);
4981  if (ctxt->instate == XML_PARSER_EOF) {
4982  xmlFree(buf);
4983  return;
4984  }
4985  in++;
4986  ctxt->input->col++;
4987  }
4988  in++;
4989  ctxt->input->col++;
4990  goto get_more;
4991  }
4992  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
/* source line 4993 is missing from this listing (see header note) */
4994  ctxt->instate = state;
4995  return;
4996 }
4997 
4998 
/*
 * Parse the target name of a processing instruction and check it against
 * the reserved "xml" prefix.
 *
 * NOTE(review): this listing omits source line 5011 (function name and
 * parameter list) and line 5026.  The body uses a parser context `ctxt`;
 * presumably this is xmlParsePITarget - confirm against the full source.
 *
 * The name comes from xmlParseName().  When it starts with "xml" in any
 * letter case:
 *   - exactly lower-case "xml" raises XML_ERR_RESERVED_XML_NAME;
 *   - any other three-letter spelling returns right after the (missing)
 *     line 5026;
 *   - a longer name is accepted silently if it equals an entry of the
 *     NULL-terminated xmlW3CPIs list, otherwise a warning is issued.
 * A name containing ':' is reported through xmlNsErr().
 *
 * Returns the parsed name in every case, or NULL when xmlParseName()
 * returned NULL.
 */
5010 const xmlChar *
5012  const xmlChar *name;
5013 
5014  name = xmlParseName(ctxt);
5015  if ((name != NULL) &&
5016  ((name[0] == 'x') || (name[0] == 'X')) &&
5017  ((name[1] == 'm') || (name[1] == 'M')) &&
5018  ((name[2] == 'l') || (name[2] == 'L'))) {
5019  int i;
5020  if ((name[0] == 'x') && (name[1] == 'm') &&
5021  (name[2] == 'l') && (name[3] == 0)) {
5022  xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5023  "XML declaration allowed only at the start of the document\n");
5024  return(name);
5025  } else if (name[3] == 0) {
5027  return(name);
5028  }
/* longer names: accept the ones listed in xmlW3CPIs without a warning */
5029  for (i = 0;;i++) {
5030  if (xmlW3CPIs[i] == NULL) break;
5031  if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5032  return(name);
5033  }
5034  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5035  "xmlParsePITarget: invalid name prefix 'xml'\n",
5036  NULL, NULL);
5037  }
5038  if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5039  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5040  "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5041  }
5042  return(name);
5043 }
5044 
5045 #ifdef LIBXML_CATALOG_ENABLED
5046 
5061 static void
5062 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5063  xmlChar *URL = NULL;
5064  const xmlChar *tmp, *base;
5065  xmlChar marker;
5066 
5067  tmp = catalog;
5068  while (IS_BLANK_CH(*tmp)) tmp++;
5069  if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5070  goto error;
5071  tmp += 7;
5072  while (IS_BLANK_CH(*tmp)) tmp++;
5073  if (*tmp != '=') {
5074  return;
5075  }
5076  tmp++;
5077  while (IS_BLANK_CH(*tmp)) tmp++;
5078  marker = *tmp;
5079  if ((marker != '\'') && (marker != '"'))
5080  goto error;
5081  tmp++;
5082  base = tmp;
5083  while ((*tmp != 0) && (*tmp != marker)) tmp++;
5084  if (*tmp == 0)
5085  goto error;
5086  URL = xmlStrndup(base, tmp - base);
5087  tmp++;
5088  while (IS_BLANK_CH(*tmp)) tmp++;
5089  if (*tmp != 0)
5090  goto error;
5091 
5092  if (URL != NULL) {
5093  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5094  xmlFree(URL);
5095  }
5096  return;
5097 
5098 error:
5099  xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5100  "Catalog PI syntax error: %s\n",
5101  catalog, NULL);
5102  if (URL != NULL)
5103  xmlFree(URL);
5104 }
5105 #endif
5106 
5118 void
5120  xmlChar *buf = NULL;
5121  size_t len = 0;
5122  size_t size = XML_PARSER_BUFFER_SIZE;
5123  int cur, l;
5124  const xmlChar *target;
5126  int count = 0;
5127 
5128  if ((RAW == '<') && (NXT(1) == '?')) {
5129  int inputid = ctxt->input->id;
5130  state = ctxt->instate;
5131  ctxt->instate = XML_PARSER_PI;
5132  /*
5133  * this is a Processing Instruction.
5134  */
5135  SKIP(2);
5136  SHRINK;
5137 
5138  /*
5139  * Parse the target name and check for special support like
5140  * namespace.
5141  */
5142  target = xmlParsePITarget(ctxt);
5143  if (target != NULL) {
5144  if ((RAW == '?') && (NXT(1) == '>')) {
5145  if (inputid != ctxt->input->id) {
5146  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5147  "PI declaration doesn't start and stop in"
5148  " the same entity\n");
5149  }
5150  SKIP(2);
5151 
5152  /*
5153  * SAX: PI detected.
5154  */
5155  if ((ctxt->sax) && (!ctxt->disableSAX) &&
5156  (ctxt->sax->processingInstruction != NULL))
5157  ctxt->sax->processingInstruction(ctxt->userData,
5158  target, NULL);
5159  if (ctxt->instate != XML_PARSER_EOF)
5160  ctxt->instate = state;
5161  return;
5162  }
5163  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5164  if (buf == NULL) {
5165  xmlErrMemory(ctxt, NULL);
5166  ctxt->instate = state;
5167  return;
5168  }
5169  if (SKIP_BLANKS == 0) {
5170  xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5171  "ParsePI: PI %s space expected\n", target);
5172  }
5173  cur = CUR_CHAR(l);
5174  while (IS_CHAR(cur) && /* checked */
5175  ((cur != '?') || (NXT(1) != '>'))) {
5176  if (len + 5 >= size) {
5177  xmlChar *tmp;
5178  size_t new_size = size * 2;
5179  tmp = (xmlChar *) xmlRealloc(buf, new_size);
5180  if (tmp == NULL) {
5181  xmlErrMemory(ctxt, NULL);
5182  xmlFree(buf);
5183  ctxt->instate = state;
5184  return;
5185  }
5186  buf = tmp;
5187  size = new_size;
5188  }
5189