11#ifdef LIBXML_HTML_ENABLED
34#define HTML_MAX_NAMELEN 1000
35#define HTML_PARSER_BIG_BUFFER_SIZE 1000
36#define HTML_PARSER_BUFFER_SIZE 100
41static int htmlOmittedDefaultValue = 1;
43xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt,
int len,
45static void htmlParseComment(htmlParserCtxtPtr ctxt);
75 "Memory allocation failed : %s\n",
extra);
79 NULL,
NULL, 0, 0,
"Memory allocation failed\n");
96 if ((ctxt !=
NULL) && (ctxt->disableSAX != 0) &&
103 (
const char *) str1, (
const char *) str2,
107 ctxt->wellFormed = 0;
123 if ((ctxt !=
NULL) && (ctxt->disableSAX != 0) &&
132 ctxt->wellFormed = 0;
151htmlnamePush(htmlParserCtxtPtr ctxt,
const xmlChar *
value)
157 if (ctxt->nameNr >= ctxt->nameMax) {
159 ctxt->nameTab = (
const xmlChar * *)
162 sizeof(ctxt->nameTab[0]));
163 if (ctxt->nameTab ==
NULL) {
164 htmlErrMemory(ctxt,
NULL);
168 ctxt->nameTab[ctxt->nameNr] =
value;
170 return (ctxt->nameNr++);
181htmlnamePop(htmlParserCtxtPtr ctxt)
185 if (ctxt->nameNr <= 0)
188 if (ctxt->nameNr < 0)
190 if (ctxt->nameNr > 0)
191 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
194 ret = ctxt->nameTab[ctxt->nameNr];
195 ctxt->nameTab[ctxt->nameNr] =
NULL;
209htmlNodeInfoPush(htmlParserCtxtPtr ctxt, htmlParserNodeInfo *
value)
211 if (ctxt->nodeInfoNr >= ctxt->nodeInfoMax) {
212 if (ctxt->nodeInfoMax == 0)
213 ctxt->nodeInfoMax = 5;
214 ctxt->nodeInfoMax *= 2;
215 ctxt->nodeInfoTab = (htmlParserNodeInfo *)
216 xmlRealloc((htmlParserNodeInfo *)ctxt->nodeInfoTab,
218 sizeof(ctxt->nodeInfoTab[0]));
219 if (ctxt->nodeInfoTab ==
NULL) {
220 htmlErrMemory(ctxt,
NULL);
224 ctxt->nodeInfoTab[ctxt->nodeInfoNr] = *
value;
225 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
226 return (ctxt->nodeInfoNr++);
237static htmlParserNodeInfo *
238htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
240 if (ctxt->nodeInfoNr <= 0)
243 if (ctxt->nodeInfoNr < 0)
245 if (ctxt->nodeInfoNr > 0)
246 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr - 1];
248 ctxt->nodeInfo =
NULL;
249 return &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
281#define UPPER (toupper(*ctxt->input->cur))
283#define SKIP(val) ctxt->input->cur += (val),ctxt->input->col+=(val)
285#define NXT(val) ctxt->input->cur[(val)]
287#define UPP(val) (toupper(ctxt->input->cur[(val)]))
289#define CUR_PTR ctxt->input->cur
290#define BASE_PTR ctxt->input->base
/*
 * SHRINK:
 *
 * Calls xmlParserInputShrink() on the current input when the cursor is more
 * than 2 * INPUT_CHUNK bytes past the buffer base and fewer than
 * 2 * INPUT_CHUNK bytes remain before the buffer end.
 *
 * Expects a variable named `ctxt` in scope.  Use as a statement: `SHRINK;`.
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement; as a bare `if` it would swallow an `else` that follows it
 * (e.g. `if (x) SHRINK; else ...`).
 */
#define SHRINK do { \
    if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlParserInputShrink(ctxt->input); \
} while (0)
/*
 * GROW:
 *
 * When the context is not in progressive mode (ctxt->progressive == 0) and
 * fewer than INPUT_CHUNK bytes remain between the cursor and the end of the
 * input buffer, calls xmlParserInputGrow() asking for INPUT_CHUNK more.
 *
 * Expects a variable named `ctxt` in scope.  Use as a statement: `GROW;`.
 * Wrapped in do { } while (0) so that the expansion is a single statement
 * and cannot capture an `else` belonging to an enclosing `if`.
 */
#define GROW do { \
    if ((ctxt->progressive == 0) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
} while (0)
300#define CURRENT ((int) (*ctxt->input->cur))
302#define SKIP_BLANKS htmlSkipBlankChars(ctxt)
307#define CUR ((int) (*ctxt->input->cur))
308#define NEXT xmlNextChar(ctxt)
310#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
313#define NEXTL(l) do { \
314 if (*(ctxt->input->cur) == '\n') { \
315 ctxt->input->line++; ctxt->input->col = 1; \
316 } else ctxt->input->col++; \
317 ctxt->token = 0; ctxt->input->cur += l; \
326#define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
327#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
/*
 * COPY_BUF:
 * @l:  encoded length of the character, in bytes
 * @b:  destination buffer
 * @i:  write index into @b (an lvalue; advanced by the macro)
 * @v:  character value to store
 *
 * Appends @v to @b at index @i.  When @l is 1 the value is stored as a
 * single xmlChar and @i is incremented; otherwise xmlCopyChar() writes the
 * character and @i is advanced by its return value.
 *
 * Every argument is parenthesized so an expression argument keeps its own
 * precedence (previously `(xmlChar) v` applied the cast to only the first
 * operand of @v).  The body is wrapped in do { } while (0) so the macro is
 * one statement; use it as `COPY_BUF(l, b, i, v);`.
 * Note: @l, @b and @i appear more than once in the expansion, so they must
 * not have side effects.
 */
#define COPY_BUF(l,b,i,v) do { \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyChar((l), &(b)[(i)], (v)); \
} while (0)
375 while (((*
cur >=
'A') && (*
cur <=
'Z')) ||
376 ((*
cur >=
'a') && (*
cur <=
'z')) ||
377 ((*
cur >=
'0') && (*
cur <=
'9')) ||
378 (*
cur ==
'-') || (*
cur ==
'_') || (*
cur ==
':') || (*
cur ==
'/'))
401 const unsigned char *
cur;
408 if (ctxt->
token != 0) {
421 if ((
int) *ctxt->
input->
cur < 0x80) {
426 "Char 0x%X out of allowed range\n", 0);
435 guess = htmlFindEncoding(ctxt);
452 "Unsupported encoding %s", guess,
NULL);
478 if ((
cur[1] & 0xc0) != 0x80)
480 if ((
c & 0xe0) == 0xe0) {
486 if ((
cur[2] & 0xc0) != 0x80)
488 if ((
c & 0xf0) == 0xf0) {
493 if (((
c & 0xf8) != 0xf0) ||
494 ((
cur[3] & 0xc0) != 0x80))
498 val = (
cur[0] & 0x7) << 18;
499 val |= (
cur[1] & 0x3f) << 12;
500 val |= (
cur[2] & 0x3f) << 6;
507 val = (
cur[0] & 0xf) << 12;
508 val |= (
cur[1] & 0x3f) << 6;
516 val = (
cur[0] & 0x1f) << 6;
523 "Char 0x%X out of allowed range\n",
val);
530 "Char 0x%X out of allowed range\n", 0);
558 "Input is not proper UTF-8, indicate encoding !\n",
630#define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small"
631#define NB_FONTSTYLE 8
632#define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
634#define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
636#define INLINE FONTSTYLE, PHRASE, SPECIAL, FORMCTRL
/*
 * Sum of the NB_PCDATA, NB_FONTSTYLE, NB_PHRASE, NB_SPECIAL and NB_FORMCTRL
 * counts.  Parenthesized so the macro behaves as a single value when used
 * inside a larger expression (e.g. `2 * NB_INLINE`).
 */
#define NB_INLINE (NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTRL)
638#define BLOCK HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address"
/*
 * Number of entries in BLOCK: the HEADING and LIST groups plus the 14
 * element names listed literally in BLOCK.  Parenthesized so the macro
 * behaves as a single value inside a larger expression.
 */
#define NB_BLOCK (NB_HEADING + NB_LIST + 14)
640#define FORMCTRL "input", "select", "textarea", "label", "button"
644#define HEADING "h1", "h2", "h3", "h4", "h5", "h6"
646#define LIST "ul", "ol", "dir", "menu"
650#define FLOW BLOCK,INLINE
/*
 * Number of entries in FLOW (BLOCK followed by INLINE).  Parenthesized so
 * the macro behaves as a single value inside a larger expression.
 */
#define NB_FLOW (NB_BLOCK + NB_INLINE)
655static const char*
const html_flow[] = { FLOW,
NULL } ;
656static const char*
const html_inline[] = {
INLINE,
NULL } ;
659static const char*
const html_pcdata[] = {
NULL } ;
660#define html_cdata html_pcdata
665#define COREATTRS "id", "class", "style", "title"
666#define NB_COREATTRS 4
667#define I18N "lang", "dir"
669#define EVENTS "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress", "onkeydown", "onkeyup"
671#define ATTRS COREATTRS,I18N,EVENTS
/*
 * Number of entries in ATTRS (COREATTRS, I18N, EVENTS).
 * The first term previously read NB_NB_COREATTRS, an identifier that is not
 * defined; the intended constant is NB_COREATTRS.  Parenthesized so the
 * macro behaves as a single value inside a larger expression.
 */
#define NB_ATTRS (NB_COREATTRS + NB_I18N + NB_EVENTS)
673#define CELLHALIGN "align", "char", "charoff"
674#define NB_CELLHALIGN 3
675#define CELLVALIGN "valign"
676#define NB_CELLVALIGN 1
678static const char*
const html_attrs[] = { ATTRS,
NULL } ;
679static const char*
const core_i18n_attrs[] = { COREATTRS, I18N,
NULL } ;
680static const char*
const core_attrs[] = { COREATTRS,
NULL } ;
681static const char*
const i18n_attrs[] = { I18N,
NULL } ;
685static const char*
const a_attrs[] = { ATTRS,
"charset",
"type",
"name",
686 "href",
"hreflang",
"rel",
"rev",
"accesskey",
"shape",
"coords",
687 "tabindex",
"onfocus",
"onblur",
NULL } ;
688static const char*
const target_attr[] = {
"target",
NULL } ;
689static const char*
const rows_cols_attr[] = {
"rows",
"cols",
NULL } ;
690static const char*
const alt_attr[] = {
"alt",
NULL } ;
691static const char*
const src_alt_attrs[] = {
"src",
"alt",
NULL } ;
692static const char*
const href_attrs[] = {
"href",
NULL } ;
693static const char*
const clear_attrs[] = {
"clear",
NULL } ;
694static const char*
const inline_p[] = {
INLINE,
"p",
NULL } ;
696static const char*
const flow_param[] = { FLOW,
"param",
NULL } ;
697static const char*
const applet_attrs[] = { COREATTRS ,
"codebase",
698 "archive",
"alt",
"name",
"height",
"width",
"align",
699 "hspace",
"vspace",
NULL } ;
700static const char*
const area_attrs[] = {
"shape",
"coords",
"href",
"nohref",
701 "tabindex",
"accesskey",
"onfocus",
"onblur",
NULL } ;
702static const char*
const basefont_attrs[] =
703 {
"id",
"size",
"color",
"face",
NULL } ;
704static const char*
const quote_attrs[] = { ATTRS,
"cite",
NULL } ;
705static const char*
const body_contents[] = { FLOW,
"ins",
"del",
NULL } ;
706static const char*
const body_attrs[] = { ATTRS,
"onload",
"onunload",
NULL } ;
707static const char*
const body_depr[] = {
"background",
"bgcolor",
"text",
708 "link",
"vlink",
"alink",
NULL } ;
709static const char*
const button_attrs[] = { ATTRS,
"name",
"value",
"type",
710 "disabled",
"tabindex",
"accesskey",
"onfocus",
"onblur",
NULL } ;
713static const char*
const col_attrs[] = { ATTRS,
"span",
"width", CELLHALIGN, CELLVALIGN,
NULL } ;
714static const char*
const col_elt[] = {
"col",
NULL } ;
715static const char*
const edit_attrs[] = { ATTRS,
"datetime",
"cite",
NULL } ;
716static const char*
const compact_attrs[] = { ATTRS,
"compact",
NULL } ;
717static const char*
const dl_contents[] = {
"dt",
"dd",
NULL } ;
718static const char*
const compact_attr[] = {
"compact",
NULL } ;
719static const char*
const label_attr[] = {
"label",
NULL } ;
720static const char*
const fieldset_contents[] = { FLOW,
"legend" } ;
721static const char*
const font_attrs[] = { COREATTRS, I18N,
"size",
"color",
"face" ,
NULL } ;
722static const char*
const form_contents[] = { HEADING,
LIST,
INLINE,
"pre",
"p",
"div",
"center",
"noscript",
"noframes",
"blockquote",
"isindex",
"hr",
"table",
"fieldset",
"address",
NULL } ;
723static const char*
const form_attrs[] = { ATTRS,
"method",
"enctype",
"accept",
"name",
"onsubmit",
"onreset",
"accept-charset",
NULL } ;
724static const char*
const frame_attrs[] = { COREATTRS,
"longdesc",
"name",
"src",
"frameborder",
"marginwidth",
"marginheight",
"noresize",
"scrolling" ,
NULL } ;
725static const char*
const frameset_attrs[] = { COREATTRS,
"rows",
"cols",
"onload",
"onunload",
NULL } ;
726static const char*
const frameset_contents[] = {
"frameset",
"frame",
"noframes",
NULL } ;
727static const char*
const head_attrs[] = { I18N,
"profile",
NULL } ;
728static const char*
const head_contents[] = {
"title",
"isindex",
"base",
"script",
"style",
"meta",
"link",
"object",
NULL } ;
729static const char*
const hr_depr[] = {
"align",
"noshade",
"size",
"width",
NULL } ;
730static const char*
const version_attr[] = {
"version",
NULL } ;
731static const char*
const html_content[] = {
"head",
"body",
"frameset",
NULL } ;
732static const char*
const iframe_attrs[] = { COREATTRS,
"longdesc",
"name",
"src",
"frameborder",
"marginwidth",
"marginheight",
"scrolling",
"align",
"height",
"width",
NULL } ;
733static const char*
const img_attrs[] = { ATTRS,
"longdesc",
"name",
"height",
"width",
"usemap",
"ismap",
NULL } ;
734static const char*
const embed_attrs[] = { COREATTRS,
"align",
"alt",
"border",
"code",
"codebase",
"frameborder",
"height",
"hidden",
"hspace",
"name",
"palette",
"pluginspace",
"pluginurl",
"src",
"type",
"units",
"vspace",
"width",
NULL } ;
735static const char*
const input_attrs[] = { ATTRS,
"type",
"name",
"value",
"checked",
"disabled",
"readonly",
"size",
"maxlength",
"src",
"alt",
"usemap",
"ismap",
"tabindex",
"accesskey",
"onfocus",
"onblur",
"onselect",
"onchange",
"accept",
NULL } ;
736static const char*
const prompt_attrs[] = { COREATTRS, I18N,
"prompt",
NULL } ;
737static const char*
const label_attrs[] = { ATTRS,
"for",
"accesskey",
"onfocus",
"onblur",
NULL } ;
738static const char*
const legend_attrs[] = { ATTRS,
"accesskey",
NULL } ;
739static const char*
const align_attr[] = {
"align",
NULL } ;
740static const char*
const link_attrs[] = { ATTRS,
"charset",
"href",
"hreflang",
"type",
"rel",
"rev",
"media",
NULL } ;
741static const char*
const map_contents[] = {
BLOCK,
"area",
NULL } ;
742static const char*
const name_attr[] = {
"name",
NULL } ;
743static const char*
const action_attr[] = {
"action",
NULL } ;
744static const char*
const blockli_elt[] = {
BLOCK,
"li",
NULL } ;
745static const char*
const meta_attrs[] = { I18N,
"http-equiv",
"name",
"scheme",
"charset",
NULL } ;
746static const char*
const content_attr[] = {
"content",
NULL } ;
747static const char*
const type_attr[] = {
"type",
NULL } ;
748static const char*
const noframes_content[] = {
"body", FLOW MODIFIER,
NULL } ;
749static const char*
const object_contents[] = { FLOW,
"param",
NULL } ;
750static const char*
const object_attrs[] = { ATTRS,
"declare",
"classid",
"codebase",
"data",
"type",
"codetype",
"archive",
"standby",
"height",
"width",
"usemap",
"name",
"tabindex",
NULL } ;
751static const char*
const object_depr[] = {
"align",
"border",
"hspace",
"vspace",
NULL } ;
752static const char*
const ol_attrs[] = {
"type",
"compact",
"start",
NULL} ;
753static const char*
const option_elt[] = {
"option",
NULL } ;
754static const char*
const optgroup_attrs[] = { ATTRS,
"disabled",
NULL } ;
755static const char*
const option_attrs[] = { ATTRS,
"disabled",
"label",
"selected",
"value",
NULL } ;
756static const char*
const param_attrs[] = {
"id",
"value",
"valuetype",
"type",
NULL } ;
757static const char*
const width_attr[] = {
"width",
NULL } ;
758static const char*
const pre_content[] = { PHRASE,
"tt",
"i",
"b",
"u",
"s",
"strike",
"a",
"br",
"script",
"map",
"q",
"span",
"bdo",
"iframe",
NULL } ;
759static const char*
const script_attrs[] = {
"charset",
"src",
"defer",
"event",
"for",
NULL } ;
760static const char*
const language_attr[] = {
"language",
NULL } ;
761static const char*
const select_content[] = {
"optgroup",
"option",
NULL } ;
762static const char*
const select_attrs[] = { ATTRS,
"name",
"size",
"multiple",
"disabled",
"tabindex",
"onfocus",
"onblur",
"onchange",
NULL } ;
763static const char*
const style_attrs[] = { I18N,
"media",
"title",
NULL } ;
764static const char*
const table_attrs[] = { ATTRS,
"summary",
"width",
"border",
"frame",
"rules",
"cellspacing",
"cellpadding",
"datapagesize",
NULL } ;
765static const char*
const table_depr[] = {
"align",
"bgcolor",
NULL } ;
766static const char*
const table_contents[] = {
"caption",
"col",
"colgroup",
"thead",
"tfoot",
"tbody",
"tr",
NULL} ;
767static const char*
const tr_elt[] = {
"tr",
NULL } ;
768static const char*
const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN,
NULL} ;
769static const char*
const th_td_depr[] = {
"nowrap",
"bgcolor",
"width",
"height",
NULL } ;
770static const char*
const th_td_attr[] = { ATTRS,
"abbr",
"axis",
"headers",
"scope",
"rowspan",
"colspan", CELLHALIGN, CELLVALIGN,
NULL } ;
771static const char*
const textarea_attrs[] = { ATTRS,
"name",
"disabled",
"readonly",
"tabindex",
"accesskey",
"onfocus",
"onblur",
"onselect",
"onchange",
NULL } ;
772static const char*
const tr_contents[] = {
"th",
"td",
NULL } ;
773static const char*
const bgcolor_attr[] = {
"bgcolor",
NULL } ;
774static const char*
const li_elt[] = {
"li",
NULL } ;
775static const char*
const ul_depr[] = {
"type",
"compact",
NULL} ;
776static const char*
const dir_attr[] = {
"dir",
NULL} ;
778#define DECL (const char**)
780static const htmlElemDesc
781html40ElementTable[] = {
782{
"a", 0, 0, 0, 0, 0, 0, 1,
"anchor ",
783 DECL html_inline ,
NULL , DECL a_attrs , DECL target_attr,
NULL
785{
"abbr", 0, 0, 0, 0, 0, 0, 1,
"abbreviated form",
788{
"acronym", 0, 0, 0, 0, 0, 0, 1,
"",
791{
"address", 0, 0, 0, 0, 0, 0, 0,
"information on author ",
794{
"applet", 0, 0, 0, 0, 1, 1, 2,
"java applet ",
797{
"area", 0, 2, 2, 1, 0, 0, 0,
"client-side image map area ",
798 EMPTY ,
NULL , DECL area_attrs , DECL target_attr, DECL alt_attr
800{
"b", 0, 3, 0, 0, 0, 0, 1,
"bold text style",
803{
"base", 0, 2, 2, 1, 0, 0, 0,
"document base uri ",
806{
"basefont", 0, 2, 2, 1, 1, 1, 1,
"base font size " ,
809{
"bdo", 0, 0, 0, 0, 0, 0, 1,
"i18n bidi over-ride ",
810 DECL html_inline ,
NULL , DECL core_i18n_attrs,
NULL, DECL dir_attr
812{
"big", 0, 3, 0, 0, 0, 0, 1,
"large text style",
815{
"blockquote", 0, 0, 0, 0, 0, 0, 0,
"long quotation ",
818{
"body", 1, 1, 0, 0, 0, 0, 0,
"document body ",
819 DECL body_contents ,
"div" , DECL body_attrs, DECL body_depr,
NULL
821{
"br", 0, 2, 2, 1, 0, 0, 1,
"forced line break ",
824{
"button", 0, 0, 0, 0, 0, 0, 2,
"push button ",
825 DECL html_flow MODIFIER ,
NULL , DECL button_attrs,
NULL,
NULL
827{
"caption", 0, 0, 0, 0, 0, 0, 0,
"table caption ",
830{
"center", 0, 3, 0, 0, 1, 1, 0,
"shorthand for div align=center ",
833{
"cite", 0, 0, 0, 0, 0, 0, 1,
"citation",
836{
"code", 0, 0, 0, 0, 0, 0, 1,
"computer code fragment",
839{
"col", 0, 2, 2, 1, 0, 0, 0,
"table column ",
842{
"colgroup", 0, 1, 0, 0, 0, 0, 0,
"table column group ",
843 DECL col_elt ,
"col" , DECL col_attrs ,
NULL,
NULL
845{
"dd", 0, 1, 0, 0, 0, 0, 0,
"definition description ",
848{
"del", 0, 0, 0, 0, 0, 0, 2,
"deleted text ",
851{
"dfn", 0, 0, 0, 0, 0, 0, 1,
"instance definition",
854{
"dir", 0, 0, 0, 0, 1, 1, 0,
"directory list",
855 DECL blockli_elt,
"li" ,
NULL, DECL compact_attrs,
NULL
857{
"div", 0, 0, 0, 0, 0, 0, 0,
"generic language/style container",
858 DECL html_flow,
NULL, DECL html_attrs, DECL align_attr,
NULL
860{
"dl", 0, 0, 0, 0, 0, 0, 0,
"definition list ",
861 DECL dl_contents ,
"dd" , DECL html_attrs, DECL compact_attr,
NULL
863{
"dt", 0, 1, 0, 0, 0, 0, 0,
"definition term ",
866{
"em", 0, 3, 0, 0, 0, 0, 1,
"emphasis",
869{
"embed", 0, 1, 0, 0, 1, 1, 1,
"generic embedded object ",
872{
"fieldset", 0, 0, 0, 0, 0, 0, 0,
"form control group ",
873 DECL fieldset_contents ,
NULL, DECL html_attrs,
NULL,
NULL
875{
"font", 0, 3, 0, 0, 1, 1, 1,
"local change to font ",
878{
"form", 0, 0, 0, 0, 0, 0, 0,
"interactive form ",
879 DECL form_contents,
"fieldset", DECL form_attrs , DECL target_attr, DECL action_attr
881{
"frame", 0, 2, 2, 1, 0, 2, 0,
"subwindow " ,
884{
"frameset", 0, 0, 0, 0, 0, 2, 0,
"window subdivision" ,
885 DECL frameset_contents,
"noframes" ,
NULL , DECL frameset_attrs,
NULL
887{
"h1", 0, 0, 0, 0, 0, 0, 0,
"heading ",
888 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
890{
"h2", 0, 0, 0, 0, 0, 0, 0,
"heading ",
891 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
893{
"h3", 0, 0, 0, 0, 0, 0, 0,
"heading ",
894 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
896{
"h4", 0, 0, 0, 0, 0, 0, 0,
"heading ",
897 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
899{
"h5", 0, 0, 0, 0, 0, 0, 0,
"heading ",
900 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
902{
"h6", 0, 0, 0, 0, 0, 0, 0,
"heading ",
903 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
905{
"head", 1, 1, 0, 0, 0, 0, 0,
"document head ",
908{
"hr", 0, 2, 2, 1, 0, 0, 0,
"horizontal rule " ,
911{
"html", 1, 1, 0, 0, 0, 0, 0,
"document root element ",
912 DECL html_content ,
NULL , DECL i18n_attrs, DECL version_attr,
NULL
914{
"i", 0, 3, 0, 0, 0, 0, 1,
"italic text style",
917{
"iframe", 0, 0, 0, 0, 0, 1, 2,
"inline subwindow ",
920{
"img", 0, 2, 2, 1, 0, 0, 1,
"embedded image ",
921 EMPTY,
NULL, DECL img_attrs, DECL align_attr, DECL src_alt_attrs
923{
"input", 0, 2, 2, 1, 0, 0, 1,
"form control ",
926{
"ins", 0, 0, 0, 0, 0, 0, 2,
"inserted text",
929{
"isindex", 0, 2, 2, 1, 1, 1, 0,
"single line prompt ",
932{
"kbd", 0, 0, 0, 0, 0, 0, 1,
"text to be entered by the user",
935{
"label", 0, 0, 0, 0, 0, 0, 1,
"form field label text ",
936 DECL html_inline MODIFIER,
NULL, DECL label_attrs ,
NULL,
NULL
938{
"legend", 0, 0, 0, 0, 0, 0, 0,
"fieldset legend ",
939 DECL html_inline,
NULL, DECL legend_attrs , DECL align_attr,
NULL
941{
"li", 0, 1, 1, 0, 0, 0, 0,
"list item ",
944{
"link", 0, 2, 2, 1, 0, 0, 0,
"a media-independent link ",
947{
"map", 0, 0, 0, 0, 0, 0, 2,
"client-side image map ",
948 DECL map_contents ,
NULL, DECL html_attrs ,
NULL, DECL name_attr
950{
"menu", 0, 0, 0, 0, 1, 1, 0,
"menu list ",
953{
"meta", 0, 2, 2, 1, 0, 0, 0,
"generic metainformation ",
956{
"noframes", 0, 0, 0, 0, 0, 2, 0,
"alternate content container for non frame-based rendering ",
957 DECL noframes_content,
"body" , DECL html_attrs,
NULL,
NULL
959{
"noscript", 0, 0, 0, 0, 0, 0, 0,
"alternate content container for non script-based rendering ",
960 DECL html_flow,
"div", DECL html_attrs,
NULL,
NULL
962{
"object", 0, 0, 0, 0, 0, 0, 2,
"generic embedded object ",
963 DECL object_contents ,
"div" , DECL object_attrs, DECL object_depr,
NULL
965{
"ol", 0, 0, 0, 0, 0, 0, 0,
"ordered list ",
966 DECL li_elt ,
"li" , DECL html_attrs, DECL ol_attrs,
NULL
968{
"optgroup", 0, 0, 0, 0, 0, 0, 0,
"option group ",
969 DECL option_elt ,
"option", DECL optgroup_attrs,
NULL, DECL label_attr
971{
"option", 0, 1, 0, 0, 0, 0, 0,
"selectable choice " ,
974{
"p", 0, 1, 0, 0, 0, 0, 0,
"paragraph ",
975 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
977{
"param", 0, 2, 2, 1, 0, 0, 0,
"named property value ",
980{
"pre", 0, 0, 0, 0, 0, 0, 0,
"preformatted text ",
981 DECL pre_content,
NULL, DECL html_attrs, DECL width_attr,
NULL
983{
"q", 0, 0, 0, 0, 0, 0, 1,
"short inline quotation ",
986{
"s", 0, 3, 0, 0, 1, 1, 1,
"strike-through text style",
989{
"samp", 0, 0, 0, 0, 0, 0, 1,
"sample program output, scripts, etc.",
992{
"script", 0, 0, 0, 0, 0, 0, 2,
"script statements ",
993 DECL html_cdata,
NULL, DECL script_attrs, DECL language_attr, DECL type_attr
995{
"select", 0, 0, 0, 0, 0, 0, 1,
"option selector ",
996 DECL select_content,
NULL, DECL select_attrs,
NULL,
NULL
998{
"small", 0, 3, 0, 0, 0, 0, 1,
"small text style",
1001{
"span", 0, 0, 0, 0, 0, 0, 1,
"generic language/style container ",
1004{
"strike", 0, 3, 0, 0, 1, 1, 1,
"strike-through text",
1007{
"strong", 0, 3, 0, 0, 0, 0, 1,
"strong emphasis",
1010{
"style", 0, 0, 0, 0, 0, 0, 0,
"style info ",
1011 DECL html_cdata,
NULL, DECL style_attrs,
NULL, DECL type_attr
1013{
"sub", 0, 3, 0, 0, 0, 0, 1,
"subscript",
1016{
"sup", 0, 3, 0, 0, 0, 0, 1,
"superscript ",
1019{
"table", 0, 0, 0, 0, 0, 0, 0,
"",
1020 DECL table_contents ,
"tr" , DECL table_attrs , DECL table_depr,
NULL
1022{
"tbody", 1, 0, 0, 0, 0, 0, 0,
"table body ",
1023 DECL tr_elt ,
"tr" , DECL talign_attrs,
NULL,
NULL
1025{
"td", 0, 0, 0, 0, 0, 0, 0,
"table data cell",
1026 DECL html_flow,
NULL, DECL th_td_attr, DECL th_td_depr,
NULL
1028{
"textarea", 0, 0, 0, 0, 0, 0, 1,
"multi-line text field ",
1029 DECL html_pcdata,
NULL, DECL textarea_attrs,
NULL, DECL rows_cols_attr
1031{
"tfoot", 0, 1, 0, 0, 0, 0, 0,
"table footer ",
1032 DECL tr_elt ,
"tr" , DECL talign_attrs,
NULL,
NULL
1034{
"th", 0, 1, 0, 0, 0, 0, 0,
"table header cell",
1035 DECL html_flow,
NULL, DECL th_td_attr, DECL th_td_depr,
NULL
1037{
"thead", 0, 1, 0, 0, 0, 0, 0,
"table header ",
1038 DECL tr_elt ,
"tr" , DECL talign_attrs,
NULL,
NULL
1040{
"title", 0, 0, 0, 0, 0, 0, 0,
"document title ",
1043{
"tr", 0, 0, 0, 0, 0, 0, 0,
"table row ",
1044 DECL tr_contents ,
"td" , DECL talign_attrs, DECL bgcolor_attr,
NULL
1046{
"tt", 0, 3, 0, 0, 0, 0, 1,
"teletype or monospaced text style",
1049{
"u", 0, 3, 0, 0, 1, 1, 1,
"underlined text style",
1052{
"ul", 0, 0, 0, 0, 0, 0, 0,
"unordered list ",
1053 DECL li_elt ,
"li" , DECL html_attrs, DECL ul_depr,
NULL
1055{
"var", 0, 0, 0, 0, 0, 0, 1,
"instance of a variable or program argument",
1063} htmlStartCloseEntry;
1068static const htmlStartCloseEntry htmlStartClose[] = {
1070 {
"a",
"fieldset" },
1074 {
"address",
"dd" },
1075 {
"address",
"dl" },
1076 {
"address",
"dt" },
1077 {
"address",
"form" },
1078 {
"address",
"li" },
1079 {
"address",
"ul" },
1085 {
"caption",
"col" },
1086 {
"caption",
"colgroup" },
1087 {
"caption",
"tbody" },
1088 {
"caption",
"tfoot" },
1089 {
"caption",
"thead" },
1090 {
"caption",
"tr" },
1092 {
"col",
"colgroup" },
1097 {
"colgroup",
"colgroup" },
1098 {
"colgroup",
"tbody" },
1099 {
"colgroup",
"tfoot" },
1100 {
"colgroup",
"thead" },
1101 {
"colgroup",
"tr" },
1112 {
"font",
"center" },
1116 {
"h1",
"fieldset" },
1121 {
"h2",
"fieldset" },
1126 {
"h3",
"fieldset" },
1131 {
"h4",
"fieldset" },
1136 {
"h5",
"fieldset" },
1141 {
"h6",
"fieldset" },
1148 {
"head",
"acronym" },
1149 {
"head",
"address" },
1153 {
"head",
"blockquote" },
1156 {
"head",
"center" },
1166 {
"head",
"fieldset" },
1169 {
"head",
"frameset" },
1178 {
"head",
"iframe" },
1182 {
"head",
"listing" },
1191 {
"head",
"small" },
1193 {
"head",
"strike" },
1194 {
"head",
"strong" },
1197 {
"head",
"table" },
1208 {
"legend",
"fieldset" },
1211 {
"link",
"frameset" },
1212 {
"listing",
"dd" },
1213 {
"listing",
"dl" },
1214 {
"listing",
"dt" },
1215 {
"listing",
"fieldset" },
1216 {
"listing",
"form" },
1217 {
"listing",
"li" },
1218 {
"listing",
"table" },
1219 {
"listing",
"ul" },
1227 {
"option",
"optgroup" },
1228 {
"option",
"option" },
1230 {
"p",
"blockquote" },
1235 {
"p",
"colgroup" },
1241 {
"p",
"fieldset" },
1243 {
"p",
"frameset" },
1270 {
"pre",
"fieldset" },
1276 {
"script",
"noscript" },
1281 {
"style",
"body" },
1282 {
"style",
"frameset" },
1283 {
"tbody",
"tbody" },
1284 {
"tbody",
"tfoot" },
1290 {
"tfoot",
"tbody" },
1296 {
"thead",
"tbody" },
1297 {
"thead",
"tfoot" },
1298 {
"title",
"body" },
1299 {
"title",
"frameset" },
1307 {
"ul",
"address" },
1315 {
"xmp",
"fieldset" },
1329static const char *
const htmlNoContentElements[] = {
1340static const char *
const htmlScriptAttributes[] = {
1374static const elementPriority htmlEndPriority[] = {
1404htmlInitAutoClose(
void) {
1408htmlCompareTags(
const void *
key,
const void *
member) {
1410 const htmlElemDesc *
desc = (
const htmlElemDesc *)
member;
1428 return((
const htmlElemDesc *)
bsearch(
tag, html40ElementTable,
1429 sizeof(html40ElementTable) /
sizeof(htmlElemDesc),
1430 sizeof(htmlElemDesc), htmlCompareTags));
1443 while ((htmlEndPriority[
i].
name !=
NULL) &&
1452htmlCompareStartClose(
const void *vkey,
const void *
member) {
1453 const htmlStartCloseEntry *
key = (
const htmlStartCloseEntry *) vkey;
1454 const htmlStartCloseEntry *
entry = (
const htmlStartCloseEntry *)
member;
1475htmlCheckAutoClose(
const xmlChar * newtag,
const xmlChar * oldtag)
1477 htmlStartCloseEntry
key;
1480 key.oldTag = (
const char *) oldtag;
1481 key.newTag = (
const char *) newtag;
1483 sizeof(htmlStartClose) /
sizeof(htmlStartCloseEntry),
1484 sizeof(htmlStartCloseEntry), htmlCompareStartClose);
1497htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt,
const xmlChar * newtag)
1499 const htmlElemDesc *
info;
1502 priority = htmlGetEndPriority(newtag);
1504 for (
i = (ctxt->nameNr - 1);
i >= 0;
i--) {
1514 if (htmlGetEndPriority(ctxt->nameTab[
i]) >
priority)
1521 info = htmlTagLookup(ctxt->name);
1524 "Opening and ending tag mismatch: %s and %s\n",
1525 newtag, ctxt->name);
1527 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
1528 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1540htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
1544 if (ctxt->nameNr == 0)
1546 for (
i = (ctxt->nameNr - 1);
i >= 0;
i--) {
1547 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
1548 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1566htmlAutoClose(htmlParserCtxtPtr ctxt,
const xmlChar * newtag)
1568 while ((newtag !=
NULL) && (ctxt->name !=
NULL) &&
1569 (htmlCheckAutoClose(newtag, ctxt->name))) {
1570 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
1571 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1574 if (newtag ==
NULL) {
1575 htmlAutoCloseOnEnd(ctxt);
1578 while ((newtag ==
NULL) && (ctxt->name !=
NULL) &&
1582 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
1583 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1602htmlAutoCloseTag(htmlDocPtr doc,
const xmlChar *
name, htmlNodePtr
elem) {
1607 if (htmlCheckAutoClose(
elem->name,
name))
return(1);
1610 if (htmlAutoCloseTag(doc,
name,
child))
return(1);
1628htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr
elem) {
1634 if (htmlAutoCloseTag(doc,
elem->name,
child))
return(1);
/*
 * htmlCheckImplied (excerpt):
 * Visible steps, in order:
 *  - tests the HTML_PARSE_NOIMPLIED bit of ctxt->options and the
 *    file-level htmlOmittedDefaultValue flag;
 *  - when the name stack is empty (nameNr <= 0), pushes "html" and reports
 *    it through sax->startElement with NULL attributes;
 *  - later pushes and reports "head", and after a scan over the name stack
 *    pushes and reports "body".
 * NOTE(review): the statements guarded by the option/flag tests, the
 * conditions choosing between head and body, and the meaning of the
 * ctxt->html thresholds (3 and 10) are not visible here -- confirm.
 */
1650htmlCheckImplied(htmlParserCtxtPtr ctxt,
const xmlChar *newtag) {
1653 if (ctxt->options & HTML_PARSE_NOIMPLIED)
1655 if (!htmlOmittedDefaultValue)
1659 if (ctxt->nameNr <= 0) {
1660 htmlnamePush(ctxt,
BAD_CAST"html");
1661 if ((ctxt->sax !=
NULL) && (ctxt->sax->startElement !=
NULL))
1662 ctxt->sax->startElement(ctxt->userData,
BAD_CAST"html",
NULL);
1666 if ((ctxt->nameNr <= 1) &&
1673 if (ctxt->html >= 3) {
1681 htmlnamePush(ctxt,
BAD_CAST"head");
1682 if ((ctxt->sax !=
NULL) && (ctxt->sax->startElement !=
NULL))
1683 ctxt->sax->startElement(ctxt->userData,
BAD_CAST"head",
NULL);
1687 if (ctxt->html >= 10) {
1691 for (
i = 0;
i < ctxt->nameNr;
i++) {
1700 htmlnamePush(ctxt,
BAD_CAST"body");
1701 if ((ctxt->sax !=
NULL) && (ctxt->sax->startElement !=
NULL))
1702 ctxt->sax->startElement(ctxt->userData,
BAD_CAST"body",
NULL);
/*
 * htmlCheckParagraph (excerpt):
 * Two visible places call htmlCheckImplied(ctxt, "p") and then report a
 * "p" start tag through sax->startElement with NULL attributes.  Between
 * them the function tests htmlOmittedDefaultValue and scans the
 * NULL-terminated htmlNoContentElements list.
 * NOTE(review): the conditions selecting each path (and the htmlnamePush
 * calls, if any) are elided in this excerpt.
 */
1718htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
1727 htmlCheckImplied(ctxt,
BAD_CAST"p");
1729 if ((ctxt->sax !=
NULL) && (ctxt->sax->startElement !=
NULL))
1730 ctxt->sax->startElement(ctxt->userData,
BAD_CAST"p",
NULL);
1733 if (!htmlOmittedDefaultValue)
1735 for (
i = 0; htmlNoContentElements[
i] !=
NULL;
i++) {
1738 htmlCheckImplied(ctxt,
BAD_CAST"p");
1740 if ((ctxt->sax !=
NULL) && (ctxt->sax->startElement !=
NULL))
1741 ctxt->sax->startElement(ctxt->userData,
BAD_CAST"p",
NULL);
1765 if ((
name[0] !=
'o') || (
name[1] !=
'n'))
1768 i <
sizeof(htmlScriptAttributes)/
sizeof(htmlScriptAttributes[0]);
1783static const htmlEntityDesc html40EntitiesTable[] = {
1787{ 34,
"quot",
"quotation mark = APL quote, U+0022 ISOnum" },
1788{ 38,
"amp",
"ampersand, U+0026 ISOnum" },
1789{ 39,
"apos",
"single quote" },
1790{ 60,
"lt",
"less-than sign, U+003C ISOnum" },
1791{ 62,
"gt",
"greater-than sign, U+003E ISOnum" },
1797{ 160,
"nbsp",
"no-break space = non-breaking space, U+00A0 ISOnum" },
1798{ 161,
"iexcl",
"inverted exclamation mark, U+00A1 ISOnum" },
1799{ 162,
"cent",
"cent sign, U+00A2 ISOnum" },
1800{ 163,
"pound",
"pound sign, U+00A3 ISOnum" },
1801{ 164,
"curren",
"currency sign, U+00A4 ISOnum" },
1802{ 165,
"yen",
"yen sign = yuan sign, U+00A5 ISOnum" },
1803{ 166,
"brvbar",
"broken bar = broken vertical bar, U+00A6 ISOnum" },
1804{ 167,
"sect",
"section sign, U+00A7 ISOnum" },
1805{ 168,
"uml",
"diaeresis = spacing diaeresis, U+00A8 ISOdia" },
1806{ 169,
"copy",
"copyright sign, U+00A9 ISOnum" },
1807{ 170,
"ordf",
"feminine ordinal indicator, U+00AA ISOnum" },
1808{ 171,
"laquo",
"left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" },
1809{ 172,
"not",
"not sign, U+00AC ISOnum" },
1810{ 173,
"shy",
"soft hyphen = discretionary hyphen, U+00AD ISOnum" },
1811{ 174,
"reg",
"registered sign = registered trade mark sign, U+00AE ISOnum" },
1812{ 175,
"macr",
"macron = spacing macron = overline = APL overbar, U+00AF ISOdia" },
1813{ 176,
"deg",
"degree sign, U+00B0 ISOnum" },
1814{ 177,
"plusmn",
"plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" },
1815{ 178,
"sup2",
"superscript two = superscript digit two = squared, U+00B2 ISOnum" },
1816{ 179,
"sup3",
"superscript three = superscript digit three = cubed, U+00B3 ISOnum" },
1817{ 180,
"acute",
"acute accent = spacing acute, U+00B4 ISOdia" },
1818{ 181,
"micro",
"micro sign, U+00B5 ISOnum" },
1819{ 182,
"para",
"pilcrow sign = paragraph sign, U+00B6 ISOnum" },
1820{ 183,
"middot",
"middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" },
1821{ 184,
"cedil",
"cedilla = spacing cedilla, U+00B8 ISOdia" },
1822{ 185,
"sup1",
"superscript one = superscript digit one, U+00B9 ISOnum" },
1823{ 186,
"ordm",
"masculine ordinal indicator, U+00BA ISOnum" },
1824{ 187,
"raquo",
"right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" },
1825{ 188,
"frac14",
"vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" },
1826{ 189,
"frac12",
"vulgar fraction one half = fraction one half, U+00BD ISOnum" },
1827{ 190,
"frac34",
"vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" },
1828{ 191,
"iquest",
"inverted question mark = turned question mark, U+00BF ISOnum" },
1829{ 192,
"Agrave",
"latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" },
1830{ 193,
"Aacute",
"latin capital letter A with acute, U+00C1 ISOlat1" },
1831{ 194,
"Acirc",
"latin capital letter A with circumflex, U+00C2 ISOlat1" },
1832{ 195,
"Atilde",
"latin capital letter A with tilde, U+00C3 ISOlat1" },
1833{ 196,
"Auml",
"latin capital letter A with diaeresis, U+00C4 ISOlat1" },
1834{ 197,
"Aring",
"latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" },
1835{ 198,
"AElig",
"latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" },
1836{ 199,
"Ccedil",
"latin capital letter C with cedilla, U+00C7 ISOlat1" },
1837{ 200,
"Egrave",
"latin capital letter E with grave, U+00C8 ISOlat1" },
1838{ 201,
"Eacute",
"latin capital letter E with acute, U+00C9 ISOlat1" },
1839{ 202,
"Ecirc",
"latin capital letter E with circumflex, U+00CA ISOlat1" },
1840{ 203,
"Euml",
"latin capital letter E with diaeresis, U+00CB ISOlat1" },
1841{ 204,
"Igrave",
"latin capital letter I with grave, U+00CC ISOlat1" },
1842{ 205,
"Iacute",
"latin capital letter I with acute, U+00CD ISOlat1" },
1843{ 206,
"Icirc",
"latin capital letter I with circumflex, U+00CE ISOlat1" },
1844{ 207,
"Iuml",
"latin capital letter I with diaeresis, U+00CF ISOlat1" },
1845{ 208,
"ETH",
"latin capital letter ETH, U+00D0 ISOlat1" },
1846{ 209,
"Ntilde",
"latin capital letter N with tilde, U+00D1 ISOlat1" },
1847{ 210,
"Ograve",
"latin capital letter O with grave, U+00D2 ISOlat1" },
1848{ 211,
"Oacute",
"latin capital letter O with acute, U+00D3 ISOlat1" },
1849{ 212,
"Ocirc",
"latin capital letter O with circumflex, U+00D4 ISOlat1" },
1850{ 213,
"Otilde",
"latin capital letter O with tilde, U+00D5 ISOlat1" },
1851{ 214,
"Ouml",
"latin capital letter O with diaeresis, U+00D6 ISOlat1" },
1852{ 215,
"times",
"multiplication sign, U+00D7 ISOnum" },
1853{ 216,
"Oslash",
"latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" },
1854{ 217,
"Ugrave",
"latin capital letter U with grave, U+00D9 ISOlat1" },
1855{ 218,
"Uacute",
"latin capital letter U with acute, U+00DA ISOlat1" },
1856{ 219,
"Ucirc",
"latin capital letter U with circumflex, U+00DB ISOlat1" },
1857{ 220,
"Uuml",
"latin capital letter U with diaeresis, U+00DC ISOlat1" },
1858{ 221,
"Yacute",
"latin capital letter Y with acute, U+00DD ISOlat1" },
1859{ 222,
"THORN",
"latin capital letter THORN, U+00DE ISOlat1" },
1860{ 223,
"szlig",
"latin small letter sharp s = ess-zed, U+00DF ISOlat1" },
1861{ 224,
"agrave",
"latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" },
1862{ 225,
"aacute",
"latin small letter a with acute, U+00E1 ISOlat1" },
1863{ 226,
"acirc",
"latin small letter a with circumflex, U+00E2 ISOlat1" },
1864{ 227,
"atilde",
"latin small letter a with tilde, U+00E3 ISOlat1" },
1865{ 228,
"auml",
"latin small letter a with diaeresis, U+00E4 ISOlat1" },
1866{ 229,
"aring",
"latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" },
1867{ 230,
"aelig",
"latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" },
1868{ 231,
"ccedil",
"latin small letter c with cedilla, U+00E7 ISOlat1" },
1869{ 232,
"egrave",
"latin small letter e with grave, U+00E8 ISOlat1" },
1870{ 233,
"eacute",
"latin small letter e with acute, U+00E9 ISOlat1" },
1871{ 234,
"ecirc",
"latin small letter e with circumflex, U+00EA ISOlat1" },
1872{ 235,
"euml",
"latin small letter e with diaeresis, U+00EB ISOlat1" },
1873{ 236,
"igrave",
"latin small letter i with grave, U+00EC ISOlat1" },
1874{ 237,
"iacute",
"latin small letter i with acute, U+00ED ISOlat1" },
1875{ 238,
"icirc",
"latin small letter i with circumflex, U+00EE ISOlat1" },
1876{ 239,
"iuml",
"latin small letter i with diaeresis, U+00EF ISOlat1" },
1877{ 240,
"eth",
"latin small letter eth, U+00F0 ISOlat1" },
1878{ 241,
"ntilde",
"latin small letter n with tilde, U+00F1 ISOlat1" },
1879{ 242,
"ograve",
"latin small letter o with grave, U+00F2 ISOlat1" },
1880{ 243,
"oacute",
"latin small letter o with acute, U+00F3 ISOlat1" },
1881{ 244,
"ocirc",
"latin small letter o with circumflex, U+00F4 ISOlat1" },
1882{ 245,
"otilde",
"latin small letter o with tilde, U+00F5 ISOlat1" },
1883{ 246,
"ouml",
"latin small letter o with diaeresis, U+00F6 ISOlat1" },
1884{ 247,
"divide",
"division sign, U+00F7 ISOnum" },
1885{ 248,
"oslash",
"latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" },
1886{ 249,
"ugrave",
"latin small letter u with grave, U+00F9 ISOlat1" },
1887{ 250,
"uacute",
"latin small letter u with acute, U+00FA ISOlat1" },
1888{ 251,
"ucirc",
"latin small letter u with circumflex, U+00FB ISOlat1" },
1889{ 252,
"uuml",
"latin small letter u with diaeresis, U+00FC ISOlat1" },
1890{ 253,
"yacute",
"latin small letter y with acute, U+00FD ISOlat1" },
1891{ 254,
"thorn",
"latin small letter thorn with, U+00FE ISOlat1" },
1892{ 255,
"yuml",
"latin small letter y with diaeresis, U+00FF ISOlat1" },
1894{ 338,
"OElig",
"latin capital ligature OE, U+0152 ISOlat2" },
1895{ 339,
"oelig",
"latin small ligature oe, U+0153 ISOlat2" },
1896{ 352,
"Scaron",
"latin capital letter S with caron, U+0160 ISOlat2" },
1897{ 353,
"scaron",
"latin small letter s with caron, U+0161 ISOlat2" },
1898{ 376,
"Yuml",
"latin capital letter Y with diaeresis, U+0178 ISOlat2" },
1903{ 402,
"fnof",
"latin small f with hook = function = florin, U+0192 ISOtech" },
1905{ 710,
"circ",
"modifier letter circumflex accent, U+02C6 ISOpub" },
1906{ 732,
"tilde",
"small tilde, U+02DC ISOdia" },
1908{ 913,
"Alpha",
"greek capital letter alpha, U+0391" },
1909{ 914,
"Beta",
"greek capital letter beta, U+0392" },
1910{ 915,
"Gamma",
"greek capital letter gamma, U+0393 ISOgrk3" },
1911{ 916,
"Delta",
"greek capital letter delta, U+0394 ISOgrk3" },
1912{ 917,
"Epsilon",
"greek capital letter epsilon, U+0395" },
1913{ 918,
"Zeta",
"greek capital letter zeta, U+0396" },
1914{ 919,
"Eta",
"greek capital letter eta, U+0397" },
1915{ 920,
"Theta",
"greek capital letter theta, U+0398 ISOgrk3" },
1916{ 921,
"Iota",
"greek capital letter iota, U+0399" },
1917{ 922,
"Kappa",
"greek capital letter kappa, U+039A" },
1918{ 923,
"Lambda",
"greek capital letter lambda, U+039B ISOgrk3" },
1919{ 924,
"Mu",
"greek capital letter mu, U+039C" },
1920{ 925,
"Nu",
"greek capital letter nu, U+039D" },
1921{ 926,
"Xi",
"greek capital letter xi, U+039E ISOgrk3" },
1922{ 927,
"Omicron",
"greek capital letter omicron, U+039F" },
1923{ 928,
"Pi",
"greek capital letter pi, U+03A0 ISOgrk3" },
1924{ 929,
"Rho",
"greek capital letter rho, U+03A1" },
1925{ 931,
"Sigma",
"greek capital letter sigma, U+03A3 ISOgrk3" },
1926{ 932,
"Tau",
"greek capital letter tau, U+03A4" },
1927{ 933,
"Upsilon",
"greek capital letter upsilon, U+03A5 ISOgrk3" },
1928{ 934,
"Phi",
"greek capital letter phi, U+03A6 ISOgrk3" },
1929{ 935,
"Chi",
"greek capital letter chi, U+03A7" },
1930{ 936,
"Psi",
"greek capital letter psi, U+03A8 ISOgrk3" },
1931{ 937,
"Omega",
"greek capital letter omega, U+03A9 ISOgrk3" },
1933{ 945,
"alpha",
"greek small letter alpha, U+03B1 ISOgrk3" },
1934{ 946,
"beta",
"greek small letter beta, U+03B2 ISOgrk3" },
1935{ 947,
"gamma",
"greek small letter gamma, U+03B3 ISOgrk3" },
1936{ 948,
"delta",
"greek small letter delta, U+03B4 ISOgrk3" },
1937{ 949,
"epsilon",
"greek small letter epsilon, U+03B5 ISOgrk3" },
1938{ 950,
"zeta",
"greek small letter zeta, U+03B6 ISOgrk3" },
1939{ 951,
"eta",
"greek small letter eta, U+03B7 ISOgrk3" },
1940{ 952,
"theta",
"greek small letter theta, U+03B8 ISOgrk3" },
1941{ 953,
"iota",
"greek small letter iota, U+03B9 ISOgrk3" },
1942{ 954,
"kappa",
"greek small letter kappa, U+03BA ISOgrk3" },
1943{ 955,
"lambda",
"greek small letter lambda, U+03BB ISOgrk3" },
1944{ 956,
"mu",
"greek small letter mu, U+03BC ISOgrk3" },
1945{ 957,
"nu",
"greek small letter nu, U+03BD ISOgrk3" },
1946{ 958,
"xi",
"greek small letter xi, U+03BE ISOgrk3" },
1947{ 959,
"omicron",
"greek small letter omicron, U+03BF NEW" },
1948{ 960,
"pi",
"greek small letter pi, U+03C0 ISOgrk3" },
1949{ 961,
"rho",
"greek small letter rho, U+03C1 ISOgrk3" },
1950{ 962,
"sigmaf",
"greek small letter final sigma, U+03C2 ISOgrk3" },
1951{ 963,
"sigma",
"greek small letter sigma, U+03C3 ISOgrk3" },
1952{ 964,
"tau",
"greek small letter tau, U+03C4 ISOgrk3" },
1953{ 965,
"upsilon",
"greek small letter upsilon, U+03C5 ISOgrk3" },
1954{ 966,
"phi",
"greek small letter phi, U+03C6 ISOgrk3" },
1955{ 967,
"chi",
"greek small letter chi, U+03C7 ISOgrk3" },
1956{ 968,
"psi",
"greek small letter psi, U+03C8 ISOgrk3" },
1957{ 969,
"omega",
"greek small letter omega, U+03C9 ISOgrk3" },
1958{ 977,
"thetasym",
"greek small letter theta symbol, U+03D1 NEW" },
1959{ 978,
"upsih",
"greek upsilon with hook symbol, U+03D2 NEW" },
1960{ 982,
"piv",
"greek pi symbol, U+03D6 ISOgrk3" },
1962{ 8194,
"ensp",
"en space, U+2002 ISOpub" },
1963{ 8195,
"emsp",
"em space, U+2003 ISOpub" },
1964{ 8201,
"thinsp",
"thin space, U+2009 ISOpub" },
1965{ 8204,
"zwnj",
"zero width non-joiner, U+200C NEW RFC 2070" },
1966{ 8205,
"zwj",
"zero width joiner, U+200D NEW RFC 2070" },
1967{ 8206,
"lrm",
"left-to-right mark, U+200E NEW RFC 2070" },
1968{ 8207,
"rlm",
"right-to-left mark, U+200F NEW RFC 2070" },
1969{ 8211,
"ndash",
"en dash, U+2013 ISOpub" },
1970{ 8212,
"mdash",
"em dash, U+2014 ISOpub" },
1971{ 8216,
"lsquo",
"left single quotation mark, U+2018 ISOnum" },
1972{ 8217,
"rsquo",
"right single quotation mark, U+2019 ISOnum" },
1973{ 8218,
"sbquo",
"single low-9 quotation mark, U+201A NEW" },
1974{ 8220,
"ldquo",
"left double quotation mark, U+201C ISOnum" },
1975{ 8221,
"rdquo",
"right double quotation mark, U+201D ISOnum" },
1976{ 8222,
"bdquo",
"double low-9 quotation mark, U+201E NEW" },
1977{ 8224,
"dagger",
"dagger, U+2020 ISOpub" },
1978{ 8225,
"Dagger",
"double dagger, U+2021 ISOpub" },
1980{ 8226,
"bull",
"bullet = black small circle, U+2022 ISOpub" },
1981{ 8230,
"hellip",
"horizontal ellipsis = three dot leader, U+2026 ISOpub" },
1983{ 8240,
"permil",
"per mille sign, U+2030 ISOtech" },
1985{ 8242,
"prime",
"prime = minutes = feet, U+2032 ISOtech" },
1986{ 8243,
"Prime",
"double prime = seconds = inches, U+2033 ISOtech" },
1988{ 8249,
"lsaquo",
"single left-pointing angle quotation mark, U+2039 ISO proposed" },
1989{ 8250,
"rsaquo",
"single right-pointing angle quotation mark, U+203A ISO proposed" },
1991{ 8254,
"oline",
"overline = spacing overscore, U+203E NEW" },
1992{ 8260,
"frasl",
"fraction slash, U+2044 NEW" },
1994{ 8364,
"euro",
"euro sign, U+20AC NEW" },
1996{ 8465,
"image",
"blackletter capital I = imaginary part, U+2111 ISOamso" },
1997{ 8472,
"weierp",
"script capital P = power set = Weierstrass p, U+2118 ISOamso" },
1998{ 8476,
"real",
"blackletter capital R = real part symbol, U+211C ISOamso" },
1999{ 8482,
"trade",
"trade mark sign, U+2122 ISOnum" },
2000{ 8501,
"alefsym",
"alef symbol = first transfinite cardinal, U+2135 NEW" },
2001{ 8592,
"larr",
"leftwards arrow, U+2190 ISOnum" },
2002{ 8593,
"uarr",
"upwards arrow, U+2191 ISOnum" },
2003{ 8594,
"rarr",
"rightwards arrow, U+2192 ISOnum" },
2004{ 8595,
"darr",
"downwards arrow, U+2193 ISOnum" },
2005{ 8596,
"harr",
"left right arrow, U+2194 ISOamsa" },
2006{ 8629,
"crarr",
"downwards arrow with corner leftwards = carriage return, U+21B5 NEW" },
2007{ 8656,
"lArr",
"leftwards double arrow, U+21D0 ISOtech" },
2008{ 8657,
"uArr",
"upwards double arrow, U+21D1 ISOamsa" },
2009{ 8658,
"rArr",
"rightwards double arrow, U+21D2 ISOtech" },
2010{ 8659,
"dArr",
"downwards double arrow, U+21D3 ISOamsa" },
2011{ 8660,
"hArr",
"left right double arrow, U+21D4 ISOamsa" },
2013{ 8704,
"forall",
"for all, U+2200 ISOtech" },
2014{ 8706,
"part",
"partial differential, U+2202 ISOtech" },
2015{ 8707,
"exist",
"there exists, U+2203 ISOtech" },
2016{ 8709,
"empty",
"empty set = null set = diameter, U+2205 ISOamso" },
2017{ 8711,
"nabla",
"nabla = backward difference, U+2207 ISOtech" },
2018{ 8712,
"isin",
"element of, U+2208 ISOtech" },
2019{ 8713,
"notin",
"not an element of, U+2209 ISOtech" },
2020{ 8715,
"ni",
"contains as member, U+220B ISOtech" },
2021{ 8719,
"prod",
"n-ary product = product sign, U+220F ISOamsb" },
2022{ 8721,
"sum",
"n-ary summation, U+2211 ISOamsb" },
2023{ 8722,
"minus",
"minus sign, U+2212 ISOtech" },
2024{ 8727,
"lowast",
"asterisk operator, U+2217 ISOtech" },
2025{ 8730,
"radic",
"square root = radical sign, U+221A ISOtech" },
2026{ 8733,
"prop",
"proportional to, U+221D ISOtech" },
2027{ 8734,
"infin",
"infinity, U+221E ISOtech" },
2028{ 8736,
"ang",
"angle, U+2220 ISOamso" },
2029{ 8743,
"and",
"logical and = wedge, U+2227 ISOtech" },
2030{ 8744,
"or",
"logical or = vee, U+2228 ISOtech" },
2031{ 8745,
"cap",
"intersection = cap, U+2229 ISOtech" },
2032{ 8746,
"cup",
"union = cup, U+222A ISOtech" },
2033{ 8747,
"int",
"integral, U+222B ISOtech" },
2034{ 8756,
"there4",
"therefore, U+2234 ISOtech" },
2035{ 8764,
"sim",
"tilde operator = varies with = similar to, U+223C ISOtech" },
2036{ 8773,
"cong",
"approximately equal to, U+2245 ISOtech" },
2037{ 8776,
"asymp",
"almost equal to = asymptotic to, U+2248 ISOamsr" },
2038{ 8800,
"ne",
"not equal to, U+2260 ISOtech" },
2039{ 8801,
"equiv",
"identical to, U+2261 ISOtech" },
2040{ 8804,
"le",
"less-than or equal to, U+2264 ISOtech" },
2041{ 8805,
"ge",
"greater-than or equal to, U+2265 ISOtech" },
2042{ 8834,
"sub",
"subset of, U+2282 ISOtech" },
2043{ 8835,
"sup",
"superset of, U+2283 ISOtech" },
2044{ 8836,
"nsub",
"not a subset of, U+2284 ISOamsn" },
2045{ 8838,
"sube",
"subset of or equal to, U+2286 ISOtech" },
2046{ 8839,
"supe",
"superset of or equal to, U+2287 ISOtech" },
2047{ 8853,
"oplus",
"circled plus = direct sum, U+2295 ISOamsb" },
2048{ 8855,
"otimes",
"circled times = vector product, U+2297 ISOamsb" },
2049{ 8869,
"perp",
"up tack = orthogonal to = perpendicular, U+22A5 ISOtech" },
2050{ 8901,
"sdot",
"dot operator, U+22C5 ISOamsb" },
2051{ 8968,
"lceil",
"left ceiling = apl upstile, U+2308 ISOamsc" },
2052{ 8969,
"rceil",
"right ceiling, U+2309 ISOamsc" },
2053{ 8970,
"lfloor",
"left floor = apl downstile, U+230A ISOamsc" },
2054{ 8971,
"rfloor",
"right floor, U+230B ISOamsc" },
2055{ 9001,
"lang",
"left-pointing angle bracket = bra, U+2329 ISOtech" },
2056{ 9002,
"rang",
"right-pointing angle bracket = ket, U+232A ISOtech" },
2057{ 9674,
"loz",
"lozenge, U+25CA ISOpub" },
2059{ 9824,
"spades",
"black spade suit, U+2660 ISOpub" },
2060{ 9827,
"clubs",
"black club suit = shamrock, U+2663 ISOpub" },
2061{ 9829,
"hearts",
"black heart suit = valentine, U+2665 ISOpub" },
2062{ 9830,
"diams",
"black diamond suit, U+2666 ISOpub" },
2075#define growBuffer(buffer) { \
2077 buffer##_size *= 2; \
2078 tmp = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
2079 if (tmp == NULL) { \
2080 htmlErrMemory(ctxt, "growing buffer\n"); \
2097const htmlEntityDesc *
2101 for (
i = 0;
i < (
sizeof(html40EntitiesTable)/
2102 sizeof(html40EntitiesTable[0]));
i++) {
2104 return((htmlEntityDescPtr) &html40EntitiesTable[
i]);
/*
 * htmlEntityValueLookup (excerpt):
 * Scans html40EntitiesTable linearly from index 0 and returns a pointer to
 * a table entry (cast to htmlEntityDescPtr).
 * NOTE(review): the comparison against `value` that selects the entry, and
 * the value returned when nothing matches, are not visible here.
 */
2120const htmlEntityDesc *
2121htmlEntityValueLookup(
unsigned int value) {
2124 for (
i = 0;
i < (
sizeof(html40EntitiesTable)/
2125 sizeof(html40EntitiesTable[0]));
i++) {
2129 return((htmlEntityDescPtr) &html40EntitiesTable[
i]);
/*
 * UTF8ToHtml (excerpt):
 * Reads bytes from `in` (bounded by in + *inlen) and writes to `out`
 * (bounded by out + *outlen).  Lead bytes are classified by value
 * (< 0xC0, < 0xE0, ...), continuation bytes must match the 10xxxxxx
 * pattern, and decoded values are looked up with htmlEntityValueLookup.
 * Several exit points store the number of bytes produced so far
 * (out - outstart) into *outlen.
 * NOTE(review): return codes, the update of *inlen and the exact output
 * written per character are elided in this excerpt.
 */
2151UTF8ToHtml(
unsigned char*
out,
int *outlen,
2152 const unsigned char*
in,
int *inlen) {
2154 const unsigned char* outend;
2155 const unsigned char* outstart =
out;
2156 const unsigned char* instart =
in;
2157 const unsigned char* inend;
2170 inend =
in + (*inlen);
2171 outend =
out + (*outlen);
2172 while (
in < inend) {
2175 else if (
d < 0xC0) {
2177 *outlen =
out - outstart;
2180 }
else if (
d < 0xE0) {
c=
d & 0x1F;
trailing= 1; }
2185 *outlen =
out - outstart;
2195 if ((
in >= inend) || (((
d= *
in++) & 0xC0) != 0x80))
2203 if (
out + 1 >= outend)
2208 const htmlEntityDesc * ent;
2216 ent = htmlEntityValueLookup(
c);
2224 if (
out + 2 +
len >= outend)
2233 *outlen =
out - outstart;
/*
 * htmlEncodeEntities (excerpt):
 * Same overall shape as the UTF-8 decoder above it in the file: bytes are
 * read from `in` up to in + *inlen and output goes to `out` up to
 * out + *outlen.  Values below 0x80 that are not quoteChar, '&', '<' or
 * '>' take a separate branch from all other values; the others are looked
 * up with htmlEntityValueLookup.  Exit points record out - outstart in
 * *outlen.
 * NOTE(review): this excerpt checks space with `out + 2 + len > outend`
 * while UTF8ToHtml uses `>=`; whether the difference is intentional cannot
 * be told from the visible lines.  Return codes and *inlen handling are
 * elided.
 */
2255htmlEncodeEntities(
unsigned char*
out,
int *outlen,
2256 const unsigned char*
in,
int *inlen,
int quoteChar) {
2258 const unsigned char* outend;
2259 const unsigned char* outstart =
out;
2260 const unsigned char* instart =
in;
2261 const unsigned char* inend;
2267 outend =
out + (*outlen);
2268 inend =
in + (*inlen);
2269 while (
in < inend) {
2272 else if (
d < 0xC0) {
2274 *outlen =
out - outstart;
2277 }
else if (
d < 0xE0) {
c=
d & 0x1F;
trailing= 1; }
2282 *outlen =
out - outstart;
2291 if (((
d= *
in++) & 0xC0) != 0x80) {
2292 *outlen =
out - outstart;
2301 if ((
c < 0x80) && (
c != (
unsigned int) quoteChar) &&
2302 (
c !=
'&') && (
c !=
'<') && (
c !=
'>')) {
2307 const htmlEntityDesc * ent;
2315 ent = htmlEntityValueLookup(
c);
2323 if (
out + 2 +
len > outend)
2332 *outlen =
out - outstart;
2343#ifdef LIBXML_PUSH_ENABLED
/*
 * htmlNewInputStream (excerpt):
 * Declares an htmlParserInputPtr, reports
 * "couldn't allocate a new input stream" through htmlErrMemory on the
 * failure path, and sets input->consumed to 0 during initialisation.
 * NOTE(review): the allocation call, the other field initialisations and
 * the return statements are not visible in this excerpt.
 */
2351static htmlParserInputPtr
2352htmlNewInputStream(htmlParserCtxtPtr ctxt) {
2353 htmlParserInputPtr
input;
2357 htmlErrMemory(ctxt,
"couldn't allocate a new input stream\n");
2371 input->consumed = 0;
2389static const char *allowPCData[] = {
2390 "a",
"abbr",
"acronym",
"address",
"applet",
"b",
"bdo",
"big",
2391 "blockquote",
"body",
"button",
"caption",
"center",
"cite",
"code",
2392 "dd",
"del",
"dfn",
"div",
"dt",
"em",
"font",
"form",
"h1",
"h2",
2393 "h3",
"h4",
"h5",
"h6",
"i",
"iframe",
"ins",
"kbd",
"label",
"legend",
2394 "li",
"noframes",
"noscript",
"object",
"p",
"pre",
"q",
"s",
"samp",
2395 "small",
"span",
"strike",
"strong",
"td",
"th",
"tt",
"u",
"var"
2418 if (
CUR == 0)
return(1);
2419 if (
CUR !=
'<')
return(0);
2420 if (ctxt->name ==
NULL)
2437 if (ctxt->node ==
NULL)
return(0);
2440 lastChild = lastChild->
prev;
2441 if (lastChild ==
NULL) {
2443 (ctxt->node->content !=
NULL))
return(0);
2446 for (
i = 0;
i <
sizeof(allowPCData)/
sizeof(allowPCData[0]);
i++ ) {
2456 for (
i = 0;
i <
sizeof(allowPCData)/
sizeof(allowPCData[0]);
i++ ) {
2484 htmlErrMemory(
NULL,
"HTML document creation failed\n");
2498 cur->standalone = 1;
2499 cur->compression = 0;
2505 if ((ExternalID !=
NULL) ||
2524 if ((URI ==
NULL) && (ExternalID ==
NULL))
2525 return(htmlNewDocNoDtD(
2526 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd",
2527 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN"));
2529 return(htmlNewDocNoDtD(URI, ExternalID));
/*
 * htmlSkipBogusComment (excerpt):
 * Emits the diagnostic "Incorrectly opened comment".
 * NOTE(review): the error-reporting call name, the skipping logic and the
 * return value are not visible in this excerpt.
 */
2549htmlSkipBogusComment(htmlParserCtxtPtr ctxt) {
2553 "Incorrectly opened comment\n",
NULL,
NULL);
/*
 * htmlParseHTMLName (excerpt):
 * Collects name characters into the fixed local buffer `loc`
 * (HTML_PARSER_BUFFER_SIZE bytes), stopping once the index reaches the
 * buffer size.  ':', '-' and '_' are among the accepted characters, and
 * 'A'..'Z' are stored with 0x20 added, i.e. folded to lower case.
 * NOTE(review): the remaining accepted character classes, the input
 * advance and the returned value are elided here.
 */
2574htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
2576 xmlChar loc[HTML_PARSER_BUFFER_SIZE];
2581 while ((
i < HTML_PARSER_BUFFER_SIZE) &&
2583 (
CUR ==
':') || (
CUR ==
'-') || (
CUR ==
'_') ||
2585 if ((
CUR >=
'A') && (
CUR <=
'Z')) loc[
i] =
CUR + 0x20;
/*
 * htmlParseHTMLName_nonInvasive (excerpt):
 * Look-ahead variant of the name scanner: characters are read with
 * NXT(1 + i) rather than through CUR, copied into the local buffer `loc`
 * (at most HTML_PARSER_BUFFER_SIZE of them) with 'A'..'Z' folded to lower
 * case by adding 0x20.  Returns NULL early when the first look-ahead
 * character is not an acceptable name start (only the ':' part of that
 * test is visible).
 * NOTE(review): the excerpt shows no statement advancing the input, which
 * matches the function name; the final return value is not visible.
 */
2608htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
2610 xmlChar loc[HTML_PARSER_BUFFER_SIZE];
2613 (
NXT(1) !=
':'))
return(
NULL);
2615 while ((
i < HTML_PARSER_BUFFER_SIZE) &&
2617 (
NXT(1+
i) ==
':') || (
NXT(1+
i) ==
'-') || (
NXT(1+
i) ==
'_'))) {
2618 if ((
NXT(1+
i) >=
'A') && (
NXT(1+
i) <=
'Z')) loc[
i] =
NXT(1+
i) + 0x20;
2619 else loc[
i] =
NXT(1+
i);
/*
 * htmlParseName (excerpt):
 * Fast path for ASCII names starting at ctxt->input->cur: the first
 * character must be a-z, A-Z, '_' or ':'; following characters may also be
 * digits, '-' or '.'.  When the character that stops the scan is a
 * non-zero ASCII byte (0 < *in < 0x80), the length is computed as
 * in - ctxt->input->cur and the input cursor and column counter are
 * advanced by it.  Otherwise the function defers to
 * htmlParseNameComplex(ctxt).
 * NOTE(review): the value returned on the fast path and the handling of
 * in == ctxt->input->end are elided in this excerpt.
 */
2637htmlParseName(htmlParserCtxtPtr ctxt) {
2647 in = ctxt->input->cur;
2648 if (((*
in >= 0x61) && (*
in <= 0x7A)) ||
2649 ((*
in >= 0x41) && (*
in <= 0x5A)) ||
2650 (*
in ==
'_') || (*
in ==
':')) {
2652 while (((*
in >= 0x61) && (*
in <= 0x7A)) ||
2653 ((*
in >= 0x41) && (*
in <= 0x5A)) ||
2654 ((*
in >= 0x30) && (*
in <= 0x39)) ||
2655 (*
in ==
'_') || (*
in ==
'-') ||
2656 (*
in ==
':') || (*
in ==
'.'))
2659 if (
in == ctxt->input->end)
2662 if ((*
in > 0) && (*
in < 0x80)) {
2663 count =
in - ctxt->input->cur;
2665 ctxt->input->cur =
in;
2666 ctxt->input->col +=
count;
2670 return(htmlParseNameComplex(ctxt));
2685 if ((
c ==
' ') || (
c ==
'>') || (
c ==
'/') ||
2691 while ((
c !=
' ') && (
c !=
'>') && (
c !=
'/') &&
2693 (
c ==
'.') || (
c ==
'-') ||
2694 (
c ==
'_') || (
c ==
':') ||
2697 if (
count++ > 100) {
2709 return(htmlParseNameComplex(ctxt));
2716 "unexpected change of input buffer",
NULL,
NULL);
/*
 * htmlParseHTMLAttribute (excerpt):
 * Scans input until CUR is 0 or equals `stop`; when stop is 0 a '>' also
 * ends the scan.  When the next character is '#', the reference is
 * resolved with htmlParseCharRef; otherwise htmlParseEntityRef is used.
 * Resolved code points `c` are written through `out` as UTF-8: a lead byte
 * (0xC0 / 0xE0 / 0xF0 patterns) followed by continuation bytes built from
 * 6-bit groups (0x80 | ((c >> bits) & 0x3F)).
 * Reports "buffer allocation failed" through htmlErrMemory when the output
 * buffer cannot be obtained.
 * NOTE(review): buffer growth, the handling of an unknown entity
 * (ent == NULL) and the returned value are elided in this excerpt.
 */
2736htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt,
const xmlChar stop) {
2742 const htmlEntityDesc * ent;
2750 htmlErrMemory(ctxt,
"buffer allocation failed\n");
2758 while ((
CUR != 0) && (
CUR != stop)) {
2759 if ((stop == 0) && (
CUR ==
'>'))
break;
2762 if (
NXT(1) ==
'#') {
2766 c = htmlParseCharRef(ctxt);
2770 { *
out++ =((
c >> 6) & 0x1F) | 0xC0;
bits= 0; }
2771 else if (
c < 0x10000)
2772 { *
out++ =((
c >> 12) & 0x0F) | 0xE0;
bits= 6; }
2774 { *
out++ =((
c >> 18) & 0x07) | 0xF0;
bits= 12; }
2777 *
out++ = ((
c >>
bits) & 0x3F) | 0x80;
2787 ent = htmlParseEntityRef(ctxt, &
name);
2796 }
else if (ent ==
NULL) {
2822 { *
out++ =((
c >> 6) & 0x1F) | 0xC0;
bits= 0; }
2823 else if (
c < 0x10000)
2824 { *
out++ =((
c >> 12) & 0x0F) | 0xE0;
bits= 6; }
2826 { *
out++ =((
c >> 18) & 0x07) | 0xF0;
bits= 12; }
2829 *
out++ = ((
c >>
bits) & 0x3F) | 0x80;
2847 { *
out++ =((
c >> 6) & 0x1F) | 0xC0;
bits= 0; }
2848 else if (
c < 0x10000)
2849 { *
out++ =((
c >> 12) & 0x0F) | 0xE0;
bits= 6; }
2851 { *
out++ =((
c >> 18) & 0x07) | 0xF0;
bits= 12; }
2854 *
out++ = ((
c >>
bits) & 0x3F) | 0x80;
/*
 * htmlParseEntityRef (excerpt):
 * Returns NULL immediately when ctxt or ctxt->input is NULL.  Otherwise
 * parses a name with htmlParseName, reports
 * "htmlParseEntityRef: no name" when that fails, looks the name up with
 * htmlEntityLookup and reports "htmlParseEntityRef: expecting ';'" when
 * the terminator is missing.
 * NOTE(review): what is stored through `str` and the exact return value on
 * each path are not visible in this excerpt.
 */
2875const htmlEntityDesc *
2876htmlParseEntityRef(htmlParserCtxtPtr ctxt,
const xmlChar **
str) {
2878 const htmlEntityDesc * ent =
NULL;
2881 if ((ctxt ==
NULL) || (ctxt->input ==
NULL))
return(
NULL);
2885 name = htmlParseName(ctxt);
2888 "htmlParseEntityRef: no name\n",
NULL,
NULL);
2898 ent = htmlEntityLookup(
name);
2903 "htmlParseEntityRef: expecting ';'\n",
/*
 * htmlParseAttValue (excerpt):
 * Delegates to htmlParseHTMLAttribute with a stop character that depends
 * on the quoting: '"' for double-quoted values, '\'' for single-quoted
 * values and 0 for unquoted values.  Diagnostics visible here:
 * "AttValue: \" expected", "AttValue: ' expected" and
 * "AttValue: no value found".
 * NOTE(review): the test for the double-quote case, the skipping of the
 * quote characters and the return statement are elided.
 */
2926htmlParseAttValue(htmlParserCtxtPtr ctxt) {
2931 ret = htmlParseHTMLAttribute(ctxt,
'"');
2934 "AttValue: \" expected\n",
NULL,
NULL);
2937 }
else if (
CUR ==
'\'') {
2939 ret = htmlParseHTMLAttribute(ctxt,
'\'');
2942 "AttValue: ' expected\n",
NULL,
NULL);
2949 ret = htmlParseHTMLAttribute(ctxt, 0);
2952 "AttValue: no value found\n",
NULL,
NULL);
/*
 * htmlParseSystemLiteral (excerpt):
 * Requires the literal to start with '"' or '\'' (otherwise reports
 * "SystemLiteral \" or ' expected"), tracks `len` and `startPosition` as
 * size_t counters, and reports "Invalid char in SystemLiteral 0x%X" and
 * "Unfinished SystemLiteral" on the corresponding error paths.
 * NOTE(review): the scanning loop and the construction of the returned
 * string are not visible in this excerpt.
 */
2970htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
2971 size_t len = 0, startPosition = 0;
2976 if ((
CUR !=
'"') && (
CUR !=
'\'')) {
2978 "SystemLiteral \" or ' expected\n",
NULL,
NULL);
2992 "Invalid char in SystemLiteral 0x%X\n",
CUR);
3000 "Unfinished SystemLiteral\n",
NULL,
NULL);
/*
 * htmlParsePubidLiteral (excerpt):
 * Requires the literal to start with '"' or '\'' (otherwise reports
 * "PubidLiteral \" or ' expected"), tracks `len` and `startPosition` as
 * size_t counters, and reports "Invalid char in PubidLiteral 0x%X" and
 * "Unfinished PubidLiteral" on the corresponding error paths.
 * NOTE(review): the scanning loop and the construction of the returned
 * string are not visible in this excerpt.
 */
3022htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
3023 size_t len = 0, startPosition = 0;
3028 if ((
CUR !=
'"') && (
CUR !=
'\'')) {
3030 "PubidLiteral \" or ' expected\n",
NULL,
NULL);
3046 "Invalid char in PubidLiteral 0x%X\n",
CUR);
3055 "Unfinished PubidLiteral\n",
NULL,
NULL);
/*
 * htmlParseScript (excerpt):
 * Accumulates characters in the stack buffer `buf`
 * (HTML_PARSER_BIG_BUFFER_SIZE + 5 bytes).  A "</" sequence gets special
 * treatment: in recovery mode one path is taken, otherwise
 * "Element %s embeds close tag" can be reported, and a letter after "</"
 * is tested for.  Once nbchar reaches HTML_PARSER_BIG_BUFFER_SIZE, and
 * again at the end when nbchar is non-zero, the buffer is delivered to
 * sax->cdataBlock if set, else to sax->characters.
 * NOTE(review): the loop header, the exact close-tag matching and the
 * nbchar reset after flushing are elided in this excerpt.
 */
3087htmlParseScript(htmlParserCtxtPtr ctxt) {
3088 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];
3095 if ((
cur ==
'<') && (
NXT(1) ==
'/')) {
3107 if (ctxt->recovery) {
3114 "Element %s embeds close tag\n",
3118 if (((
NXT(2) >=
'A') && (
NXT(2) <=
'Z')) ||
3119 ((
NXT(2) >=
'a') && (
NXT(2) <=
'z')))
3129 "Invalid char in CDATA 0x%X\n",
cur);
3131 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
3133 if (ctxt->sax->cdataBlock!=
NULL) {
3137 ctxt->sax->cdataBlock(ctxt->userData,
buf, nbchar);
3138 }
else if (ctxt->sax->characters !=
NULL) {
3139 ctxt->sax->characters(ctxt->userData,
buf, nbchar);
3148 if ((nbchar != 0) && (ctxt->sax !=
NULL) && (!ctxt->disableSAX)) {
3150 if (ctxt->sax->cdataBlock!=
NULL) {
3154 ctxt->sax->cdataBlock(ctxt->userData,
buf, nbchar);
3155 }
else if (ctxt->sax->characters !=
NULL) {
3156 ctxt->sax->characters(ctxt->userData,
buf, nbchar);
/*
 * htmlParseCharDataInternal (excerpt):
 * Buffers character data in `buf` (HTML_PARSER_BIG_BUFFER_SIZE + 6 bytes),
 * optionally seeded with `readahead`.  The scan continues while the
 * current character is neither '<' nor '&', unless ctxt->token holds that
 * same character.  The buffer is flushed when nbchar reaches
 * HTML_PARSER_BIG_BUFFER_SIZE and once more after the loop.  Both flushes
 * require a SAX handler with SAX not disabled; each shows a
 * ctxt->keepBlanks test guarding sax->characters, a call to
 * sax->ignorableWhitespace, and an htmlCheckParagraph(ctxt) call followed
 * by sax->characters.
 * NOTE(review): the condition under which `readahead` is stored, the
 * blank-detection test selecting the whitespace path and the `chunk`
 * handling are elided -- confirm against the full source.
 */
3174htmlParseCharDataInternal(htmlParserCtxtPtr ctxt,
int readahead) {
3175 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 6];
3181 buf[nbchar++] = readahead;
3185 while (((
cur !=
'<') || (ctxt->token ==
'<')) &&
3186 ((
cur !=
'&') || (ctxt->token ==
'&')) &&
3190 "Invalid char in CDATA 0x%X\n",
cur);
3194 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
3200 if ((ctxt->sax !=
NULL) && (!ctxt->disableSAX)) {
3202 if (ctxt->keepBlanks) {
3203 if (ctxt->sax->characters !=
NULL)
3204 ctxt->sax->characters(ctxt->userData,
buf, nbchar);
3206 if (ctxt->sax->ignorableWhitespace !=
NULL)
3207 ctxt->sax->ignorableWhitespace(ctxt->userData,
3211 htmlCheckParagraph(ctxt);
3212 if (ctxt->sax->characters !=
NULL)
3213 ctxt->sax->characters(ctxt->userData,
buf, nbchar);
3220 if (
chunk > HTML_PARSER_BUFFER_SIZE) {
3238 if ((ctxt->sax !=
NULL) && (!ctxt->disableSAX)) {
3240 if (ctxt->keepBlanks) {
3241 if (ctxt->sax->characters !=
NULL)
3242 ctxt->sax->characters(ctxt->userData,
buf, nbchar);
3244 if (ctxt->sax->ignorableWhitespace !=
NULL)
3245 ctxt->sax->ignorableWhitespace(ctxt->userData,
3249 htmlCheckParagraph(ctxt);
3250 if (ctxt->sax->characters !=
NULL)
3251 ctxt->sax->characters(ctxt->userData,
buf, nbchar);
/*
 * htmlParseCharData:
 * Thin wrapper that calls htmlParseCharDataInternal with a readahead
 * argument of 0.
 */
3274htmlParseCharData(htmlParserCtxtPtr ctxt) {
3275 htmlParseCharDataInternal(ctxt, 0);
/*
 * htmlParseExternalID (excerpt):
 * Recognises the keywords SYSTEM and PUBLIC using the upper-casing
 * accessors UPPER / UPP(n).
 *  - SYSTEM: may report "Space required after 'SYSTEM'", then parses the
 *    URI with htmlParseSystemLiteral and reports
 *    "htmlParseExternalID: SYSTEM, no URI" on failure.
 *  - PUBLIC: may report "Space required after 'PUBLIC'", stores the result
 *    of htmlParsePubidLiteral in *publicID (reporting an error when it is
 *    NULL), and parses a following system literal only when the next
 *    character is a quote.
 * NOTE(review): the keyword skipping, blank skipping and the return
 * statement are not visible in this excerpt.
 */
3296htmlParseExternalID(htmlParserCtxtPtr ctxt,
xmlChar **publicID) {
3299 if ((UPPER ==
'S') && (UPP(1) ==
'Y') &&
3300 (UPP(2) ==
'S') && (UPP(3) ==
'T') &&
3301 (UPP(4) ==
'E') && (UPP(5) ==
'M')) {
3305 "Space required after 'SYSTEM'\n",
NULL,
NULL);
3308 URI = htmlParseSystemLiteral(ctxt);
3311 "htmlParseExternalID: SYSTEM, no URI\n",
NULL,
NULL);
3313 }
else if ((UPPER ==
'P') && (UPP(1) ==
'U') &&
3314 (UPP(2) ==
'B') && (UPP(3) ==
'L') &&
3315 (UPP(4) ==
'I') && (UPP(5) ==
'C')) {
3319 "Space required after 'PUBLIC'\n",
NULL,
NULL);
3322 *publicID = htmlParsePubidLiteral(ctxt);
3323 if (*publicID ==
NULL) {
3325 "htmlParseExternalID: PUBLIC, no Public Identifier\n",
3329 if ((
CUR ==
'"') || (
CUR ==
'\'')) {
3330 URI = htmlParseSystemLiteral(ctxt);
/*
 * htmlParsePI (excerpt):
 * Handles input starting with "<?".  The parser state (ctxt->instate) is
 * saved in `state` on entry and written back on the visible exit paths.
 * The target is parsed with htmlParseName; content is read until cur is 0
 * or '>'.  Results go to sax->processingInstruction when a SAX handler
 * exists, SAX is not disabled and the callback is set.
 * Diagnostics visible here: "ParsePI: PI %s space expected",
 * "Invalid char in processing instruction ...",
 * "ParsePI: PI %s never end ..." and "PI is not started correctly";
 * allocation failures go through htmlErrMemory.
 * NOTE(review): buffer allocation/growth (initial size
 * HTML_PARSER_BUFFER_SIZE) and the arguments passed to
 * processingInstruction are elided in this excerpt.
 */
3345htmlParsePI(htmlParserCtxtPtr ctxt) {
3348 int size = HTML_PARSER_BUFFER_SIZE;
3354 if ((
RAW ==
'<') && (
NXT(1) ==
'?')) {
3355 state = ctxt->instate;
3367 target = htmlParseName(ctxt);
3375 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3376 (ctxt->sax->processingInstruction !=
NULL))
3377 ctxt->sax->processingInstruction(ctxt->userData,
3379 ctxt->instate =
state;
3384 htmlErrMemory(ctxt,
NULL);
3385 ctxt->instate =
state;
3391 "ParsePI: PI %s space expected\n",
target,
NULL);
3395 while ((
cur != 0) && (
cur !=
'>')) {
3402 htmlErrMemory(ctxt,
NULL);
3404 ctxt->instate =
state;
3418 "Invalid char in processing instruction "
3432 "ParsePI: PI %s never end ...\n",
target,
NULL);
3439 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3440 (ctxt->sax->processingInstruction !=
NULL))
3441 ctxt->sax->processingInstruction(ctxt->userData,
3447 "PI is not started correctly",
NULL,
NULL);
3449 ctxt->instate =
state;
/*
 * htmlParseComment (excerpt):
 * Returns immediately unless the input starts with "<!--".  The parser
 * state (ctxt->instate) is saved in `state` and written back on the
 * visible exit paths.  The scan keeps a sliding window of characters
 * (q, r, cur, and `next`) to detect the terminator; a comment closed by
 * "--!>" is reported as "Comment incorrectly closed by '--!>'".  On
 * success the text is passed to sax->comment when the handler exists and
 * SAX is not disabled; an unterminated comment is reported with
 * "Comment not terminated".
 * Allocation failures are reported through htmlErrMemory
 * ("buffer allocation failed" / "growing buffer failed").
 * NOTE(review): buffer management (initial size HTML_PARSER_BUFFER_SIZE)
 * and the character-advance logic are elided in this excerpt.
 */
3462htmlParseComment(htmlParserCtxtPtr ctxt) {
3465 int size = HTML_PARSER_BUFFER_SIZE;
3475 if ((
RAW !=
'<') || (
NXT(1) !=
'!') ||
3476 (
NXT(2) !=
'-') || (
NXT(3) !=
'-'))
return;
3478 state = ctxt->instate;
3484 htmlErrMemory(ctxt,
"buffer allocation failed\n");
3485 ctxt->instate =
state;
3502 if (
q ==
'-' &&
r ==
'>') {
3509 while ((
cur != 0) &&
3511 (
r !=
'-') || (
q !=
'-'))) {
3520 if ((
q ==
'-') && (
r ==
'-') && (
cur ==
'!') && (
next ==
'>')) {
3522 "Comment incorrectly closed by '--!>'",
NULL,
NULL);
3534 htmlErrMemory(ctxt,
"growing buffer failed\n");
3535 ctxt->instate =
state;
3544 "Invalid char in comment 0x%X\n",
q);
3558 if ((ctxt->sax !=
NULL) && (ctxt->sax->comment !=
NULL) &&
3559 (!ctxt->disableSAX))
3560 ctxt->sax->comment(ctxt->userData,
buf);
3562 ctxt->instate =
state;
3568 "Comment not terminated \n<!--%.50s\n",
buf,
NULL);
/*
 * htmlParseCharRef (excerpt):
 * Rejects a NULL ctxt or ctxt->input with
 * "htmlParseCharRef: context error".  Two reference forms are handled:
 *  - "&#x" / "&#X": loops until ';', accepting 0-9, a-f and A-F;
 *  - "&#": loops until ';', accepting 0-9.
 * A character outside the accepted set leads to
 * "htmlParseCharRef: missing semicolon".  Other diagnostics:
 * "invalid value", "value too large" (val >= 0x110000) and
 * "invalid xmlChar value %d".
 * NOTE(review): the accumulation of `val`, any overflow guard inside the
 * digit loops and the returned value are elided in this excerpt.
 */
3584htmlParseCharRef(htmlParserCtxtPtr ctxt) {
3587 if ((ctxt ==
NULL) || (ctxt->input ==
NULL)) {
3589 "htmlParseCharRef: context error\n",
3593 if ((
CUR ==
'&') && (
NXT(1) ==
'#') &&
3594 ((
NXT(2) ==
'x') ||
NXT(2) ==
'X')) {
3596 while (
CUR !=
';') {
3597 if ((
CUR >=
'0') && (
CUR <=
'9')) {
3600 }
else if ((
CUR >=
'a') && (
CUR <=
'f')) {
3603 }
else if ((
CUR >=
'A') && (
CUR <=
'F')) {
3608 "htmlParseCharRef: missing semicolon\n",
3616 }
else if ((
CUR ==
'&') && (
NXT(1) ==
'#')) {
3618 while (
CUR !=
';') {
3619 if ((
CUR >=
'0') && (
CUR <=
'9')) {
3624 "htmlParseCharRef: missing semicolon\n",
3634 "htmlParseCharRef: invalid value\n",
NULL,
NULL);
3641 }
else if (
val >= 0x110000) {
3643 "htmlParseCharRef: value too large\n",
NULL,
NULL);
3646 "htmlParseCharRef: invalid xmlChar value %d\n",
/*
 * htmlParseDocTypeDecl (excerpt):
 * Parses the DOCTYPE name with htmlParseName (reporting
 * "htmlParseDocTypeDecl : no DOCTYPE name !" on failure) and the external
 * identifier with htmlParseExternalID, which also fills ExternalID.
 * A declaration that is not properly closed is reported as
 * "DOCTYPE improperly terminated" and input is skipped up to '>' or end
 * of data.  The result is delivered through sax->internalSubset(name,
 * ExternalID, URI) when the handler exists and SAX is not disabled.
 * NOTE(review): the skipping of the "<!DOCTYPE" prefix and the release of
 * URI / ExternalID are not visible in this excerpt.
 */
3664htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
3679 name = htmlParseName(ctxt);
3682 "htmlParseDocTypeDecl : no DOCTYPE name !\n",
3694 URI = htmlParseExternalID(ctxt, &ExternalID);
3702 "DOCTYPE improperly terminated\n",
NULL,
NULL);
3704 while ((
CUR != 0) && (
CUR !=
'>'))
3713 if ((ctxt->sax !=
NULL) && (ctxt->sax->internalSubset !=
NULL) &&
3714 (!ctxt->disableSAX))
3715 ctxt->sax->internalSubset(ctxt->userData,
name, ExternalID, URI);
/*
 * htmlParseAttribute (excerpt):
 * Parses the attribute name with htmlParseHTMLName (reporting
 * "error parsing attribute name" on failure) and the attribute value with
 * htmlParseAttValue.
 * NOTE(review): the handling of the '=' separator, the store through
 * `value` and the returned name are not visible in this excerpt.
 */
3746htmlParseAttribute(htmlParserCtxtPtr ctxt,
xmlChar **
value) {
3751 name = htmlParseHTMLName(ctxt);
3754 "error parsing attribute name\n",
NULL,
NULL);
3765 val = htmlParseAttValue(ctxt);
/*
 * htmlCheckEncodingDirect (excerpt):
 * Takes an encoding name and consults ctxt->options for
 * HTML_PARSE_IGNORE_ENC as part of its entry test.  It inspects
 * ctxt->input->encoding and the input buffer's encoder, and can report
 * "htmlCheckEncoding: wrong encoding meta",
 * "htmlCheckEncoding: unknown encoding %s" and
 * "htmlCheckEncoding: encoder error".  When the input buffer has an
 * encoder, raw data and a decoded buffer, it computes the number of bytes
 * already processed as ctxt->input->cur - ctxt->input->base.
 * NOTE(review): the encoder lookup/switch calls and the conversion step
 * are elided in this excerpt -- confirm against the full source.
 */
3783htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt,
const xmlChar *
encoding) {
3786 (ctxt->options & HTML_PARSE_IGNORE_ENC))
3790 if (ctxt->input->encoding !=
NULL)
3799 if (ctxt->input->encoding !=
NULL)
3812 (ctxt->input->buf !=
NULL) &&
3813 (ctxt->input->buf->encoder ==
NULL)) {
3815 "htmlCheckEncoding: wrong encoding meta\n",
3831 "htmlCheckEncoding: unknown encoding %s\n",
3836 if ((ctxt->input->buf !=
NULL) &&
3837 (ctxt->input->buf->encoder !=
NULL) &&
3838 (ctxt->input->buf->raw !=
NULL) &&
3839 (ctxt->input->buf->buffer !=
NULL)) {
3846 processed = ctxt->input->cur - ctxt->input->base;
3852 "htmlCheckEncoding: encoder error\n",
3870htmlCheckEncoding(htmlParserCtxtPtr ctxt,
const xmlChar *attvalue) {
3887 htmlCheckEncodingDirect(ctxt,
encoding);
3899htmlCheckMeta(htmlParserCtxtPtr ctxt,
const xmlChar **
atts) {
3910 while (att !=
NULL) {
3916 htmlCheckEncodingDirect(ctxt,
value);
3922 htmlCheckEncoding(ctxt,
content);
3947htmlParseStartTag(htmlParserCtxtPtr ctxt) {
3958 if ((ctxt ==
NULL) || (ctxt->input ==
NULL)) {
3960 "htmlParseStartTag: context error\n",
NULL,
NULL);
3965 if (
CUR !=
'<')
return -1;
3969 maxatts = ctxt->maxatts;
3972 name = htmlParseHTMLName(ctxt);
3975 "htmlParseStartTag: invalid element name\n",
3978 while ((
CUR != 0) && (
CUR !=
'>') &&
3989 htmlAutoClose(ctxt,
name);
3994 htmlCheckImplied(ctxt,
name);
4002 "htmlParseStartTag: misplaced <html> tag\n",
4007 if ((ctxt->nameNr != 1) &&
4010 "htmlParseStartTag: misplaced <head> tag\n",
4017 for (indx = 0;indx < ctxt->nameNr;indx++) {
4020 "htmlParseStartTag: misplaced <body> tag\n",
4034 while ((
CUR != 0) &&
4036 ((
CUR !=
'/') || (
NXT(1) !=
'>'))) {
4038 attname = htmlParseAttribute(ctxt, &attvalue);
4039 if (attname !=
NULL) {
4044 for (
i = 0;
i < nbatts;
i += 2) {
4047 "Attribute %s redefined\n", attname,
NULL);
4048 if (attvalue !=
NULL)
4062 htmlErrMemory(ctxt,
NULL);
4063 if (attvalue !=
NULL)
4068 ctxt->maxatts = maxatts;
4069 }
else if (nbatts + 4 > maxatts) {
4074 maxatts *
sizeof(
const xmlChar *));
4076 htmlErrMemory(ctxt,
NULL);
4077 if (attvalue !=
NULL)
4083 ctxt->maxatts = maxatts;
4085 atts[nbatts++] = attname;
4086 atts[nbatts++] = attvalue;
4091 if (attvalue !=
NULL)
4095 while ((
CUR != 0) &&
4097 ((
CUR !=
'/') || (
NXT(1) !=
'>')))
4108 if (meta && (nbatts != 0))
4109 htmlCheckMeta(ctxt,
atts);
4115 htmlnamePush(ctxt,
name);
4116 if ((ctxt->sax !=
NULL) && (ctxt->sax->startElement !=
NULL)) {
4118 ctxt->sax->startElement(ctxt->userData,
name,
atts);
4120 ctxt->sax->startElement(ctxt->userData,
name,
NULL);
4125 for (
i = 1;
i < nbatts;
i += 2) {
4150htmlParseEndTag(htmlParserCtxtPtr ctxt)
4156 if ((
CUR !=
'<') || (
NXT(1) !=
'/')) {
4158 "htmlParseEndTag: '</' not found\n",
NULL,
NULL);
4163 name = htmlParseHTMLName(ctxt);
4172 "End tag : expected '>'\n",
NULL,
NULL);
4174 while ((
CUR != 0) && (
CUR !=
'>'))
4184 if ((ctxt->depth > 0) &&
4196 for (
i = (ctxt->nameNr - 1);
i >= 0;
i--) {
4202 "Unexpected end tag : %s\n",
name,
NULL);
4211 htmlAutoCloseOnClose(ctxt,
name);
4220 "Opening and ending tag mismatch: %s and %s\n",
4227 oldname = ctxt->name;
4229 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
4230 ctxt->sax->endElement(ctxt->userData,
name);
4231 htmlNodeInfoPop(ctxt);
4251htmlParseReference(htmlParserCtxtPtr ctxt) {
4252 const htmlEntityDesc * ent;
4255 if (
CUR !=
'&')
return;
4257 if (
NXT(1) ==
'#') {
4261 c = htmlParseCharRef(ctxt);
4266 else if (
c < 0x800) {
out[
i++]=((
c >> 6) & 0x1F) | 0xC0;
bits= 0; }
4267 else if (
c < 0x10000) {
out[
i++]=((
c >> 12) & 0x0F) | 0xE0;
bits= 6; }
4268 else {
out[
i++]=((
c >> 18) & 0x07) | 0xF0;
bits= 12; }
4275 htmlCheckParagraph(ctxt);
4276 if ((ctxt->sax !=
NULL) && (ctxt->sax->characters !=
NULL))
4277 ctxt->sax->characters(ctxt->userData,
out,
i);
4279 ent = htmlParseEntityRef(ctxt, &
name);
4281 htmlCheckParagraph(ctxt);
4282 if ((ctxt->sax !=
NULL) && (ctxt->sax->characters !=
NULL))
4283 ctxt->sax->characters(ctxt->userData,
BAD_CAST "&", 1);
4286 if ((ent ==
NULL) || !(ent->value > 0)) {
4287 htmlCheckParagraph(ctxt);
4288 if ((ctxt->sax !=
NULL) && (ctxt->sax->characters !=
NULL)) {
4289 ctxt->sax->characters(ctxt->userData,
BAD_CAST "&", 1);
4301 {
out[
i++]=((
c >> 6) & 0x1F) | 0xC0;
bits= 0; }
4302 else if (
c < 0x10000)
4303 {
out[
i++]=((
c >> 12) & 0x0F) | 0xE0;
bits= 6; }
4305 {
out[
i++]=((
c >> 18) & 0x07) | 0xF0;
bits= 12; }
4312 htmlCheckParagraph(ctxt);
4313 if ((ctxt->sax !=
NULL) && (ctxt->sax->characters !=
NULL))
4314 ctxt->sax->characters(ctxt->userData,
out,
i);
4328htmlParseContent(htmlParserCtxtPtr ctxt) {
4334 depth = ctxt->nameNr;
4344 if ((
CUR ==
'<') && (
NXT(1) ==
'/')) {
4345 if (htmlParseEndTag(ctxt) &&
4346 ((currentNode !=
NULL) || (ctxt->nameNr == 0))) {
4347 if (currentNode !=
NULL)
4354 else if ((
CUR ==
'<') &&
4356 (
NXT(1) ==
'_') || (
NXT(1) ==
':'))) {
4357 name = htmlParseHTMLName_nonInvasive(ctxt);
4360 "htmlParseStartTag: invalid element name\n",
4363 while ((
CUR != 0) && (
CUR !=
'>'))
4366 if (currentNode !=
NULL)
4371 if (ctxt->name !=
NULL) {
4372 if (htmlCheckAutoClose(
name, ctxt->
name) == 1) {
4373 htmlAutoClose(ctxt,
name);
4383 if ((ctxt->nameNr > 0) && (
depth >= ctxt->nameNr) &&
4395 htmlParseScript(ctxt);
4398 else if ((
CUR ==
'<') && (
NXT(1) ==
'!')) {
4402 if ((UPP(2) ==
'D') && (UPP(3) ==
'O') &&
4403 (UPP(4) ==
'C') && (UPP(5) ==
'T') &&
4404 (UPP(6) ==
'Y') && (UPP(7) ==
'P') &&
4407 "Misplaced DOCTYPE declaration\n",
4409 htmlParseDocTypeDecl(ctxt);
4414 else if ((
NXT(2) ==
'-') && (
NXT(3) ==
'-')) {
4415 htmlParseComment(ctxt);
4418 htmlSkipBogusComment(ctxt);
4425 else if ((
CUR ==
'<') && (
NXT(1) ==
'?')) {
4433 htmlParseElement(ctxt);
4435 else if (
CUR ==
'<') {
4436 if ((ctxt->sax !=
NULL) && (!ctxt->disableSAX) &&
4437 (ctxt->sax->characters !=
NULL))
4438 ctxt->sax->characters(ctxt->userData,
BAD_CAST "<", 1);
4446 else if (
CUR ==
'&') {
4447 htmlParseReference(ctxt);
4453 else if (
CUR == 0) {
4454 htmlAutoCloseOnEnd(ctxt);
4462 htmlParseCharData(ctxt);
4482htmlParseElement(htmlParserCtxtPtr ctxt) {
4485 const htmlElemDesc *
info;
4486 htmlParserNodeInfo node_info;
4491 if ((ctxt ==
NULL) || (ctxt->input ==
NULL)) {
4493 "htmlParseElement: context error\n",
NULL,
NULL);
4501 if (ctxt->record_info) {
4502 node_info.begin_pos = ctxt->input->consumed +
4503 (
CUR_PTR - ctxt->input->base);
4504 node_info.begin_line = ctxt->input->line;
4507 failed = htmlParseStartTag(ctxt);
4509 if ((failed == -1) || (
name ==
NULL)) {
4527 if ((
CUR ==
'/') && (
NXT(1) ==
'>')) {
4529 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
4530 ctxt->sax->endElement(ctxt->userData,
name);
4539 "Couldn't find end of Start Tag %s\n",
name,
NULL);
4552 if (ctxt->record_info) {
4553 node_info.end_pos = ctxt->input->consumed +
4554 (
CUR_PTR - ctxt->input->base);
4555 node_info.end_line = ctxt->input->line;
4556 node_info.node = ctxt->node;
4566 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
4567 ctxt->sax->endElement(ctxt->userData,
name);
4576 depth = ctxt->nameNr;
4578 oldptr = ctxt->input->cur;
4579 htmlParseContent(ctxt);
4580 if (oldptr==ctxt->input->cur)
break;
4581 if (ctxt->nameNr <
depth)
break;
4587 if ( currentNode !=
NULL && ctxt->record_info ) {
4588 node_info.end_pos = ctxt->input->consumed +
4589 (
CUR_PTR - ctxt->input->base);
4590 node_info.end_line = ctxt->input->line;
4591 node_info.node = ctxt->node;
4595 htmlAutoCloseOnEnd(ctxt);
4598 if (currentNode !=
NULL)
4603htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {
4607 if ( ctxt->node !=
NULL && ctxt->record_info ) {
4608 ctxt->nodeInfo->end_pos = ctxt->input->consumed +
4609 (
CUR_PTR - ctxt->input->base);
4610 ctxt->nodeInfo->end_line = ctxt->input->line;
4611 ctxt->nodeInfo->node = ctxt->node;
4613 htmlNodeInfoPop(ctxt);
4616 htmlAutoCloseOnEnd(ctxt);
4632htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
4634 const htmlElemDesc *
info;
4635 htmlParserNodeInfo node_info = {
NULL, 0, 0, 0, 0 };
4638 if ((ctxt ==
NULL) || (ctxt->input ==
NULL)) {
4640 "htmlParseElementInternal: context error\n",
NULL,
NULL);
4648 if (ctxt->record_info) {
4649 node_info.begin_pos = ctxt->input->consumed +
4650 (
CUR_PTR - ctxt->input->base);
4651 node_info.begin_line = ctxt->input->line;
4654 failed = htmlParseStartTag(ctxt);
4656 if ((failed == -1) || (
name ==
NULL)) {
4674 if ((
CUR ==
'/') && (
NXT(1) ==
'>')) {
4676 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
4677 ctxt->sax->endElement(ctxt->userData,
name);
4686 "Couldn't find end of Start Tag %s\n",
name,
NULL);
4696 if (ctxt->record_info)
4697 htmlNodeInfoPush(ctxt, &node_info);
4698 htmlParserFinishElementParsing(ctxt);
4706 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
4707 ctxt->sax->endElement(ctxt->userData,
name);
4712 if (ctxt->record_info)
4713 htmlNodeInfoPush(ctxt, &node_info);
4725htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
4731 depth = ctxt->nameNr;
4741 if ((
CUR ==
'<') && (
NXT(1) ==
'/')) {
4742 if (htmlParseEndTag(ctxt) &&
4743 ((currentNode !=
NULL) || (ctxt->nameNr == 0))) {
4744 if (currentNode !=
NULL)
4748 depth = ctxt->nameNr;
4753 else if ((
CUR ==
'<') &&
4755 (
NXT(1) ==
'_') || (
NXT(1) ==
':'))) {
4756 name = htmlParseHTMLName_nonInvasive(ctxt);
4759 "htmlParseStartTag: invalid element name\n",
/* Dump the bogus tag like browsers do: advance until the closing '>' or
 * the end of input (CUR == 0). The condition must test CUR != 0, matching
 * the equivalent recovery loop in htmlParseContent; testing CUR == 0 makes
 * the loop a no-op on any real input and leaves the bogus tag unconsumed. */
4762 while ((
CUR != 0) && (
CUR !=
'>'))
4765 htmlParserFinishElementParsing(ctxt);
4766 if (currentNode !=
NULL)
4770 depth = ctxt->nameNr;
4774 if (ctxt->name !=
NULL) {
4775 if (htmlCheckAutoClose(
name, ctxt->
name) == 1) {
4776 htmlAutoClose(ctxt,
name);
4786 if ((ctxt->nameNr > 0) && (
depth >= ctxt->nameNr) &&
4789 htmlParserFinishElementParsing(ctxt);
4793 depth = ctxt->nameNr;
4802 htmlParseScript(ctxt);
4805 else if ((
CUR ==
'<') && (
NXT(1) ==
'!')) {
4809 if ((UPP(2) ==
'D') && (UPP(3) ==
'O') &&
4810 (UPP(4) ==
'C') && (UPP(5) ==
'T') &&
4811 (UPP(6) ==
'Y') && (UPP(7) ==
'P') &&
4814 "Misplaced DOCTYPE declaration\n",
4816 htmlParseDocTypeDecl(ctxt);
4821 else if ((
NXT(2) ==
'-') && (
NXT(3) ==
'-')) {
4822 htmlParseComment(ctxt);
4825 htmlSkipBogusComment(ctxt);
4832 else if ((
CUR ==
'<') && (
NXT(1) ==
'?')) {
4840 htmlParseElementInternal(ctxt);
4844 depth = ctxt->nameNr;
4846 else if (
CUR ==
'<') {
4847 if ((ctxt->sax !=
NULL) && (!ctxt->disableSAX) &&
4848 (ctxt->sax->characters !=
NULL))
4849 ctxt->sax->characters(ctxt->userData,
BAD_CAST "<", 1);
4857 else if (
CUR ==
'&') {
4858 htmlParseReference(ctxt);
4864 else if (
CUR == 0) {
4865 htmlAutoCloseOnEnd(ctxt);
4873 htmlParseCharData(ctxt);
4889__htmlParseContent(
void *ctxt) {
4891 htmlParseContentInternal((htmlParserCtxtPtr) ctxt);
4906htmlParseDocument(htmlParserCtxtPtr ctxt) {
4913 htmlDefaultSAXHandlerInit();
4915 if ((ctxt ==
NULL) || (ctxt->input ==
NULL)) {
4917 "htmlParseDocument: context error\n",
NULL,
NULL);
4921 ctxt->linenumbers = 1;
4926 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4930 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
4952 "Document is empty\n",
NULL,
NULL);
4955 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
4956 ctxt->sax->startDocument(ctxt->userData);
4962 while (((
CUR ==
'<') && (
NXT(1) ==
'!') &&
4963 (
NXT(2) ==
'-') && (
NXT(3) ==
'-')) ||
4964 ((
CUR ==
'<') && (
NXT(1) ==
'?'))) {
4965 htmlParseComment(ctxt);
4975 if ((
CUR ==
'<') && (
NXT(1) ==
'!') &&
4976 (UPP(2) ==
'D') && (UPP(3) ==
'O') &&
4977 (UPP(4) ==
'C') && (UPP(5) ==
'T') &&
4978 (UPP(6) ==
'Y') && (UPP(7) ==
'P') &&
4980 htmlParseDocTypeDecl(ctxt);
4987 while (((
CUR ==
'<') && (
NXT(1) ==
'!') &&
4988 (
NXT(2) ==
'-') && (
NXT(3) ==
'-')) ||
4989 ((
CUR ==
'<') && (
NXT(1) ==
'?'))) {
4990 htmlParseComment(ctxt);
4998 htmlParseContentInternal(ctxt);
5004 htmlAutoCloseOnEnd(ctxt);
5010 if ((ctxt->sax) && (ctxt->sax->endDocument !=
NULL))
5011 ctxt->sax->endDocument(ctxt->userData);
5013 if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc !=
NULL)) {
5016 ctxt->myDoc->intSubset =
5018 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
5019 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
5021 if (! ctxt->wellFormed)
return(-1);
5042htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
5044 htmlSAXHandler *
sax;
5046 if (ctxt ==
NULL)
return(-1);
5047 memset(ctxt, 0,
sizeof(htmlParserCtxt));
5050 if (ctxt->dict ==
NULL) {
5051 htmlErrMemory(
NULL,
"htmlInitParserCtxt: out of memory\n");
5054 sax = (htmlSAXHandler *)
xmlMalloc(
sizeof(htmlSAXHandler));
5056 htmlErrMemory(
NULL,
"htmlInitParserCtxt: out of memory\n");
5062 ctxt->inputTab = (htmlParserInputPtr *)
5063 xmlMalloc(5 *
sizeof(htmlParserInputPtr));
5064 if (ctxt->inputTab ==
NULL) {
5065 htmlErrMemory(
NULL,
"htmlInitParserCtxt: out of memory\n");
5074 ctxt->version =
NULL;
5075 ctxt->encoding =
NULL;
5076 ctxt->standalone = -1;
5080 ctxt->nodeTab = (htmlNodePtr *)
xmlMalloc(10 *
sizeof(htmlNodePtr));
5081 if (ctxt->nodeTab ==
NULL) {
5082 htmlErrMemory(
NULL,
"htmlInitParserCtxt: out of memory\n");
5097 if (ctxt->nameTab ==
NULL) {
5098 htmlErrMemory(
NULL,
"htmlInitParserCtxt: out of memory\n");
5114 ctxt->nodeInfoTab =
NULL;
5115 ctxt->nodeInfoNr = 0;
5116 ctxt->nodeInfoMax = 0;
5119 xmlSAX2InitHtmlDefaultSAXHandler(
sax);
5121 ctxt->userData = ctxt;
5123 ctxt->wellFormed = 1;
5124 ctxt->replaceEntities = 0;
5128 ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
5129 ctxt->vctxt.userData = ctxt;
5132 ctxt->record_info = 0;
5134 ctxt->checkIndex = 0;
5135 ctxt->catalogs =
NULL;
5149htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
5163htmlNewParserCtxt(
void)
5169 htmlErrMemory(
NULL,
"NewParserCtxt: out of memory\n");
5173 if (htmlInitParserCtxt(ctxt) < 0) {
5174 htmlFreeParserCtxt(ctxt);
5190htmlCreateMemoryParserCtxt(
const char *
buffer,
int size) {
5200 ctxt = htmlNewParserCtxt();
5233static htmlParserCtxtPtr
5236 htmlParserCtxtPtr ctxt;
5241 ctxt = htmlCreateMemoryParserCtxt((
char *)
cur,
len);
5249 if (ctxt->input->encoding !=
NULL)
5261 "Unsupported encoding %s\n",
5273 "Unsupported encoding %s\n",
5281#ifdef LIBXML_PUSH_ENABLED
5307htmlParseLookupSequence(htmlParserCtxtPtr ctxt,
xmlChar first,
5311 htmlParserInputPtr
in;
5314 char valdellim = 0x0;
5324 if (ctxt->checkIndex >
base) {
5325 base = ctxt->checkIndex;
5327 invalue = ctxt->hasPErefs & 1 ? 1 : 0;
5344 if (ignoreattrval) {
5356 }
else if (invalue) {
5364 }
else if (
next != 0) {
5368 ctxt->checkIndex = 0;
5372 "HPP: lookup '%c' found at %d\n",
5374 else if (third == 0)
5376 "HPP: lookup '%c%c' found at %d\n",
5380 "HPP: lookup '%c%c%c' found at %d\n",
5383 return (
base - (
in->cur -
in->base));
5386 ctxt->checkIndex =
base;
5389 ctxt->hasPErefs |= 1;
5391 ctxt->hasPErefs &= ~1;
5395 "HPP: lookup '%c' failed\n",
first);
5396 else if (third == 0)
5398 "HPP: lookup '%c%c' failed\n",
first,
next);
5401 "HPP: lookup '%c%c%c' failed\n",
first,
next,
5422htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
5428 mark = htmlParseLookupSequence(ctxt,
'-',
'-', 0, 0);
5430 (
NXT(mark+2) ==
'>') ||
5431 ((
NXT(mark+2) ==
'!') && (
NXT(mark+3) ==
'>'))) {
5434 ctxt->checkIndex =
cur + mark + 1;
5450htmlParseTryOrFinish(htmlParserCtxtPtr ctxt,
int terminate) {
5452 htmlParserInputPtr
in;
5456 htmlParserNodeInfo node_info;
5459 switch (ctxt->instate) {
5462 "HPP: try EOF\n");
break;
5465 "HPP: try START\n");
break;
5468 "HPP: try MISC\n");
break;
5471 "HPP: try COMMENT\n");
break;
5474 "HPP: try PROLOG\n");
break;
5477 "HPP: try START_TAG\n");
break;
5480 "HPP: try CONTENT\n");
break;
5483 "HPP: try CDATA_SECTION\n");
break;
5486 "HPP: try END_TAG\n");
break;
5489 "HPP: try ENTITY_DECL\n");
break;
5492 "HPP: try ENTITY_VALUE\n");
break;
5495 "HPP: try ATTRIBUTE_VALUE\n");
break;
5498 "HPP: try DTD\n");
break;
5501 "HPP: try EPILOG\n");
break;
5504 "HPP: try PI\n");
break;
5507 "HPP: try SYSTEM_LITERAL\n");
break;
5519 (
in->cur -
in->base);
5521 htmlAutoCloseOnEnd(ctxt);
5527 if ((ctxt->sax) && (ctxt->sax->endDocument !=
NULL))
5528 ctxt->sax->endDocument(ctxt->userData);
5545 switch (ctxt->instate) {
5562 (
in->cur -
in->base);
5564 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
5565 ctxt->sax->setDocumentLocator(ctxt->userData,
5567 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
5568 (!ctxt->disableSAX))
5569 ctxt->sax->startDocument(ctxt->userData);
5573 if ((
cur ==
'<') && (
next ==
'!') &&
5574 (UPP(2) ==
'D') && (UPP(3) ==
'O') &&
5575 (UPP(4) ==
'C') && (UPP(5) ==
'T') &&
5576 (UPP(6) ==
'Y') && (UPP(7) ==
'P') &&
5579 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 1) < 0))
5583 "HPP: Parsing internal subset\n");
5585 htmlParseDocTypeDecl(ctxt);
5589 "HPP: entering PROLOG\n");
5595 "HPP: entering MISC\n");
5605 (
in->cur -
in->base);
5623 if ((
cur ==
'<') && (
next ==
'!') &&
5624 (
in->cur[2] ==
'-') && (
in->cur[3] ==
'-')) {
5625 if ((!
terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
5629 "HPP: Parsing Comment\n");
5631 htmlParseComment(ctxt);
5633 }
else if ((
cur ==
'<') && (
next ==
'?')) {
5635 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 0) < 0))
5639 "HPP: Parsing PI\n");
5643 }
else if ((
cur ==
'<') && (
next ==
'!') &&
5644 (UPP(2) ==
'D') && (UPP(3) ==
'O') &&
5645 (UPP(4) ==
'C') && (UPP(5) ==
'T') &&
5646 (UPP(6) ==
'Y') && (UPP(7) ==
'P') &&
5649 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 1) < 0))
5653 "HPP: Parsing internal subset\n");
5655 htmlParseDocTypeDecl(ctxt);
5659 "HPP: entering PROLOG\n");
5661 }
else if ((
cur ==
'<') && (
next ==
'!') &&
5668 "HPP: entering START_TAG\n");
5678 (
in->cur -
in->base);
5683 if ((
cur ==
'<') && (
next ==
'!') &&
5684 (
in->cur[2] ==
'-') && (
in->cur[3] ==
'-')) {
5685 if ((!
terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
5689 "HPP: Parsing Comment\n");
5691 htmlParseComment(ctxt);
5693 }
else if ((
cur ==
'<') && (
next ==
'?')) {
5695 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 0) < 0))
5699 "HPP: Parsing PI\n");
5703 }
else if ((
cur ==
'<') && (
next ==
'!') &&
5710 "HPP: entering START_TAG\n");
5719 (
in->cur -
in->base);
5724 htmlParseCharData(ctxt);
5730 if ((
cur ==
'<') && (
next ==
'!') &&
5731 (
in->cur[2] ==
'-') && (
in->cur[3] ==
'-')) {
5732 if ((!
terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
5736 "HPP: Parsing Comment\n");
5738 htmlParseComment(ctxt);
5740 }
else if ((
cur ==
'<') && (
next ==
'?')) {
5742 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 0) < 0))
5746 "HPP: Parsing PI\n");
5750 }
else if ((
cur ==
'<') && (
next ==
'!') &&
5755 ctxt->wellFormed = 0;
5759 "HPP: entering EOF\n");
5761 if ((ctxt->sax) && (ctxt->sax->endDocument !=
NULL))
5762 ctxt->sax->endDocument(ctxt->userData);
5769 const htmlElemDesc *
info;
5792 "HPP: entering CONTENT\n");
5798 ctxt->checkIndex = 0;
5801 "HPP: entering END_TAG\n");
5806 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 1) < 0))
5810 if (ctxt->record_info) {
5811 node_info.begin_pos = ctxt->input->consumed +
5812 (
CUR_PTR - ctxt->input->base);
5813 node_info.begin_line = ctxt->input->line;
5817 failed = htmlParseStartTag(ctxt);
5819 if ((failed == -1) ||
5838 if ((
CUR ==
'/') && (
NXT(1) ==
'>')) {
5840 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
5841 ctxt->sax->endElement(ctxt->userData,
name);
5846 "HPP: entering CONTENT\n");
5855 "Couldn't find end of Start Tag %s\n",
5866 if (ctxt->record_info)
5867 htmlNodeInfoPush(ctxt, &node_info);
5872 "HPP: entering CONTENT\n");
5881 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
5882 ctxt->sax->endElement(ctxt->userData,
name);
5886 if (ctxt->record_info)
5887 htmlNodeInfoPush(ctxt, &node_info);
5892 "HPP: entering CONTENT\n");
5902 if (ctxt->token != 0) {
5903 chr[0] = (
xmlChar) ctxt->token;
5904 htmlCheckParagraph(ctxt);
5905 if ((ctxt->sax !=
NULL) && (ctxt->sax->characters !=
NULL))
5906 ctxt->sax->characters(ctxt->userData, chr, 1);
5908 ctxt->checkIndex = 0;
5912 if ((
cur !=
'<') && (
cur !=
'&')) {
5913 if (ctxt->sax !=
NULL) {
5916 if (ctxt->keepBlanks) {
5917 if (ctxt->sax->characters !=
NULL)
5918 ctxt->sax->characters(
5919 ctxt->userData, chr, 1);
5921 if (ctxt->sax->ignorableWhitespace !=
NULL)
5922 ctxt->sax->ignorableWhitespace(
5923 ctxt->userData, chr, 1);
5926 htmlCheckParagraph(ctxt);
5927 if (ctxt->sax->characters !=
NULL)
5928 ctxt->sax->characters(
5929 ctxt->userData, chr, 1);
5933 ctxt->checkIndex = 0;
5951 idx = htmlParseLookupSequence(ctxt,
'<',
'/', 0, 0);
5958 htmlParseScript(ctxt);
5959 if ((
cur ==
'<') && (
next ==
'/')) {
5961 ctxt->checkIndex = 0;
5964 "HPP: entering END_TAG\n");
5968 }
else if ((
cur ==
'<') && (
next ==
'!')) {
5972 if ((UPP(2) ==
'D') && (UPP(3) ==
'O') &&
5973 (UPP(4) ==
'C') && (UPP(5) ==
'T') &&
5974 (UPP(6) ==
'Y') && (UPP(7) ==
'P') &&
5977 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 1) < 0))
5980 "Misplaced DOCTYPE declaration\n",
5982 htmlParseDocTypeDecl(ctxt);
5983 }
else if ((
in->cur[2] ==
'-') && (
in->cur[3] ==
'-')) {
5985 (htmlParseLookupCommentEnd(ctxt) < 0))
5989 "HPP: Parsing Comment\n");
5991 htmlParseComment(ctxt);
5995 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 0) < 0))
5997 htmlSkipBogusComment(ctxt);
5999 }
else if ((
cur ==
'<') && (
next ==
'?')) {
6001 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 0) < 0))
6005 "HPP: Parsing PI\n");
6009 }
else if ((
cur ==
'<') && (
next ==
'!') && (
avail < 4)) {
6011 }
else if ((
cur ==
'<') && (
next ==
'/')) {
6013 ctxt->checkIndex = 0;
6016 "HPP: entering END_TAG\n");
6023 ctxt->checkIndex = 0;
6026 "HPP: entering START_TAG\n");
6029 }
else if (
cur ==
'<') {
6030 if ((ctxt->sax !=
NULL) && (!ctxt->disableSAX) &&
6031 (ctxt->sax->characters !=
NULL))
6032 ctxt->sax->characters(ctxt->userData,
6043 (htmlParseLookupSequence(ctxt,
'<', 0, 0, 0) < 0))
6045 ctxt->checkIndex = 0;
6048 "HPP: Parsing char data\n");
6051 (
cur !=
'<') && (
in->cur <
in->end)) {
6053 htmlParseReference(ctxt);
6055 htmlParseCharData(ctxt);
6067 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 0) < 0))
6069 htmlParseEndTag(ctxt);
6070 if (ctxt->nameNr == 0) {
6075 ctxt->checkIndex = 0;
6078 "HPP: entering CONTENT\n");
6083 "HPP: internal error, state == CDATA\n",
6086 ctxt->checkIndex = 0;
6089 "HPP: entering CONTENT\n");
6094 "HPP: internal error, state == DTD\n",
6097 ctxt->checkIndex = 0;
6100 "HPP: entering CONTENT\n");
6105 "HPP: internal error, state == COMMENT\n",
6108 ctxt->checkIndex = 0;
6111 "HPP: entering CONTENT\n");
6116 "HPP: internal error, state == PI\n",
6119 ctxt->checkIndex = 0;
6122 "HPP: entering CONTENT\n");
6127 "HPP: internal error, state == ENTITY_DECL\n",
6130 ctxt->checkIndex = 0;
6133 "HPP: entering CONTENT\n");
6138 "HPP: internal error, state == ENTITY_VALUE\n",
6141 ctxt->checkIndex = 0;
6144 "HPP: entering DTD\n");
6149 "HPP: internal error, state == ATTRIBUTE_VALUE\n",
6152 ctxt->checkIndex = 0;
6155 "HPP: entering START_TAG\n");
6160 "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n",
6163 ctxt->checkIndex = 0;
6166 "HPP: entering CONTENT\n");
6171 "HPP: internal error, state == XML_PARSER_IGNORE\n",
6174 ctxt->checkIndex = 0;
6177 "HPP: entering CONTENT\n");
6182 "HPP: internal error, state == XML_PARSER_LITERAL\n",
6185 ctxt->checkIndex = 0;
6188 "HPP: entering CONTENT\n");
6196 htmlAutoCloseOnEnd(ctxt);
6202 if ((ctxt->sax) && (ctxt->sax->endDocument !=
NULL))
6203 ctxt->sax->endDocument(ctxt->userData);
6206 if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc !=
NULL) &&
6212 ctxt->myDoc->intSubset =
6214 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
6215 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
6235htmlParseChunk(htmlParserCtxtPtr ctxt,
const char *
chunk,
int size,
6237 if ((ctxt ==
NULL) || (ctxt->input ==
NULL)) {
6239 "htmlParseChunk: context error\n",
NULL,
NULL);
6245 size_t cur = ctxt->input->cur - ctxt->input->base;
6252 ctxt->disableSAX = 1;
6260 if ((
terminate) || (ctxt->input->buf->buffer->use > 80))
6264 if ((ctxt->input !=
NULL) && ctxt->input->buf !=
NULL) {
6270 size_t current = ctxt->input->cur - ctxt->input->base;
6288 ctxt->wellFormed = 0;
6291 if ((ctxt->sax) && (ctxt->sax->endDocument !=
NULL))
6292 ctxt->sax->endDocument(ctxt->userData);
6321htmlCreatePushParserCtxt(htmlSAXHandlerPtr
sax,
void *user_data,
6324 htmlParserCtxtPtr ctxt;
6325 htmlParserInputPtr inputStream;
6333 ctxt = htmlNewParserCtxt();
6343 ctxt->sax = (htmlSAXHandlerPtr)
xmlMalloc(
sizeof(htmlSAXHandler));
6344 if (ctxt->sax ==
NULL) {
6349 memcpy(ctxt->sax,
sax,
sizeof(htmlSAXHandler));
6350 if (user_data !=
NULL)
6351 ctxt->userData = user_data;
6354 ctxt->directory =
NULL;
6359 inputStream = htmlNewInputStream(ctxt);
6360 if (inputStream ==
NULL) {
6367 inputStream->filename =
NULL;
6369 inputStream->filename = (
char *)
6371 inputStream->buf =
buf;
6377 (ctxt->input->buf !=
NULL)) {
6379 size_t cur = ctxt->input->cur - ctxt->input->base;
6388 ctxt->progressive = 1;
6411 htmlSAXHandlerPtr
sax,
void *userData) {
6413 htmlParserCtxtPtr ctxt;
6425 ctxt->userData = userData;
6428 htmlParseDocument(ctxt);
6432 ctxt->userData =
NULL;
6434 htmlFreeParserCtxt(ctxt);
6469 htmlParserCtxtPtr ctxt;
6470 htmlParserInputPtr inputStream;
6471 char *canonicFilename;
6478 ctxt = htmlNewParserCtxt();
6483 if (canonicFilename ==
NULL) {
6484#ifdef LIBXML_SAX1_ENABLED
6495 if (inputStream ==
NULL) {
6511 htmlCheckEncoding (ctxt,
content);
6540 htmlParserCtxtPtr ctxt;
6541 htmlSAXHandlerPtr oldsax =
NULL;
6550 ctxt->userData = userData;
6553 htmlParseDocument(ctxt);
6558 ctxt->userData =
NULL;
6560 htmlFreeParserCtxt(ctxt);
6591htmlHandleOmittedElem(
int val) {
6592 int old = htmlOmittedDefaultValue;
6594 htmlOmittedDefaultValue =
val;
6609htmlElementAllowedHere(
const htmlElemDesc*
parent,
const xmlChar* elt) {
6632htmlElementStatusHere(
const htmlElemDesc*
parent,
const htmlElemDesc* elt) {
6634 return HTML_INVALID ;
6635 if ( ! htmlElementAllowedHere(
parent, (
const xmlChar*) elt->name ) )
6636 return HTML_INVALID ;
6638 return ( elt->dtd == 0 ) ? HTML_VALID : HTML_DEPRECATED ;
6655 if ( !elt || !
attr )
6656 return HTML_INVALID ;
6658 if ( elt->attrs_req )
6659 for (
p = elt->attrs_req; *
p; ++
p)
6661 return HTML_REQUIRED ;
6663 if ( elt->attrs_opt )
6664 for (
p = elt->attrs_opt; *
p; ++
p)
6668 if (
legacy && elt->attrs_depr )
6669 for (
p = elt->attrs_depr; *
p; ++
p)
6671 return HTML_DEPRECATED ;
6673 return HTML_INVALID ;
6690htmlNodeStatus(
const htmlNodePtr
node,
int legacy) {
6692 return HTML_INVALID ;
6694 switch (
node->type ) {
6697 ? ( htmlElementAllowedHere (
6698 htmlTagLookup(
node->parent->name) ,
node->name
6699 ) ? HTML_VALID : HTML_INVALID )
6700 : htmlElementStatusHere(
6701 htmlTagLookup(
node->parent->name) ,
6702 htmlTagLookup(
node->name) )
6705 return htmlAttrAllowed(
6707 default:
return HTML_NA ;
6722#define DICT_FREE(str) \
6723 if ((str) && ((!dict) || \
6724 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
6725 xmlFree((char *)(str));
6734htmlCtxtReset(htmlParserCtxtPtr ctxt)
6752 if (ctxt->spaceTab !=
NULL) {
6753 ctxt->spaceTab[0] = -1;
6754 ctxt->space = &ctxt->spaceTab[0];
6769 ctxt->version =
NULL;
6771 ctxt->encoding =
NULL;
6773 ctxt->directory =
NULL;
6775 ctxt->extSubURI =
NULL;
6777 ctxt->extSubSystem =
NULL;
6778 if (ctxt->myDoc !=
NULL)
6782 ctxt->standalone = -1;
6783 ctxt->hasExternalSubset = 0;
6784 ctxt->hasPErefs = 0;
6790 ctxt->wellFormed = 1;
6791 ctxt->nsWellFormed = 1;
6792 ctxt->disableSAX = 0;
6794 ctxt->vctxt.userData = ctxt;
6797 ctxt->record_info = 0;
6798 ctxt->checkIndex = 0;
6803 ctxt->catalogs =
NULL;
6806 if (ctxt->attsDefault !=
NULL) {
6808 ctxt->attsDefault =
NULL;
6810 if (ctxt->attsSpecial !=
NULL) {
6812 ctxt->attsSpecial =
NULL;
6827htmlCtxtUseOptions(htmlParserCtxtPtr ctxt,
int options)
6832 if (
options & HTML_PARSE_NOWARNING) {
6833 ctxt->sax->warning =
NULL;
6834 ctxt->vctxt.warning =
NULL;
6838 if (
options & HTML_PARSE_NOERROR) {
6839 ctxt->sax->error =
NULL;
6840 ctxt->vctxt.error =
NULL;
6841 ctxt->sax->fatalError =
NULL;
6845 if (
options & HTML_PARSE_PEDANTIC) {
6852 ctxt->keepBlanks = 0;
6857 ctxt->keepBlanks = 1;
6858 if (
options & HTML_PARSE_RECOVER) {
6860 options -= HTML_PARSE_RECOVER;
6863 if (
options & HTML_PARSE_COMPACT) {
6864 ctxt->options |= HTML_PARSE_COMPACT;
6865 options -= HTML_PARSE_COMPACT;
6871 if (
options & HTML_PARSE_NODEFDTD) {
6872 ctxt->options |= HTML_PARSE_NODEFDTD;
6873 options -= HTML_PARSE_NODEFDTD;
6875 if (
options & HTML_PARSE_IGNORE_ENC) {
6876 ctxt->options |= HTML_PARSE_IGNORE_ENC;
6877 options -= HTML_PARSE_IGNORE_ENC;
6879 if (
options & HTML_PARSE_NOIMPLIED) {
6880 ctxt->options |= HTML_PARSE_NOIMPLIED;
6881 options -= HTML_PARSE_NOIMPLIED;
6883 ctxt->dictNames = 0;
6900htmlDoRead(htmlParserCtxtPtr ctxt,
const char *URL,
const char *
encoding,
6905 htmlCtxtUseOptions(ctxt,
options);
6913 if (ctxt->input->encoding !=
NULL)
6918 if ((URL !=
NULL) && (ctxt->input !=
NULL) &&
6919 (ctxt->input->filename ==
NULL))
6921 htmlParseDocument(ctxt);
6925 if ((ctxt->dictNames) &&
6927 (
ret->dict == ctxt->dict))
6948 htmlParserCtxtPtr ctxt;
6954 ctxt = htmlCreateDocParserCtxt(
cur,
NULL);
6973 htmlParserCtxtPtr ctxt;
6997 htmlParserCtxtPtr ctxt;
7003 htmlDefaultSAXHandlerInit();
7004 if (ctxt->sax !=
NULL)
7025 htmlParserCtxtPtr ctxt;
7027 htmlParserInputPtr
stream;
7037 ctxt = htmlNewParserCtxt();
7045 htmlFreeParserCtxt(ctxt);
7069 htmlParserCtxtPtr ctxt;
7080 if (ioclose !=
NULL)
7084 ctxt = htmlNewParserCtxt();
7113htmlCtxtReadDoc(htmlParserCtxtPtr ctxt,
const xmlChar *
cur,
7118 return (htmlCtxtReadMemory(ctxt, (
const char *)
cur,
xmlStrlen(
cur), URL,
7135htmlCtxtReadFile(htmlParserCtxtPtr ctxt,
const char *
filename,
7146 htmlCtxtReset(ctxt);
7171htmlCtxtReadMemory(htmlParserCtxtPtr ctxt,
const char *
buffer,
int size,
7183 htmlCtxtReset(ctxt);
7214htmlCtxtReadFd(htmlParserCtxtPtr ctxt,
int fd,
7226 htmlCtxtReset(ctxt);
7271 htmlCtxtReset(ctxt);
7276 if (ioclose !=
NULL)
XMLPUBFUN void XMLCALL xmlSAX2IgnorableWhitespace(void *ctx, const xmlChar *ch, int len)
int strcmp(const char *String1, const char *String2)
ACPI_SIZE strlen(const char *String)
_In_ uint16_t _Out_ ULONG * atts
int xmlBufResetInput(xmlBufPtr buf, xmlParserInputPtr input)
size_t xmlBufGetInputBase(xmlBufPtr buf, xmlParserInputPtr input)
int xmlBufSetInputBaseCur(xmlBufPtr buf, xmlParserInputPtr input, size_t base, size_t cur)
static const WCHAR quote[]
UINT(* handler)(MSIPACKAGE *)
__kernel_ptrdiff_t ptrdiff_t
int xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL xmlFindCharEncodingHandler(const char *name)
@ XML_CHAR_ENCODING_UTF16BE
@ XML_CHAR_ENCODING_UCS4LE
@ XML_CHAR_ENCODING_ERROR
@ XML_CHAR_ENCODING_UCS4BE
@ XML_CHAR_ENCODING_8859_1
@ XML_CHAR_ENCODING_UTF16LE
XMLPUBFUN xmlCharEncoding XMLCALL xmlParseCharEncoding(const char *name)
XMLPUBFUN xmlCharEncoding XMLCALL xmlDetectCharEncoding(const unsigned char *in, int len)
GLint GLint GLsizei GLsizei GLsizei depth
GLuint GLuint GLsizei count
GLdouble GLdouble GLdouble r
GLdouble GLdouble GLdouble GLdouble q
GLenum GLuint GLenum GLsizei const GLchar * buf
GLenum GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * bits
GLenum GLenum GLenum input
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
void MSVCRT() terminate()
int __xmlRegisterCallbacks
#define memcpy(s1, s2, n)
struct task_struct * current
static const WCHAR desc[]
XMLPUBFUN xmlParserInputPtr XMLCALL xmlNewInputStream(xmlParserCtxtPtr ctxt)
#define IS_PUBIDCHAR_CH(c)
#define IS_ASCII_DIGIT(c)
XMLPUBFUN xmlChar XMLCALL xmlPopInput(xmlParserCtxtPtr ctxt)
XMLPUBFUN int XMLCALL xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
XMLPUBFUN int XMLCALL inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
XMLPUBFUN int XMLCALL xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
XMLPUBFUN xmlParserInputPtr XMLCALL inputPop(xmlParserCtxtPtr ctxt)
XMLPUBFUN xmlNodePtr XMLCALL nodePop(xmlParserCtxtPtr ctxt)
#define IS_ASCII_LETTER(c)
XMLPUBFUN xmlParserCtxtPtr XMLCALL xmlCreateMemoryParserCtxt(const char *buffer, int size)
XMLPUBFUN void XMLCALL xmlFreeInputStream(xmlParserInputPtr input)
static unsigned __int64 next
XMLPUBFUN const xmlChar *XMLCALL xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len)
XMLPUBFUN xmlDictPtr XMLCALL xmlDictCreate(void)
XMLPUBVAR xmlMallocFunc xmlMallocAtomic
XMLPUBVAR int xmlLineNumbersDefaultValue
XMLPUBVAR xmlMallocFunc xmlMalloc
XMLPUBVAR int xmlKeepBlanksDefaultValue
XMLPUBVAR xmlRegisterNodeFunc xmlRegisterNodeDefaultValue
XMLPUBVAR xmlFreeFunc xmlFree
XMLPUBVAR void * xmlGenericErrorContext
XMLPUBVAR xmlReallocFunc xmlRealloc
XMLPUBVAR xmlSAXLocator xmlDefaultSAXLocator
XMLPUBVAR xmlSAXHandlerV1 xmlDefaultSAXHandler
XMLPUBVAR xmlGenericErrorFunc xmlGenericError
XMLPUBFUN void XMLCALL xmlHashFree(xmlHashTablePtr table, xmlHashDeallocator f)
XMLPUBFUN void XMLCALL xmlHashDefaultDeallocator(void *entry, const xmlChar *name)
@ XML_PARSER_PUBLIC_LITERAL
@ XML_PARSER_SYSTEM_LITERAL
@ XML_PARSER_ATTRIBUTE_VALUE
@ XML_PARSER_ENTITY_VALUE
@ XML_PARSER_CDATA_SECTION
XMLPUBFUN xmlParserInputPtr XMLCALL xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, xmlCharEncoding enc)
XMLPUBFUN void XMLCALL xmlInitParser(void)
XMLPUBFUN int XMLCALL xmlParserInputGrow(xmlParserInputPtr in, int len)
XMLPUBFUN void XMLCALL xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
XMLPUBFUN void XMLCALL xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, const xmlParserNodeInfoPtr info)
XMLPUBFUN xmlParserInputPtr XMLCALL xmlLoadExternalEntity(const char *URL, const char *ID, xmlParserCtxtPtr ctxt)
XMLPUBFUN void XMLCALL xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
XMLPUBFUN xmlDtdPtr XMLCALL xmlCreateIntSubset(xmlDocPtr doc, const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID)
XMLPUBFUN xmlNodePtr XMLCALL xmlGetLastChild(const xmlNode *parent)
XMLPUBFUN void XMLCALL xmlFreeDoc(xmlDocPtr cur)
XMLPUBFUN size_t XMLCALL xmlBufUse(const xmlBufPtr buf)
XMLPUBFUN int XMLCALL xmlNodeIsText(const xmlNode *node)
XMLPUBFUN xmlDtdPtr XMLCALL xmlGetIntSubset(const xmlDoc *doc)
xmlParserInput * xmlParserInputPtr
XMLPUBFUN size_t XMLCALL xmlBufShrink(xmlBufPtr buf, size_t len)
xmlParserCtxt * xmlParserCtxtPtr
XMLPUBFUN xmlChar *XMLCALL xmlBufContent(const xmlBuf *buf)
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, int blank_chars)
#define growBuffer(buffer, n)
#define COPY_BUF(l, b, i, v)
wchar_t const *const size_t const buffer_size
struct _xmlDictEntry * dict
const xmlChar * ExternalID
xmlParserInputState instate
static int processed(const type_t *type)
XMLPUBFUN xmlChar *XMLCALL xmlCanonicPath(const xmlChar *path)
wchar_t tm const _CrtWcstime_Writes_and_advances_ptr_ count wchar_t ** out
XMLPUBFUN char *XMLCALL xmlParserGetDirectory(const char *filename)
XMLPUBFUN void XMLCALL xmlFreeParserInputBuffer(xmlParserInputBufferPtr in)
XMLPUBFUN xmlParserInputBufferPtr XMLCALL xmlAllocParserInputBuffer(xmlCharEncoding enc)
XMLPUBFUN xmlParserInputBufferPtr XMLCALL xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc)
int(XMLCALL * xmlInputReadCallback)(void *context, char *buffer, int len)
XMLPUBFUN xmlParserInputBufferPtr XMLCALL xmlParserInputBufferCreateIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, void *ioctx, xmlCharEncoding enc)
int(XMLCALL * xmlInputCloseCallback)(void *context)
XMLPUBFUN int XMLCALL xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf)
XMLPUBFUN xmlParserInputBufferPtr XMLCALL xmlParserInputBufferCreateMem(const char *mem, int size, xmlCharEncoding enc)
XMLPUBFUN void XMLCDECL XMLPUBFUN void XMLCDECL XMLPUBFUN void XMLCDECL XMLPUBFUN void XMLCDECL xmlParserValidityWarning(void *ctx, const char *msg,...) LIBXML_ATTR_FORMAT(2
XMLPUBFUN void XMLCDECL XMLPUBFUN void XMLCDECL XMLPUBFUN void XMLCDECL xmlParserValidityError(void *ctx, const char *msg,...) LIBXML_ATTR_FORMAT(2
@ XML_ERR_ATTRIBUTE_NOT_FINISHED
@ XML_HTML_INCORRECTLY_OPENED_COMMENT
@ XML_ERR_ENTITYREF_SEMICOL_MISSING
@ XML_ERR_LITERAL_NOT_FINISHED
@ XML_ERR_LTSLASH_REQUIRED
@ XML_ERR_DOCTYPE_NOT_FINISHED
@ XML_ERR_PI_NOT_FINISHED
@ XML_ERR_ATTRIBUTE_REDEFINED
@ XML_ERR_TAG_NAME_MISMATCH
@ XML_ERR_INVALID_ENCODING
@ XML_ERR_INVALID_CHARREF
@ XML_HTML_STRUCURE_ERROR
@ XML_ERR_INVALID_DEC_CHARREF
@ XML_ERR_LITERAL_NOT_STARTED
@ XML_ERR_COMMENT_NOT_FINISHED
@ XML_ERR_ATTRIBUTE_WITHOUT_VALUE
@ XML_ERR_INVALID_HEX_CHARREF
@ XML_ERR_COMMENT_ABRUPTLY_ENDED
@ XML_ERR_UNSUPPORTED_ENCODING
XMLPUBFUN xmlChar *XMLCALL xmlStrndup(const xmlChar *cur, int len)
XMLPUBFUN int XMLCALL xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2)
XMLPUBFUN xmlChar *XMLCALL xmlStrdup(const xmlChar *cur)
XMLPUBFUN const xmlChar *XMLCALL xmlStrcasestr(const xmlChar *str, const xmlChar *val)
XMLPUBFUN int XMLCALL xmlStrEqual(const xmlChar *str1, const xmlChar *str2)
XMLPUBFUN int XMLCALL xmlStrcmp(const xmlChar *str1, const xmlChar *str2)
XMLPUBFUN int XMLCALL xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len)
XMLPUBFUN int XMLCALL xmlStrlen(const xmlChar *str)
#define LIBXML_ATTR_FORMAT(fmt, args)