11#ifdef LIBXML_HTML_ENABLED
/*
 * Named size constants. The code that consumes them lies outside this
 * excerpt, so their exact role is not documented here.
 */
37#define HTML_MAX_NAMELEN 1000
38#define HTML_PARSER_BIG_BUFFER_SIZE 1000
39#define HTML_PARSER_BUFFER_SIZE 100
/*
 * File-local flag, initialised to 1. htmlCheckImplied() and
 * htmlCheckParagraph() both test it with `!htmlOmittedDefaultValue`.
 */
41static int htmlOmittedDefaultValue = 1;
43xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt,
int len,
45static void htmlParseComment(htmlParserCtxtPtr ctxt);
61htmlErrMemory(xmlParserCtxtPtr ctxt,
const char *
extra)
63 if ((ctxt !=
NULL) && (ctxt->disableSAX != 0) &&
75 "Memory allocation failed : %s\n",
extra);
79 NULL,
NULL, 0, 0,
"Memory allocation failed\n");
96 if ((ctxt !=
NULL) && (ctxt->disableSAX != 0) &&
103 (
const char *)
str1, (
const char *)
str2,
107 ctxt->wellFormed = 0;
123 if ((ctxt !=
NULL) && (ctxt->disableSAX != 0) &&
132 ctxt->wellFormed = 0;
151htmlnamePush(htmlParserCtxtPtr ctxt,
const xmlChar *
value)
157 if (ctxt->nameNr >= ctxt->nameMax) {
158 size_t newSize = ctxt->nameMax * 2;
162 newSize *
sizeof(ctxt->nameTab[0]));
164 htmlErrMemory(ctxt,
NULL);
168 ctxt->nameMax = newSize;
170 ctxt->nameTab[ctxt->nameNr] =
value;
172 return (ctxt->nameNr++);
183htmlnamePop(htmlParserCtxtPtr ctxt)
187 if (ctxt->nameNr <= 0)
190 if (ctxt->nameNr < 0)
192 if (ctxt->nameNr > 0)
193 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
196 ret = ctxt->nameTab[ctxt->nameNr];
197 ctxt->nameTab[ctxt->nameNr] =
NULL;
211htmlNodeInfoPush(htmlParserCtxtPtr ctxt, htmlParserNodeInfo *
value)
213 if (ctxt->nodeInfoNr >= ctxt->nodeInfoMax) {
214 if (ctxt->nodeInfoMax == 0)
215 ctxt->nodeInfoMax = 5;
216 ctxt->nodeInfoMax *= 2;
217 ctxt->nodeInfoTab = (htmlParserNodeInfo *)
218 xmlRealloc((htmlParserNodeInfo *)ctxt->nodeInfoTab,
220 sizeof(ctxt->nodeInfoTab[0]));
221 if (ctxt->nodeInfoTab ==
NULL) {
222 htmlErrMemory(ctxt,
NULL);
226 ctxt->nodeInfoTab[ctxt->nodeInfoNr] = *
value;
227 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
228 return (ctxt->nodeInfoNr++);
239static htmlParserNodeInfo *
240htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
242 if (ctxt->nodeInfoNr <= 0)
245 if (ctxt->nodeInfoNr < 0)
247 if (ctxt->nodeInfoNr > 0)
248 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr - 1];
250 ctxt->nodeInfo =
NULL;
251 return &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
/*
 * Input-access shorthands. Every macro below expands to an expression on a
 * variable named `ctxt`, which must be in scope at the point of use.
 */
/* toupper() of the byte at the current input position. */
281#define UPPER (toupper(*ctxt->input->cur))
/* Advance the cursor and the column counter by val.
 * Expands to an unparenthesised comma expression and evaluates val twice. */
283#define SKIP(val) ctxt->input->cur += (val),ctxt->input->col+=(val)
/* Byte at offset val from the current input position. */
285#define NXT(val) ctxt->input->cur[(val)]
/* toupper() of the byte at offset val from the current input position. */
287#define UPP(val) (toupper(ctxt->input->cur[(val)]))
/* Current read pointer and start-of-buffer pointer of the input. */
289#define CUR_PTR ctxt->input->cur
290#define BASE_PTR ctxt->input->base
/*
 * SHRINK: call xmlParserShrink(ctxt) when more than 2 * INPUT_CHUNK bytes
 * lie between input->base and input->cur and fewer than 2 * INPUT_CHUNK
 * bytes remain between input->cur and input->end.
 *
 * Requires a variable named `ctxt` in scope. Use it as a statement followed
 * by a semicolon ("SHRINK;"). The do { } while (0) wrapper makes the macro
 * a single statement, so an `else` written after "SHRINK;" attaches to the
 * caller's `if` and not to the `if` inside the macro.
 */
#define SHRINK do { \
    if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlParserShrink(ctxt); \
} while (0)
296#define GROW if ((ctxt->progressive == 0) && \
297 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
/* Shorthand for htmlSkipBlankChars(ctxt); needs `ctxt` in scope. */
300#define SKIP_BLANKS htmlSkipBlankChars(ctxt)
/* Byte at the current input position. */
304#define CUR (*ctxt->input->cur)
/* Shorthand for xmlNextChar(ctxt). */
305#define NEXT xmlNextChar(ctxt)
/* -1 while ctxt->token is non-zero, otherwise the byte at the current
 * input position. */
307#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
310#define NEXTL(l) do { \
311 if (*(ctxt->input->cur) == '\n') { \
312 ctxt->input->line++; ctxt->input->col = 1; \
313 } else ctxt->input->col++; \
314 ctxt->token = 0; ctxt->input->cur += l; \
/* Shorthand for htmlCurrentChar(ctxt, &l); l receives the value written
 * through the pointer argument. */
323#define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
/*
 * Append value v to buffer b at index i. When l == 1 the value is stored
 * directly and i is incremented; otherwise xmlCopyChar(l, &b[i], v) is
 * called and its return value is added to i.
 * The expansion is an unbraced if/else with no trailing semicolon, and the
 * arguments are not parenthesised and may be evaluated more than once.
 */
325#define COPY_BUF(l,b,i,v) \
326 if (l == 1) b[i++] = v; \
327 else i += xmlCopyChar(l,&b[i],v)
344htmlFindEncoding(xmlParserCtxtPtr ctxt) {
347 if ((ctxt ==
NULL) || (ctxt->input ==
NULL) ||
350 if ((ctxt->input->cur ==
NULL) || (ctxt->input->end ==
NULL))
353 start = ctxt->input->cur;
354 end = ctxt->input->end;
370 while (((*
cur >=
'A') && (*
cur <=
'Z')) ||
371 ((*
cur >=
'a') && (*
cur <=
'z')) ||
372 ((*
cur >=
'0') && (*
cur <=
'9')) ||
373 (*
cur ==
'-') || (*
cur ==
'_') || (*
cur ==
':') || (*
cur ==
'/'))
395htmlCurrentChar(xmlParserCtxtPtr ctxt,
int *
len) {
396 const unsigned char *
cur;
403 if (ctxt->token != 0) {
408 if (ctxt->input->end - ctxt->input->cur <
INPUT_CHUNK) {
423 if (*ctxt->input->cur < 0x80) {
425 if ((*ctxt->input->cur == 0) &&
426 (ctxt->input->cur < ctxt->input->end)) {
428 "Char 0x%X out of allowed range\n", 0);
431 return(*ctxt->input->cur);
437 guess = htmlFindEncoding(ctxt);
451 "Unsupported encoding %s", guess,
NULL);
469 cur = ctxt->input->cur;
477 avail = ctxt->input->end - ctxt->input->cur;
479 if ((
avail < 2) || ((
cur[1] & 0xc0) != 0x80))
481 if ((
c & 0xe0) == 0xe0) {
482 if ((
avail < 3) || ((
cur[2] & 0xc0) != 0x80))
484 if ((
c & 0xf0) == 0xf0) {
485 if (((
c & 0xf8) != 0xf0) ||
486 (
avail < 4) || ((
cur[3] & 0xc0) != 0x80))
490 val = (
cur[0] & 0x7) << 18;
491 val |= (
cur[1] & 0x3f) << 12;
492 val |= (
cur[2] & 0x3f) << 6;
499 val = (
cur[0] & 0xf) << 12;
500 val |= (
cur[1] & 0x3f) << 6;
508 val = (
cur[0] & 0x1f) << 6;
515 "Char 0x%X out of allowed range\n",
val);
519 if ((*ctxt->input->cur == 0) &&
520 (ctxt->input->cur < ctxt->input->end)) {
522 "Char 0x%X out of allowed range\n", 0);
528 return(*ctxt->input->cur);
535 if (ctxt->input->end - ctxt->input->cur >= 4) {
537 ctxt->input->cur[0], ctxt->input->cur[1],
538 ctxt->input->cur[2], ctxt->input->cur[3]);
543 "Input is not proper UTF-8, indicate encoding !\n",
550 return(*ctxt->input->cur);
563htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
567 if (*(ctxt->input->cur) ==
'\n') {
568 ctxt->input->line++; ctxt->input->col = 1;
569 }
else ctxt->input->col++;
571 if (*ctxt->input->cur == 0)
/*
 * Comma-separated string-literal lists used to build the NULL-terminated
 * name tables below, plus NB_* macros that spell out element counts.
 * The list macros are expanded lazily, so FORMCTRL, HEADING and LIST may be
 * referenced (by INLINE and BLOCK) before their own #define lines.
 * NB_INLINE, NB_BLOCK and NB_FLOW expand to unparenthesised sums and refer
 * to NB_* names whose definitions are not part of this excerpt.
 */
602#define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small"
603#define NB_FONTSTYLE 8
604#define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
606#define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
/* INLINE = FONTSTYLE + PHRASE + SPECIAL + FORMCTRL. */
608#define INLINE FONTSTYLE, PHRASE, SPECIAL, FORMCTRL
609#define NB_INLINE NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTRL
/* BLOCK = HEADING + LIST + the 14 names written out here. */
610#define BLOCK HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address"
611#define NB_BLOCK NB_HEADING + NB_LIST + 14
612#define FORMCTRL "input", "select", "textarea", "label", "button"
616#define HEADING "h1", "h2", "h3", "h4", "h5", "h6"
618#define LIST "ul", "ol", "dir", "menu"
/* FLOW = BLOCK followed by INLINE. */
622#define FLOW BLOCK,INLINE
623#define NB_FLOW NB_BLOCK + NB_INLINE
/* NULL-terminated table holding every name in FLOW. */
627static const char*
const html_flow[] = { FLOW,
NULL } ;
/* NULL-terminated table holding every name in INLINE. */
628static const char*
const html_inline[] = {
INLINE,
NULL } ;
/* Empty table: only the NULL sentinel. */
631static const char*
const html_pcdata[] = {
NULL } ;
/* html_cdata is an alias for the same empty table. */
632#define html_cdata html_pcdata
/*
 * Comma-separated string-literal lists that the *_attrs tables below are
 * assembled from. ATTRS is COREATTRS, then I18N, then EVENTS.
 */
637#define COREATTRS "id", "class", "style", "title"
638#define NB_COREATTRS 4
639#define I18N "lang", "dir"
641#define EVENTS "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress", "onkeydown", "onkeyup"
643#define ATTRS COREATTRS,I18N,EVENTS
/*
 * Number of names in ATTRS: the sum of the COREATTRS, I18N and EVENTS
 * counts. The original referenced the misspelled NB_NB_COREATTRS, which
 * does not match the NB_COREATTRS define, so any use of NB_ATTRS would not
 * have compiled. The sum is parenthesised so it is safe inside larger
 * expressions.
 */
#define NB_ATTRS (NB_COREATTRS + NB_I18N + NB_EVENTS)
/* Attribute-name lists (and their element counts) appended to the
 * col_attrs, talign_attrs and th_td_attr tables. */
645#define CELLHALIGN "align", "char", "charoff"
646#define NB_CELLHALIGN 3
647#define CELLVALIGN "valign"
648#define NB_CELLVALIGN 1
/*
 * Generic NULL-terminated attribute-name tables built directly from the
 * list macros: the full ATTRS set, COREATTRS + I18N, COREATTRS alone and
 * I18N alone.
 */
650static const char*
const html_attrs[] = { ATTRS,
NULL } ;
651static const char*
const core_i18n_attrs[] = { COREATTRS, I18N,
NULL } ;
652static const char*
const core_attrs[] = { COREATTRS,
NULL } ;
653static const char*
const i18n_attrs[] = { I18N,
NULL } ;
/*
 * NULL-terminated string tables referenced from html40ElementTable through
 * the DECL cast.
 */
/* ATTRS plus the names listed here; used by the "a" entry. */
657static const char*
const a_attrs[] = { ATTRS,
"charset",
"type",
"name",
658 "href",
"hreflang",
"rel",
"rev",
"accesskey",
"shape",
"coords",
659 "tabindex",
"onfocus",
"onblur",
NULL } ;
/* Single name "target"; used by the "a", "area" and "form" entries. */
660static const char*
const target_attr[] = {
"target",
NULL } ;
/* "rows" and "cols"; used by the "textarea" entry. */
661static const char*
const rows_cols_attr[] = {
"rows",
"cols",
NULL } ;
/* Single name "alt"; used by the "area" entry. */
662static const char*
const alt_attr[] = {
"alt",
NULL } ;
/* "src" and "alt"; used by the "img" entry. */
663static const char*
const src_alt_attrs[] = {
"src",
"alt",
NULL } ;
664static const char*
const href_attrs[] = {
"href",
NULL } ;
665static const char*
const clear_attrs[] = {
"clear",
NULL } ;
/* Every INLINE name plus "p". */
666static const char*
const inline_p[] = {
INLINE,
"p",
NULL } ;
/* Every FLOW name plus "param". */
668static const char*
const flow_param[] = { FLOW,
"param",
NULL } ;
/*
 * NULL-terminated string tables (continued). Tables that start with
 * COREATTRS or ATTRS extend that macro's names with the literals that
 * follow.
 */
/* COREATTRS (not the full ATTRS set) plus applet-specific names. */
669static const char*
const applet_attrs[] = { COREATTRS ,
"codebase",
670 "archive",
"alt",
"name",
"height",
"width",
"align",
671 "hspace",
"vspace",
NULL } ;
/* Stand-alone list with no shared macro prefix; used by the "area" entry. */
672static const char*
const area_attrs[] = {
"shape",
"coords",
"href",
"nohref",
673 "tabindex",
"accesskey",
"onfocus",
"onblur",
NULL } ;
674static const char*
const basefont_attrs[] =
675 {
"id",
"size",
"color",
"face",
NULL } ;
676static const char*
const quote_attrs[] = { ATTRS,
"cite",
NULL } ;
/* FLOW plus "ins" and "del"; used by the "body" entry. */
677static const char*
const body_contents[] = { FLOW,
"ins",
"del",
NULL } ;
/* ATTRS plus "onload" and "onunload"; used by the "body" entry. */
678static const char*
const body_attrs[] = { ATTRS,
"onload",
"onunload",
NULL } ;
/* Used by the "body" entry alongside body_attrs. */
679static const char*
const body_depr[] = {
"background",
"bgcolor",
"text",
680 "link",
"vlink",
"alink",
NULL } ;
/* ATTRS plus the names listed here; used by the "button" entry. */
681static const char*
const button_attrs[] = { ATTRS,
"name",
"value",
"type",
682 "disabled",
"tabindex",
"accesskey",
"onfocus",
"onblur",
NULL } ;
/* NULL-terminated string tables (continued). */
/* ATTRS, "span", "width", then CELLHALIGN and CELLVALIGN; used by the
 * "colgroup" entry. */
685static const char*
const col_attrs[] = { ATTRS,
"span",
"width", CELLHALIGN, CELLVALIGN,
NULL } ;
/* Single name "col"; used by the "colgroup" entry. */
686static const char*
const col_elt[] = {
"col",
NULL } ;
687static const char*
const edit_attrs[] = { ATTRS,
"datetime",
"cite",
NULL } ;
/* ATTRS plus "compact" (compare compact_attr below, which holds only
 * "compact"); used by the "dir" entry. */
688static const char*
const compact_attrs[] = { ATTRS,
"compact",
NULL } ;
/* "dt" and "dd"; used by the "dl" entry. */
689static const char*
const dl_contents[] = {
"dt",
"dd",
NULL } ;
/* Single name "compact"; used by the "dl" entry. */
690static const char*
const compact_attr[] = {
"compact",
NULL } ;
/* Single name "label"; used by the "optgroup" entry. */
691static const char*
const label_attr[] = {
"label",
NULL } ;
692static const char*
const fieldset_contents[] = { FLOW,
"legend" } ;
/* NULL-terminated string tables (continued). */
/* COREATTRS and I18N (no EVENTS) plus "size", "color", "face". */
693static const char*
const font_attrs[] = { COREATTRS, I18N,
"size",
"color",
"face" ,
NULL } ;
/*
 * HEADING, LIST and INLINE followed by an explicit list. Compared with the
 * literals in the BLOCK macro, this list leaves out "dl" and "form".
 * Used by the "form" entry.
 */
694static const char*
const form_contents[] = { HEADING,
LIST,
INLINE,
"pre",
"p",
"div",
"center",
"noscript",
"noframes",
"blockquote",
"isindex",
"hr",
"table",
"fieldset",
"address",
NULL } ;
/* ATTRS plus form-specific names; used by the "form" entry. */
695static const char*
const form_attrs[] = { ATTRS,
"method",
"enctype",
"accept",
"name",
"onsubmit",
"onreset",
"accept-charset",
NULL } ;
/* COREATTRS plus frame-specific names. */
696static const char*
const frame_attrs[] = { COREATTRS,
"longdesc",
"name",
"src",
"frameborder",
"marginwidth",
"marginheight",
"noresize",
"scrolling" ,
NULL } ;
/* COREATTRS plus the names listed here; used by the "frameset" entry. */
697static const char*
const frameset_attrs[] = { COREATTRS,
"rows",
"cols",
"onload",
"onunload",
NULL } ;
/* Used by the "frameset" entry. */
698static const char*
const frameset_contents[] = {
"frameset",
"frame",
"noframes",
NULL } ;
/* I18N plus "profile". */
699static const char*
const head_attrs[] = { I18N,
"profile",
NULL } ;
700static const char*
const head_contents[] = {
"title",
"isindex",
"base",
"script",
"style",
"meta",
"link",
"object",
NULL } ;
/* NULL-terminated string tables (continued). */
701static const char*
const hr_depr[] = {
"align",
"noshade",
"size",
"width",
NULL } ;
/* Single name "version"; used by the "html" entry. */
702static const char*
const version_attr[] = {
"version",
NULL } ;
/* "head", "body", "frameset"; used by the "html" entry. */
703static const char*
const html_content[] = {
"head",
"body",
"frameset",
NULL } ;
/* COREATTRS plus iframe-specific names. */
704static const char*
const iframe_attrs[] = { COREATTRS,
"longdesc",
"name",
"src",
"frameborder",
"marginwidth",
"marginheight",
"scrolling",
"align",
"height",
"width",
NULL } ;
/* ATTRS plus the names listed here; used by the "img" entry. */
705static const char*
const img_attrs[] = { ATTRS,
"longdesc",
"name",
"height",
"width",
"usemap",
"ismap",
NULL } ;
/* COREATTRS plus embed-specific names. */
706static const char*
const embed_attrs[] = { COREATTRS,
"align",
"alt",
"border",
"code",
"codebase",
"frameborder",
"height",
"hidden",
"hspace",
"name",
"palette",
"pluginspace",
"pluginurl",
"src",
"type",
"units",
"vspace",
"width",
NULL } ;
/* ATTRS plus input-specific names. */
707static const char*
const input_attrs[] = { ATTRS,
"type",
"name",
"value",
"checked",
"disabled",
"readonly",
"size",
"maxlength",
"src",
"alt",
"usemap",
"ismap",
"tabindex",
"accesskey",
"onfocus",
"onblur",
"onselect",
"onchange",
"accept",
NULL } ;
/* COREATTRS and I18N plus "prompt". */
708static const char*
const prompt_attrs[] = { COREATTRS, I18N,
"prompt",
NULL } ;
/* ATTRS plus the names listed here; used by the "label" entry. Distinct
 * from label_attr, which holds only "label". */
709static const char*
const label_attrs[] = { ATTRS,
"for",
"accesskey",
"onfocus",
"onblur",
NULL } ;
/* ATTRS plus "accesskey"; used by the "legend" entry. */
710static const char*
const legend_attrs[] = { ATTRS,
"accesskey",
NULL } ;
/* Single name "align"; shared by the "div", "h1".."h6", "img", "legend"
 * and "p" entries. */
711static const char*
const align_attr[] = {
"align",
NULL } ;
/* NULL-terminated string tables (continued). */
/* ATTRS plus link-specific names. */
712static const char*
const link_attrs[] = { ATTRS,
"charset",
"href",
"hreflang",
"type",
"rel",
"rev",
"media",
NULL } ;
/* Every BLOCK name plus "area"; used by the "map" entry. */
713static const char*
const map_contents[] = {
BLOCK,
"area",
NULL } ;
/* Single name "name"; used by the "map" entry. */
714static const char*
const name_attr[] = {
"name",
NULL } ;
/* Single name "action"; used by the "form" entry. */
715static const char*
const action_attr[] = {
"action",
NULL } ;
/* Every BLOCK name plus "li"; used by the "dir" entry. */
716static const char*
const blockli_elt[] = {
BLOCK,
"li",
NULL } ;
/* I18N plus meta-specific names. */
717static const char*
const meta_attrs[] = { I18N,
"http-equiv",
"name",
"scheme",
"charset",
NULL } ;
718static const char*
const content_attr[] = {
"content",
NULL } ;
/* Single name "type"; used by the "script" and "style" entries. */
719static const char*
const type_attr[] = {
"type",
NULL } ;
/* "body" followed by FLOW; MODIFIER is a macro defined outside this
 * excerpt. Used by the "noframes" entry. */
720static const char*
const noframes_content[] = {
"body", FLOW MODIFIER,
NULL } ;
/* Every FLOW name plus "param" (same contents as flow_param); used by the
 * "object" entry. */
721static const char*
const object_contents[] = { FLOW,
"param",
NULL } ;
/* ATTRS plus object-specific names; used by the "object" entry. */
722static const char*
const object_attrs[] = { ATTRS,
"declare",
"classid",
"codebase",
"data",
"type",
"codetype",
"archive",
"standby",
"height",
"width",
"usemap",
"name",
"tabindex",
NULL } ;
/* Used by the "object" entry alongside object_attrs. */
723static const char*
const object_depr[] = {
"align",
"border",
"hspace",
"vspace",
NULL } ;
/* "type", "compact", "start"; used by the "ol" entry after html_attrs. */
724static const char*
const ol_attrs[] = {
"type",
"compact",
"start",
NULL} ;
/* Single name "option"; used by the "optgroup" entry. */
725static const char*
const option_elt[] = {
"option",
NULL } ;
/* ATTRS plus "disabled"; used by the "optgroup" entry. */
726static const char*
const optgroup_attrs[] = { ATTRS,
"disabled",
NULL } ;
727static const char*
const option_attrs[] = { ATTRS,
"disabled",
"label",
"selected",
"value",
NULL } ;
/* NULL-terminated string tables (continued). */
728static const char*
const param_attrs[] = {
"id",
"value",
"valuetype",
"type",
NULL } ;
/* Single name "width"; used by the "pre" entry. */
729static const char*
const width_attr[] = {
"width",
NULL } ;
/*
 * PHRASE followed by an explicit list; used by the "pre" entry. Compared
 * with FONTSTYLE the list omits "big" and "small"; compared with SPECIAL
 * it omits "img", "applet", "embed", "object", "font", "basefont", "sub"
 * and "sup". No FORMCTRL names are included.
 */
730static const char*
const pre_content[] = { PHRASE,
"tt",
"i",
"b",
"u",
"s",
"strike",
"a",
"br",
"script",
"map",
"q",
"span",
"bdo",
"iframe",
NULL } ;
/* Stand-alone list with no shared macro prefix; used by the "script"
 * entry. */
731static const char*
const script_attrs[] = {
"charset",
"src",
"defer",
"event",
"for",
NULL } ;
/* Single name "language"; used by the "script" entry. */
732static const char*
const language_attr[] = {
"language",
NULL } ;
/* "optgroup" and "option"; used by the "select" entry. */
733static const char*
const select_content[] = {
"optgroup",
"option",
NULL } ;
/* ATTRS plus select-specific names; used by the "select" entry. */
734static const char*
const select_attrs[] = { ATTRS,
"name",
"size",
"multiple",
"disabled",
"tabindex",
"onfocus",
"onblur",
"onchange",
NULL } ;
/* I18N plus "media" and "title"; used by the "style" entry. */
735static const char*
const style_attrs[] = { I18N,
"media",
"title",
NULL } ;
/* ATTRS plus table-specific names; used by the "table" entry. */
736static const char*
const table_attrs[] = { ATTRS,
"summary",
"width",
"border",
"frame",
"rules",
"cellspacing",
"cellpadding",
"datapagesize",
NULL } ;
/* "align" and "bgcolor"; used by the "table" entry. */
737static const char*
const table_depr[] = {
"align",
"bgcolor",
NULL } ;
/* NULL-terminated string tables (continued), then the DECL cast macro. */
/* Used by the "table" entry. */
738static const char*
const table_contents[] = {
"caption",
"col",
"colgroup",
"thead",
"tfoot",
"tbody",
"tr",
NULL} ;
/* Single name "tr"; used by the "tbody", "tfoot" and "thead" entries. */
739static const char*
const tr_elt[] = {
"tr",
NULL } ;
/* ATTRS followed by CELLHALIGN and CELLVALIGN; used by the "tbody",
 * "tfoot", "thead" and "tr" entries. */
740static const char*
const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN,
NULL} ;
/* Used by the "td" and "th" entries alongside th_td_attr. */
741static const char*
const th_td_depr[] = {
"nowrap",
"bgcolor",
"width",
"height",
NULL } ;
/* ATTRS, cell-specific names, then CELLHALIGN and CELLVALIGN; used by the
 * "td" and "th" entries. */
742static const char*
const th_td_attr[] = { ATTRS,
"abbr",
"axis",
"headers",
"scope",
"rowspan",
"colspan", CELLHALIGN, CELLVALIGN,
NULL } ;
/* ATTRS plus textarea-specific names; used by the "textarea" entry. */
743static const char*
const textarea_attrs[] = { ATTRS,
"name",
"disabled",
"readonly",
"tabindex",
"accesskey",
"onfocus",
"onblur",
"onselect",
"onchange",
NULL } ;
/* "th" and "td"; used by the "tr" entry. */
744static const char*
const tr_contents[] = {
"th",
"td",
NULL } ;
/* Single name "bgcolor"; used by the "tr" entry. */
745static const char*
const bgcolor_attr[] = {
"bgcolor",
NULL } ;
/* Single name "li"; used by the "ol" and "ul" entries. */
746static const char*
const li_elt[] = {
"li",
NULL } ;
/* "type" and "compact"; used by the "ul" entry. */
747static const char*
const ul_depr[] = {
"type",
"compact",
NULL} ;
/* Single name "dir"; used by the "bdo" entry. */
748static const char*
const dir_attr[] = {
"dir",
NULL} ;
/*
 * Cast prefix written in front of each table name inside the
 * html40ElementTable initialisers (e.g. `DECL html_inline`); it converts
 * the `const char * const []` tables to `const char **`.
 */
750#define DECL (const char**)
752static const htmlElemDesc
753html40ElementTable[] = {
754{
"a", 0, 0, 0, 0, 0, 0, 1,
"anchor ",
755 DECL html_inline ,
NULL , DECL a_attrs , DECL target_attr,
NULL
757{
"abbr", 0, 0, 0, 0, 0, 0, 1,
"abbreviated form",
760{
"acronym", 0, 0, 0, 0, 0, 0, 1,
"",
763{
"address", 0, 0, 0, 0, 0, 0, 0,
"information on author ",
766{
"applet", 0, 0, 0, 0, 1, 1, 2,
"java applet ",
769{
"area", 0, 2, 2, 1, 0, 0, 0,
"client-side image map area ",
770 EMPTY ,
NULL , DECL area_attrs , DECL target_attr, DECL alt_attr
772{
"b", 0, 3, 0, 0, 0, 0, 1,
"bold text style",
775{
"base", 0, 2, 2, 1, 0, 0, 0,
"document base uri ",
778{
"basefont", 0, 2, 2, 1, 1, 1, 1,
"base font size " ,
781{
"bdo", 0, 0, 0, 0, 0, 0, 1,
"i18n bidi over-ride ",
782 DECL html_inline ,
NULL , DECL core_i18n_attrs,
NULL, DECL dir_attr
784{
"big", 0, 3, 0, 0, 0, 0, 1,
"large text style",
787{
"blockquote", 0, 0, 0, 0, 0, 0, 0,
"long quotation ",
790{
"body", 1, 1, 0, 0, 0, 0, 0,
"document body ",
791 DECL body_contents ,
"div" , DECL body_attrs, DECL body_depr,
NULL
793{
"br", 0, 2, 2, 1, 0, 0, 1,
"forced line break ",
796{
"button", 0, 0, 0, 0, 0, 0, 2,
"push button ",
797 DECL html_flow MODIFIER ,
NULL , DECL button_attrs,
NULL,
NULL
799{
"caption", 0, 0, 0, 0, 0, 0, 0,
"table caption ",
802{
"center", 0, 3, 0, 0, 1, 1, 0,
"shorthand for div align=center ",
805{
"cite", 0, 0, 0, 0, 0, 0, 1,
"citation",
808{
"code", 0, 0, 0, 0, 0, 0, 1,
"computer code fragment",
811{
"col", 0, 2, 2, 1, 0, 0, 0,
"table column ",
814{
"colgroup", 0, 1, 0, 0, 0, 0, 0,
"table column group ",
815 DECL col_elt ,
"col" , DECL col_attrs ,
NULL,
NULL
817{
"dd", 0, 1, 0, 0, 0, 0, 0,
"definition description ",
820{
"del", 0, 0, 0, 0, 0, 0, 2,
"deleted text ",
823{
"dfn", 0, 0, 0, 0, 0, 0, 1,
"instance definition",
826{
"dir", 0, 0, 0, 0, 1, 1, 0,
"directory list",
827 DECL blockli_elt,
"li" ,
NULL, DECL compact_attrs,
NULL
829{
"div", 0, 0, 0, 0, 0, 0, 0,
"generic language/style container",
830 DECL html_flow,
NULL, DECL html_attrs, DECL align_attr,
NULL
832{
"dl", 0, 0, 0, 0, 0, 0, 0,
"definition list ",
833 DECL dl_contents ,
"dd" , DECL html_attrs, DECL compact_attr,
NULL
835{
"dt", 0, 1, 0, 0, 0, 0, 0,
"definition term ",
838{
"em", 0, 3, 0, 0, 0, 0, 1,
"emphasis",
841{
"embed", 0, 1, 0, 0, 1, 1, 1,
"generic embedded object ",
844{
"fieldset", 0, 0, 0, 0, 0, 0, 0,
"form control group ",
845 DECL fieldset_contents ,
NULL, DECL html_attrs,
NULL,
NULL
847{
"font", 0, 3, 0, 0, 1, 1, 1,
"local change to font ",
850{
"form", 0, 0, 0, 0, 0, 0, 0,
"interactive form ",
851 DECL form_contents,
"fieldset", DECL form_attrs , DECL target_attr, DECL action_attr
853{
"frame", 0, 2, 2, 1, 0, 2, 0,
"subwindow " ,
856{
"frameset", 0, 0, 0, 0, 0, 2, 0,
"window subdivision" ,
857 DECL frameset_contents,
"noframes" ,
NULL , DECL frameset_attrs,
NULL
859{
"h1", 0, 0, 0, 0, 0, 0, 0,
"heading ",
860 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
862{
"h2", 0, 0, 0, 0, 0, 0, 0,
"heading ",
863 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
865{
"h3", 0, 0, 0, 0, 0, 0, 0,
"heading ",
866 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
868{
"h4", 0, 0, 0, 0, 0, 0, 0,
"heading ",
869 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
871{
"h5", 0, 0, 0, 0, 0, 0, 0,
"heading ",
872 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
874{
"h6", 0, 0, 0, 0, 0, 0, 0,
"heading ",
875 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
877{
"head", 1, 1, 0, 0, 0, 0, 0,
"document head ",
880{
"hr", 0, 2, 2, 1, 0, 0, 0,
"horizontal rule " ,
883{
"html", 1, 1, 0, 0, 0, 0, 0,
"document root element ",
884 DECL html_content ,
NULL , DECL i18n_attrs, DECL version_attr,
NULL
886{
"i", 0, 3, 0, 0, 0, 0, 1,
"italic text style",
889{
"iframe", 0, 0, 0, 0, 0, 1, 2,
"inline subwindow ",
892{
"img", 0, 2, 2, 1, 0, 0, 1,
"embedded image ",
893 EMPTY,
NULL, DECL img_attrs, DECL align_attr, DECL src_alt_attrs
895{
"input", 0, 2, 2, 1, 0, 0, 1,
"form control ",
898{
"ins", 0, 0, 0, 0, 0, 0, 2,
"inserted text",
901{
"isindex", 0, 2, 2, 1, 1, 1, 0,
"single line prompt ",
904{
"kbd", 0, 0, 0, 0, 0, 0, 1,
"text to be entered by the user",
907{
"label", 0, 0, 0, 0, 0, 0, 1,
"form field label text ",
908 DECL html_inline MODIFIER,
NULL, DECL label_attrs ,
NULL,
NULL
910{
"legend", 0, 0, 0, 0, 0, 0, 0,
"fieldset legend ",
911 DECL html_inline,
NULL, DECL legend_attrs , DECL align_attr,
NULL
913{
"li", 0, 1, 1, 0, 0, 0, 0,
"list item ",
916{
"link", 0, 2, 2, 1, 0, 0, 0,
"a media-independent link ",
919{
"map", 0, 0, 0, 0, 0, 0, 2,
"client-side image map ",
920 DECL map_contents ,
NULL, DECL html_attrs ,
NULL, DECL name_attr
922{
"menu", 0, 0, 0, 0, 1, 1, 0,
"menu list ",
925{
"meta", 0, 2, 2, 1, 0, 0, 0,
"generic metainformation ",
928{
"noframes", 0, 0, 0, 0, 0, 2, 0,
"alternate content container for non frame-based rendering ",
929 DECL noframes_content,
"body" , DECL html_attrs,
NULL,
NULL
931{
"noscript", 0, 0, 0, 0, 0, 0, 0,
"alternate content container for non script-based rendering ",
932 DECL html_flow,
"div", DECL html_attrs,
NULL,
NULL
934{
"object", 0, 0, 0, 0, 0, 0, 2,
"generic embedded object ",
935 DECL object_contents ,
"div" , DECL object_attrs, DECL object_depr,
NULL
937{
"ol", 0, 0, 0, 0, 0, 0, 0,
"ordered list ",
938 DECL li_elt ,
"li" , DECL html_attrs, DECL ol_attrs,
NULL
940{
"optgroup", 0, 0, 0, 0, 0, 0, 0,
"option group ",
941 DECL option_elt ,
"option", DECL optgroup_attrs,
NULL, DECL label_attr
943{
"option", 0, 1, 0, 0, 0, 0, 0,
"selectable choice " ,
946{
"p", 0, 1, 0, 0, 0, 0, 0,
"paragraph ",
947 DECL html_inline,
NULL, DECL html_attrs, DECL align_attr,
NULL
949{
"param", 0, 2, 2, 1, 0, 0, 0,
"named property value ",
952{
"pre", 0, 0, 0, 0, 0, 0, 0,
"preformatted text ",
953 DECL pre_content,
NULL, DECL html_attrs, DECL width_attr,
NULL
955{
"q", 0, 0, 0, 0, 0, 0, 1,
"short inline quotation ",
958{
"s", 0, 3, 0, 0, 1, 1, 1,
"strike-through text style",
961{
"samp", 0, 0, 0, 0, 0, 0, 1,
"sample program output, scripts, etc.",
964{
"script", 0, 0, 0, 0, 0, 0, 2,
"script statements ",
965 DECL html_cdata,
NULL, DECL script_attrs, DECL language_attr, DECL type_attr
967{
"select", 0, 0, 0, 0, 0, 0, 1,
"option selector ",
968 DECL select_content,
NULL, DECL select_attrs,
NULL,
NULL
970{
"small", 0, 3, 0, 0, 0, 0, 1,
"small text style",
973{
"span", 0, 0, 0, 0, 0, 0, 1,
"generic language/style container ",
976{
"strike", 0, 3, 0, 0, 1, 1, 1,
"strike-through text",
979{
"strong", 0, 3, 0, 0, 0, 0, 1,
"strong emphasis",
982{
"style", 0, 0, 0, 0, 0, 0, 0,
"style info ",
983 DECL html_cdata,
NULL, DECL style_attrs,
NULL, DECL type_attr
985{
"sub", 0, 3, 0, 0, 0, 0, 1,
"subscript",
988{
"sup", 0, 3, 0, 0, 0, 0, 1,
"superscript ",
991{
"table", 0, 0, 0, 0, 0, 0, 0,
"",
992 DECL table_contents ,
"tr" , DECL table_attrs , DECL table_depr,
NULL
994{
"tbody", 1, 0, 0, 0, 0, 0, 0,
"table body ",
995 DECL tr_elt ,
"tr" , DECL talign_attrs,
NULL,
NULL
997{
"td", 0, 0, 0, 0, 0, 0, 0,
"table data cell",
998 DECL html_flow,
NULL, DECL th_td_attr, DECL th_td_depr,
NULL
1000{
"textarea", 0, 0, 0, 0, 0, 0, 1,
"multi-line text field ",
1001 DECL html_pcdata,
NULL, DECL textarea_attrs,
NULL, DECL rows_cols_attr
1003{
"tfoot", 0, 1, 0, 0, 0, 0, 0,
"table footer ",
1004 DECL tr_elt ,
"tr" , DECL talign_attrs,
NULL,
NULL
1006{
"th", 0, 1, 0, 0, 0, 0, 0,
"table header cell",
1007 DECL html_flow,
NULL, DECL th_td_attr, DECL th_td_depr,
NULL
1009{
"thead", 0, 1, 0, 0, 0, 0, 0,
"table header ",
1010 DECL tr_elt ,
"tr" , DECL talign_attrs,
NULL,
NULL
1012{
"title", 0, 0, 0, 0, 0, 0, 0,
"document title ",
1015{
"tr", 0, 0, 0, 0, 0, 0, 0,
"table row ",
1016 DECL tr_contents ,
"td" , DECL talign_attrs, DECL bgcolor_attr,
NULL
1018{
"tt", 0, 3, 0, 0, 0, 0, 1,
"teletype or monospaced text style",
1021{
"u", 0, 3, 0, 0, 1, 1, 1,
"underlined text style",
1024{
"ul", 0, 0, 0, 0, 0, 0, 0,
"unordered list ",
1025 DECL li_elt ,
"li" , DECL html_attrs, DECL ul_depr,
NULL
1027{
"var", 0, 0, 0, 0, 0, 0, 1,
"instance of a variable or program argument",
1035} htmlStartCloseEntry;
1040static const htmlStartCloseEntry htmlStartClose[] = {
1042 {
"a",
"fieldset" },
1046 {
"address",
"dd" },
1047 {
"address",
"dl" },
1048 {
"address",
"dt" },
1049 {
"address",
"form" },
1050 {
"address",
"li" },
1051 {
"address",
"ul" },
1057 {
"caption",
"col" },
1058 {
"caption",
"colgroup" },
1059 {
"caption",
"tbody" },
1060 {
"caption",
"tfoot" },
1061 {
"caption",
"thead" },
1062 {
"caption",
"tr" },
1064 {
"col",
"colgroup" },
1069 {
"colgroup",
"colgroup" },
1070 {
"colgroup",
"tbody" },
1071 {
"colgroup",
"tfoot" },
1072 {
"colgroup",
"thead" },
1073 {
"colgroup",
"tr" },
1084 {
"font",
"center" },
1088 {
"h1",
"fieldset" },
1093 {
"h2",
"fieldset" },
1098 {
"h3",
"fieldset" },
1103 {
"h4",
"fieldset" },
1108 {
"h5",
"fieldset" },
1113 {
"h6",
"fieldset" },
1120 {
"head",
"acronym" },
1121 {
"head",
"address" },
1125 {
"head",
"blockquote" },
1128 {
"head",
"center" },
1138 {
"head",
"fieldset" },
1141 {
"head",
"frameset" },
1150 {
"head",
"iframe" },
1154 {
"head",
"listing" },
1163 {
"head",
"small" },
1165 {
"head",
"strike" },
1166 {
"head",
"strong" },
1169 {
"head",
"table" },
1180 {
"legend",
"fieldset" },
1183 {
"link",
"frameset" },
1184 {
"listing",
"dd" },
1185 {
"listing",
"dl" },
1186 {
"listing",
"dt" },
1187 {
"listing",
"fieldset" },
1188 {
"listing",
"form" },
1189 {
"listing",
"li" },
1190 {
"listing",
"table" },
1191 {
"listing",
"ul" },
1198 {
"option",
"optgroup" },
1199 {
"option",
"option" },
1201 {
"p",
"blockquote" },
1206 {
"p",
"colgroup" },
1212 {
"p",
"fieldset" },
1214 {
"p",
"frameset" },
1241 {
"pre",
"fieldset" },
1247 {
"script",
"noscript" },
1252 {
"style",
"body" },
1253 {
"style",
"frameset" },
1254 {
"tbody",
"tbody" },
1255 {
"tbody",
"tfoot" },
1261 {
"tfoot",
"tbody" },
1267 {
"thead",
"tbody" },
1268 {
"thead",
"tfoot" },
1269 {
"title",
"body" },
1270 {
"title",
"frameset" },
1278 {
"ul",
"address" },
1285 {
"xmp",
"fieldset" },
1299static const char *
const htmlNoContentElements[] = {
1310static const char *
const htmlScriptAttributes[] = {
1344static const elementPriority htmlEndPriority[] = {
1371htmlInitAutoClose(
void) {
1375htmlCompareTags(
const void *
key,
const void *
member) {
1377 const htmlElemDesc *
desc = (
const htmlElemDesc *)
member;
1395 return((
const htmlElemDesc *)
bsearch(
tag, html40ElementTable,
1396 sizeof(html40ElementTable) /
sizeof(htmlElemDesc),
1397 sizeof(htmlElemDesc), htmlCompareTags));
1410 while ((htmlEndPriority[
i].
name !=
NULL) &&
1419htmlCompareStartClose(
const void *vkey,
const void *
member) {
1420 const htmlStartCloseEntry *
key = (
const htmlStartCloseEntry *) vkey;
1421 const htmlStartCloseEntry *
entry = (
const htmlStartCloseEntry *)
member;
1442htmlCheckAutoClose(
const xmlChar * newtag,
const xmlChar * oldtag)
1444 htmlStartCloseEntry
key;
1447 key.oldTag = (
const char *) oldtag;
1448 key.newTag = (
const char *) newtag;
1450 sizeof(htmlStartClose) /
sizeof(htmlStartCloseEntry),
1451 sizeof(htmlStartCloseEntry), htmlCompareStartClose);
1464htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt,
const xmlChar * newtag)
1466 const htmlElemDesc *
info;
1469 priority = htmlGetEndPriority(newtag);
1471 for (
i = (ctxt->nameNr - 1);
i >= 0;
i--) {
1481 if (htmlGetEndPriority(ctxt->nameTab[
i]) >
priority)
1488 info = htmlTagLookup(ctxt->name);
1491 "Opening and ending tag mismatch: %s and %s\n",
1492 newtag, ctxt->name);
1494 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
1495 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1507htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
1511 if (ctxt->nameNr == 0)
1513 for (
i = (ctxt->nameNr - 1);
i >= 0;
i--) {
1514 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
1515 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1533htmlAutoClose(htmlParserCtxtPtr ctxt,
const xmlChar * newtag)
1538 while ((ctxt->name !=
NULL) &&
1539 (htmlCheckAutoClose(newtag, ctxt->name))) {
1540 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
1541 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1560htmlAutoCloseTag(htmlDocPtr doc,
const xmlChar *
name, htmlNodePtr
elem) {
1565 if (htmlCheckAutoClose(
elem->name,
name))
return(1);
1568 if (htmlAutoCloseTag(doc,
name,
child))
return(1);
1586htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr
elem) {
1592 if (htmlAutoCloseTag(doc,
elem->name,
child))
return(1);
1608htmlCheckImplied(htmlParserCtxtPtr ctxt,
const xmlChar *newtag) {
1611 if (ctxt->options & HTML_PARSE_NOIMPLIED)
1613 if (!htmlOmittedDefaultValue)
1617 if (ctxt->nameNr <= 0) {
1618 htmlnamePush(ctxt,
BAD_CAST"html");
1619 if ((ctxt->sax !=
NULL) && (ctxt->sax->startElement !=
NULL))
1620 ctxt->sax->startElement(ctxt->userData,
BAD_CAST"html",
NULL);
1624 if ((ctxt->nameNr <= 1) &&
1631 if (ctxt->html >= 3) {
1639 htmlnamePush(ctxt,
BAD_CAST"head");
1640 if ((ctxt->sax !=
NULL) && (ctxt->sax->startElement !=
NULL))
1641 ctxt->sax->startElement(ctxt->userData,
BAD_CAST"head",
NULL);
1645 if (ctxt->html >= 10) {
1649 for (
i = 0;
i < ctxt->nameNr;
i++) {
1658 htmlnamePush(ctxt,
BAD_CAST"body");
1659 if ((ctxt->sax !=
NULL) && (ctxt->sax->startElement !=
NULL))
1660 ctxt->sax->startElement(ctxt->userData,
BAD_CAST"body",
NULL);
1676htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
1685 htmlCheckImplied(ctxt,
BAD_CAST"p");
1687 if ((ctxt->sax !=
NULL) && (ctxt->sax->startElement !=
NULL))
1688 ctxt->sax->startElement(ctxt->userData,
BAD_CAST"p",
NULL);
1691 if (!htmlOmittedDefaultValue)
1693 for (
i = 0; htmlNoContentElements[
i] !=
NULL;
i++) {
1696 htmlCheckImplied(ctxt,
BAD_CAST"p");
1698 if ((ctxt->sax !=
NULL) && (ctxt->sax->startElement !=
NULL))
1699 ctxt->sax->startElement(ctxt->userData,
BAD_CAST"p",
NULL);
1723 if ((
name[0] !=
'o') || (
name[1] !=
'n'))
1726 i <
sizeof(htmlScriptAttributes)/
sizeof(htmlScriptAttributes[0]);
1741static const htmlEntityDesc html40EntitiesTable[] = {
1745{ 34,
"quot",
"quotation mark = APL quote, U+0022 ISOnum" },
1746{ 38,
"amp",
"ampersand, U+0026 ISOnum" },
1747{ 39,
"apos",
"single quote" },
1748{ 60,
"lt",
"less-than sign, U+003C ISOnum" },
1749{ 62,
"gt",
"greater-than sign, U+003E ISOnum" },
1755{ 160,
"nbsp",
"no-break space = non-breaking space, U+00A0 ISOnum" },
1756{ 161,
"iexcl",
"inverted exclamation mark, U+00A1 ISOnum" },
1757{ 162,
"cent",
"cent sign, U+00A2 ISOnum" },
1758{ 163,
"pound",
"pound sign, U+00A3 ISOnum" },
1759{ 164,
"curren",
"currency sign, U+00A4 ISOnum" },
1760{ 165,
"yen",
"yen sign = yuan sign, U+00A5 ISOnum" },
1761{ 166,
"brvbar",
"broken bar = broken vertical bar, U+00A6 ISOnum" },
1762{ 167,
"sect",
"section sign, U+00A7 ISOnum" },
1763{ 168,
"uml",
"diaeresis = spacing diaeresis, U+00A8 ISOdia" },
1764{ 169,
"copy",
"copyright sign, U+00A9 ISOnum" },
1765{ 170,
"ordf",
"feminine ordinal indicator, U+00AA ISOnum" },
1766{ 171,
"laquo",
"left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" },
1767{ 172,
"not",
"not sign, U+00AC ISOnum" },
1768{ 173,
"shy",
"soft hyphen = discretionary hyphen, U+00AD ISOnum" },
1769{ 174,
"reg",
"registered sign = registered trade mark sign, U+00AE ISOnum" },
1770{ 175,
"macr",
"macron = spacing macron = overline = APL overbar, U+00AF ISOdia" },
1771{ 176,
"deg",
"degree sign, U+00B0 ISOnum" },
1772{ 177,
"plusmn",
"plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" },
1773{ 178,
"sup2",
"superscript two = superscript digit two = squared, U+00B2 ISOnum" },
1774{ 179,
"sup3",
"superscript three = superscript digit three = cubed, U+00B3 ISOnum" },
1775{ 180,
"acute",
"acute accent = spacing acute, U+00B4 ISOdia" },
1776{ 181,
"micro",
"micro sign, U+00B5 ISOnum" },
1777{ 182,
"para",
"pilcrow sign = paragraph sign, U+00B6 ISOnum" },
1778{ 183,
"middot",
"middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" },
1779{ 184,
"cedil",
"cedilla = spacing cedilla, U+00B8 ISOdia" },
1780{ 185,
"sup1",
"superscript one = superscript digit one, U+00B9 ISOnum" },
1781{ 186,
"ordm",
"masculine ordinal indicator, U+00BA ISOnum" },
1782{ 187,
"raquo",
"right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" },
1783{ 188,
"frac14",
"vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" },
1784{ 189,
"frac12",
"vulgar fraction one half = fraction one half, U+00BD ISOnum" },
1785{ 190,
"frac34",
"vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" },
1786{ 191,
"iquest",
"inverted question mark = turned question mark, U+00BF ISOnum" },
1787{ 192,
"Agrave",
"latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" },
1788{ 193,
"Aacute",
"latin capital letter A with acute, U+00C1 ISOlat1" },
1789{ 194,
"Acirc",
"latin capital letter A with circumflex, U+00C2 ISOlat1" },
1790{ 195,
"Atilde",
"latin capital letter A with tilde, U+00C3 ISOlat1" },
1791{ 196,
"Auml",
"latin capital letter A with diaeresis, U+00C4 ISOlat1" },
1792{ 197,
"Aring",
"latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" },
1793{ 198,
"AElig",
"latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" },
1794{ 199,
"Ccedil",
"latin capital letter C with cedilla, U+00C7 ISOlat1" },
1795{ 200,
"Egrave",
"latin capital letter E with grave, U+00C8 ISOlat1" },
1796{ 201,
"Eacute",
"latin capital letter E with acute, U+00C9 ISOlat1" },
1797{ 202,
"Ecirc",
"latin capital letter E with circumflex, U+00CA ISOlat1" },
1798{ 203,
"Euml",
"latin capital letter E with diaeresis, U+00CB ISOlat1" },
1799{ 204,
"Igrave",
"latin capital letter I with grave, U+00CC ISOlat1" },
1800{ 205,
"Iacute",
"latin capital letter I with acute, U+00CD ISOlat1" },
1801{ 206,
"Icirc",
"latin capital letter I with circumflex, U+00CE ISOlat1" },
1802{ 207,
"Iuml",
"latin capital letter I with diaeresis, U+00CF ISOlat1" },
1803{ 208,
"ETH",
"latin capital letter ETH, U+00D0 ISOlat1" },
1804{ 209,
"Ntilde",
"latin capital letter N with tilde, U+00D1 ISOlat1" },
1805{ 210,
"Ograve",
"latin capital letter O with grave, U+00D2 ISOlat1" },
1806{ 211,
"Oacute",
"latin capital letter O with acute, U+00D3 ISOlat1" },
1807{ 212,
"Ocirc",
"latin capital letter O with circumflex, U+00D4 ISOlat1" },
1808{ 213,
"Otilde",
"latin capital letter O with tilde, U+00D5 ISOlat1" },
1809{ 214,
"Ouml",
"latin capital letter O with diaeresis, U+00D6 ISOlat1" },
1810{ 215,
"times",
"multiplication sign, U+00D7 ISOnum" },
1811{ 216,
"Oslash",
"latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" },
1812{ 217,
"Ugrave",
"latin capital letter U with grave, U+00D9 ISOlat1" },
1813{ 218,
"Uacute",
"latin capital letter U with acute, U+00DA ISOlat1" },
1814{ 219,
"Ucirc",
"latin capital letter U with circumflex, U+00DB ISOlat1" },
1815{ 220,
"Uuml",
"latin capital letter U with diaeresis, U+00DC ISOlat1" },
1816{ 221,
"Yacute",
"latin capital letter Y with acute, U+00DD ISOlat1" },
1817{ 222,
"THORN",
"latin capital letter THORN, U+00DE ISOlat1" },
1818{ 223,
"szlig",
"latin small letter sharp s = ess-zed, U+00DF ISOlat1" },
1819{ 224,
"agrave",
"latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" },
1820{ 225,
"aacute",
"latin small letter a with acute, U+00E1 ISOlat1" },
1821{ 226,
"acirc",
"latin small letter a with circumflex, U+00E2 ISOlat1" },
1822{ 227,
"atilde",
"latin small letter a with tilde, U+00E3 ISOlat1" },
1823{ 228,
"auml",
"latin small letter a with diaeresis, U+00E4 ISOlat1" },
1824{ 229,
"aring",
"latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" },
1825{ 230,
"aelig",
"latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" },
1826{ 231,
"ccedil",
"latin small letter c with cedilla, U+00E7 ISOlat1" },
1827{ 232,
"egrave",
"latin small letter e with grave, U+00E8 ISOlat1" },
1828{ 233,
"eacute",
"latin small letter e with acute, U+00E9 ISOlat1" },
1829{ 234,
"ecirc",
"latin small letter e with circumflex, U+00EA ISOlat1" },
1830{ 235,
"euml",
"latin small letter e with diaeresis, U+00EB ISOlat1" },
1831{ 236,
"igrave",
"latin small letter i with grave, U+00EC ISOlat1" },
1832{ 237,
"iacute",
"latin small letter i with acute, U+00ED ISOlat1" },
1833{ 238,
"icirc",
"latin small letter i with circumflex, U+00EE ISOlat1" },
1834{ 239,
"iuml",
"latin small letter i with diaeresis, U+00EF ISOlat1" },
1835{ 240,
"eth",
"latin small letter eth, U+00F0 ISOlat1" },
1836{ 241,
"ntilde",
"latin small letter n with tilde, U+00F1 ISOlat1" },
1837{ 242,
"ograve",
"latin small letter o with grave, U+00F2 ISOlat1" },
1838{ 243,
"oacute",
"latin small letter o with acute, U+00F3 ISOlat1" },
1839{ 244,
"ocirc",
"latin small letter o with circumflex, U+00F4 ISOlat1" },
1840{ 245,
"otilde",
"latin small letter o with tilde, U+00F5 ISOlat1" },
1841{ 246,
"ouml",
"latin small letter o with diaeresis, U+00F6 ISOlat1" },
1842{ 247,
"divide",
"division sign, U+00F7 ISOnum" },
1843{ 248,
"oslash",
"latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" },
1844{ 249,
"ugrave",
"latin small letter u with grave, U+00F9 ISOlat1" },
1845{ 250,
"uacute",
"latin small letter u with acute, U+00FA ISOlat1" },
1846{ 251,
"ucirc",
"latin small letter u with circumflex, U+00FB ISOlat1" },
1847{ 252,
"uuml",
"latin small letter u with diaeresis, U+00FC ISOlat1" },
1848{ 253,
"yacute",
"latin small letter y with acute, U+00FD ISOlat1" },
1849{ 254,
"thorn",
"latin small letter thorn with, U+00FE ISOlat1" },
1850{ 255,
"yuml",
"latin small letter y with diaeresis, U+00FF ISOlat1" },
1852{ 338,
"OElig",
"latin capital ligature OE, U+0152 ISOlat2" },
1853{ 339,
"oelig",
"latin small ligature oe, U+0153 ISOlat2" },
1854{ 352,
"Scaron",
"latin capital letter S with caron, U+0160 ISOlat2" },
1855{ 353,
"scaron",
"latin small letter s with caron, U+0161 ISOlat2" },
1856{ 376,
"Yuml",
"latin capital letter Y with diaeresis, U+0178 ISOlat2" },
1861{ 402,
"fnof",
"latin small f with hook = function = florin, U+0192 ISOtech" },
1863{ 710,
"circ",
"modifier letter circumflex accent, U+02C6 ISOpub" },
1864{ 732,
"tilde",
"small tilde, U+02DC ISOdia" },
1866{ 913,
"Alpha",
"greek capital letter alpha, U+0391" },
1867{ 914,
"Beta",
"greek capital letter beta, U+0392" },
1868{ 915,
"Gamma",
"greek capital letter gamma, U+0393 ISOgrk3" },
1869{ 916,
"Delta",
"greek capital letter delta, U+0394 ISOgrk3" },
1870{ 917,
"Epsilon",
"greek capital letter epsilon, U+0395" },
1871{ 918,
"Zeta",
"greek capital letter zeta, U+0396" },
1872{ 919,
"Eta",
"greek capital letter eta, U+0397" },
1873{ 920,
"Theta",
"greek capital letter theta, U+0398 ISOgrk3" },
1874{ 921,
"Iota",
"greek capital letter iota, U+0399" },
1875{ 922,
"Kappa",
"greek capital letter kappa, U+039A" },
1876{ 923,
"Lambda",
"greek capital letter lambda, U+039B ISOgrk3" },
1877{ 924,
"Mu",
"greek capital letter mu, U+039C" },
1878{ 925,
"Nu",
"greek capital letter nu, U+039D" },
1879{ 926,
"Xi",
"greek capital letter xi, U+039E ISOgrk3" },
1880{ 927,
"Omicron",
"greek capital letter omicron, U+039F" },
1881{ 928,
"Pi",
"greek capital letter pi, U+03A0 ISOgrk3" },
1882{ 929,
"Rho",
"greek capital letter rho, U+03A1" },
1883{ 931,
"Sigma",
"greek capital letter sigma, U+03A3 ISOgrk3" },
1884{ 932,
"Tau",
"greek capital letter tau, U+03A4" },
1885{ 933,
"Upsilon",
"greek capital letter upsilon, U+03A5 ISOgrk3" },
1886{ 934,
"Phi",
"greek capital letter phi, U+03A6 ISOgrk3" },
1887{ 935,
"Chi",
"greek capital letter chi, U+03A7" },
1888{ 936,
"Psi",
"greek capital letter psi, U+03A8 ISOgrk3" },
1889{ 937,
"Omega",
"greek capital letter omega, U+03A9 ISOgrk3" },
1891{ 945,
"alpha",
"greek small letter alpha, U+03B1 ISOgrk3" },
1892{ 946,
"beta",
"greek small letter beta, U+03B2 ISOgrk3" },
1893{ 947,
"gamma",
"greek small letter gamma, U+03B3 ISOgrk3" },
1894{ 948,
"delta",
"greek small letter delta, U+03B4 ISOgrk3" },
1895{ 949,
"epsilon",
"greek small letter epsilon, U+03B5 ISOgrk3" },
1896{ 950,
"zeta",
"greek small letter zeta, U+03B6 ISOgrk3" },
1897{ 951,
"eta",
"greek small letter eta, U+03B7 ISOgrk3" },
1898{ 952,
"theta",
"greek small letter theta, U+03B8 ISOgrk3" },
1899{ 953,
"iota",
"greek small letter iota, U+03B9 ISOgrk3" },
1900{ 954,
"kappa",
"greek small letter kappa, U+03BA ISOgrk3" },
1901{ 955,
"lambda",
"greek small letter lambda, U+03BB ISOgrk3" },
1902{ 956,
"mu",
"greek small letter mu, U+03BC ISOgrk3" },
1903{ 957,
"nu",
"greek small letter nu, U+03BD ISOgrk3" },
1904{ 958,
"xi",
"greek small letter xi, U+03BE ISOgrk3" },
1905{ 959,
"omicron",
"greek small letter omicron, U+03BF NEW" },
1906{ 960,
"pi",
"greek small letter pi, U+03C0 ISOgrk3" },
1907{ 961,
"rho",
"greek small letter rho, U+03C1 ISOgrk3" },
1908{ 962,
"sigmaf",
"greek small letter final sigma, U+03C2 ISOgrk3" },
1909{ 963,
"sigma",
"greek small letter sigma, U+03C3 ISOgrk3" },
1910{ 964,
"tau",
"greek small letter tau, U+03C4 ISOgrk3" },
1911{ 965,
"upsilon",
"greek small letter upsilon, U+03C5 ISOgrk3" },
1912{ 966,
"phi",
"greek small letter phi, U+03C6 ISOgrk3" },
1913{ 967,
"chi",
"greek small letter chi, U+03C7 ISOgrk3" },
1914{ 968,
"psi",
"greek small letter psi, U+03C8 ISOgrk3" },
1915{ 969,
"omega",
"greek small letter omega, U+03C9 ISOgrk3" },
1916{ 977,
"thetasym",
"greek small letter theta symbol, U+03D1 NEW" },
1917{ 978,
"upsih",
"greek upsilon with hook symbol, U+03D2 NEW" },
1918{ 982,
"piv",
"greek pi symbol, U+03D6 ISOgrk3" },
1920{ 8194,
"ensp",
"en space, U+2002 ISOpub" },
1921{ 8195,
"emsp",
"em space, U+2003 ISOpub" },
1922{ 8201,
"thinsp",
"thin space, U+2009 ISOpub" },
1923{ 8204,
"zwnj",
"zero width non-joiner, U+200C NEW RFC 2070" },
1924{ 8205,
"zwj",
"zero width joiner, U+200D NEW RFC 2070" },
1925{ 8206,
"lrm",
"left-to-right mark, U+200E NEW RFC 2070" },
1926{ 8207,
"rlm",
"right-to-left mark, U+200F NEW RFC 2070" },
1927{ 8211,
"ndash",
"en dash, U+2013 ISOpub" },
1928{ 8212,
"mdash",
"em dash, U+2014 ISOpub" },
1929{ 8216,
"lsquo",
"left single quotation mark, U+2018 ISOnum" },
1930{ 8217,
"rsquo",
"right single quotation mark, U+2019 ISOnum" },
1931{ 8218,
"sbquo",
"single low-9 quotation mark, U+201A NEW" },
1932{ 8220,
"ldquo",
"left double quotation mark, U+201C ISOnum" },
1933{ 8221,
"rdquo",
"right double quotation mark, U+201D ISOnum" },
1934{ 8222,
"bdquo",
"double low-9 quotation mark, U+201E NEW" },
1935{ 8224,
"dagger",
"dagger, U+2020 ISOpub" },
1936{ 8225,
"Dagger",
"double dagger, U+2021 ISOpub" },
1938{ 8226,
"bull",
"bullet = black small circle, U+2022 ISOpub" },
1939{ 8230,
"hellip",
"horizontal ellipsis = three dot leader, U+2026 ISOpub" },
1941{ 8240,
"permil",
"per mille sign, U+2030 ISOtech" },
1943{ 8242,
"prime",
"prime = minutes = feet, U+2032 ISOtech" },
1944{ 8243,
"Prime",
"double prime = seconds = inches, U+2033 ISOtech" },
1946{ 8249,
"lsaquo",
"single left-pointing angle quotation mark, U+2039 ISO proposed" },
1947{ 8250,
"rsaquo",
"single right-pointing angle quotation mark, U+203A ISO proposed" },
1949{ 8254,
"oline",
"overline = spacing overscore, U+203E NEW" },
1950{ 8260,
"frasl",
"fraction slash, U+2044 NEW" },
1952{ 8364,
"euro",
"euro sign, U+20AC NEW" },
1954{ 8465,
"image",
"blackletter capital I = imaginary part, U+2111 ISOamso" },
1955{ 8472,
"weierp",
"script capital P = power set = Weierstrass p, U+2118 ISOamso" },
1956{ 8476,
"real",
"blackletter capital R = real part symbol, U+211C ISOamso" },
1957{ 8482,
"trade",
"trade mark sign, U+2122 ISOnum" },
1958{ 8501,
"alefsym",
"alef symbol = first transfinite cardinal, U+2135 NEW" },
1959{ 8592,
"larr",
"leftwards arrow, U+2190 ISOnum" },
1960{ 8593,
"uarr",
"upwards arrow, U+2191 ISOnum" },
1961{ 8594,
"rarr",
"rightwards arrow, U+2192 ISOnum" },
1962{ 8595,
"darr",
"downwards arrow, U+2193 ISOnum" },
1963{ 8596,
"harr",
"left right arrow, U+2194 ISOamsa" },
1964{ 8629,
"crarr",
"downwards arrow with corner leftwards = carriage return, U+21B5 NEW" },
1965{ 8656,
"lArr",
"leftwards double arrow, U+21D0 ISOtech" },
1966{ 8657,
"uArr",
"upwards double arrow, U+21D1 ISOamsa" },
1967{ 8658,
"rArr",
"rightwards double arrow, U+21D2 ISOtech" },
1968{ 8659,
"dArr",
"downwards double arrow, U+21D3 ISOamsa" },
1969{ 8660,
"hArr",
"left right double arrow, U+21D4 ISOamsa" },
1971{ 8704,
"forall",
"for all, U+2200 ISOtech" },
1972{ 8706,
"part",
"partial differential, U+2202 ISOtech" },
1973{ 8707,
"exist",
"there exists, U+2203 ISOtech" },
1974{ 8709,
"empty",
"empty set = null set = diameter, U+2205 ISOamso" },
1975{ 8711,
"nabla",
"nabla = backward difference, U+2207 ISOtech" },
1976{ 8712,
"isin",
"element of, U+2208 ISOtech" },
1977{ 8713,
"notin",
"not an element of, U+2209 ISOtech" },
1978{ 8715,
"ni",
"contains as member, U+220B ISOtech" },
1979{ 8719,
"prod",
"n-ary product = product sign, U+220F ISOamsb" },
1980{ 8721,
"sum",
"n-ary summation, U+2211 ISOamsb" },
1981{ 8722,
"minus",
"minus sign, U+2212 ISOtech" },
1982{ 8727,
"lowast",
"asterisk operator, U+2217 ISOtech" },
1983{ 8730,
"radic",
"square root = radical sign, U+221A ISOtech" },
1984{ 8733,
"prop",
"proportional to, U+221D ISOtech" },
1985{ 8734,
"infin",
"infinity, U+221E ISOtech" },
1986{ 8736,
"ang",
"angle, U+2220 ISOamso" },
1987{ 8743,
"and",
"logical and = wedge, U+2227 ISOtech" },
1988{ 8744,
"or",
"logical or = vee, U+2228 ISOtech" },
1989{ 8745,
"cap",
"intersection = cap, U+2229 ISOtech" },
1990{ 8746,
"cup",
"union = cup, U+222A ISOtech" },
1991{ 8747,
"int",
"integral, U+222B ISOtech" },
1992{ 8756,
"there4",
"therefore, U+2234 ISOtech" },
1993{ 8764,
"sim",
"tilde operator = varies with = similar to, U+223C ISOtech" },
1994{ 8773,
"cong",
"approximately equal to, U+2245 ISOtech" },
1995{ 8776,
"asymp",
"almost equal to = asymptotic to, U+2248 ISOamsr" },
1996{ 8800,
"ne",
"not equal to, U+2260 ISOtech" },
1997{ 8801,
"equiv",
"identical to, U+2261 ISOtech" },
1998{ 8804,
"le",
"less-than or equal to, U+2264 ISOtech" },
1999{ 8805,
"ge",
"greater-than or equal to, U+2265 ISOtech" },
2000{ 8834,
"sub",
"subset of, U+2282 ISOtech" },
2001{ 8835,
"sup",
"superset of, U+2283 ISOtech" },
2002{ 8836,
"nsub",
"not a subset of, U+2284 ISOamsn" },
2003{ 8838,
"sube",
"subset of or equal to, U+2286 ISOtech" },
2004{ 8839,
"supe",
"superset of or equal to, U+2287 ISOtech" },
2005{ 8853,
"oplus",
"circled plus = direct sum, U+2295 ISOamsb" },
2006{ 8855,
"otimes",
"circled times = vector product, U+2297 ISOamsb" },
2007{ 8869,
"perp",
"up tack = orthogonal to = perpendicular, U+22A5 ISOtech" },
2008{ 8901,
"sdot",
"dot operator, U+22C5 ISOamsb" },
2009{ 8968,
"lceil",
"left ceiling = apl upstile, U+2308 ISOamsc" },
2010{ 8969,
"rceil",
"right ceiling, U+2309 ISOamsc" },
2011{ 8970,
"lfloor",
"left floor = apl downstile, U+230A ISOamsc" },
2012{ 8971,
"rfloor",
"right floor, U+230B ISOamsc" },
2013{ 9001,
"lang",
"left-pointing angle bracket = bra, U+2329 ISOtech" },
2014{ 9002,
"rang",
"right-pointing angle bracket = ket, U+232A ISOtech" },
2015{ 9674,
"loz",
"lozenge, U+25CA ISOpub" },
2017{ 9824,
"spades",
"black spade suit, U+2660 ISOpub" },
2018{ 9827,
"clubs",
"black club suit = shamrock, U+2663 ISOpub" },
2019{ 9829,
"hearts",
"black heart suit = valentine, U+2665 ISOpub" },
2020{ 9830,
"diams",
"black diamond suit, U+2666 ISOpub" },
2033#define growBuffer(buffer) { \
2035 buffer##_size *= 2; \
2036 tmp = (xmlChar *) xmlRealloc(buffer, buffer##_size); \
2037 if (tmp == NULL) { \
2038 htmlErrMemory(ctxt, "growing buffer\n"); \
2055const htmlEntityDesc *
2059 for (
i = 0;
i < (
sizeof(html40EntitiesTable)/
2060 sizeof(html40EntitiesTable[0]));
i++) {
2062 return((htmlEntityDescPtr) &html40EntitiesTable[
i]);
2078const htmlEntityDesc *
2079htmlEntityValueLookup(
unsigned int value) {
2082 for (
i = 0;
i < (
sizeof(html40EntitiesTable)/
2083 sizeof(html40EntitiesTable[0]));
i++) {
2087 return((htmlEntityDescPtr) &html40EntitiesTable[
i]);
2109UTF8ToHtml(
unsigned char*
out,
int *outlen,
2110 const unsigned char*
in,
int *inlen) {
2112 const unsigned char* outend;
2113 const unsigned char* outstart =
out;
2114 const unsigned char* instart =
in;
2115 const unsigned char* inend;
2128 inend =
in + (*inlen);
2129 outend =
out + (*outlen);
2130 while (
in < inend) {
2133 else if (
d < 0xC0) {
2135 *outlen =
out - outstart;
2138 }
else if (
d < 0xE0) {
c=
d & 0x1F;
trailing= 1; }
2143 *outlen =
out - outstart;
2153 if ((
in >= inend) || (((
d= *
in++) & 0xC0) != 0x80))
2161 if (
out + 1 >= outend)
2166 const htmlEntityDesc * ent;
2174 ent = htmlEntityValueLookup(
c);
2182 if (
out + 2 +
len >= outend)
2191 *outlen =
out - outstart;
2213htmlEncodeEntities(
unsigned char*
out,
int *outlen,
2214 const unsigned char*
in,
int *inlen,
int quoteChar) {
2216 const unsigned char* outend;
2217 const unsigned char* outstart =
out;
2218 const unsigned char* instart =
in;
2219 const unsigned char* inend;
2225 outend =
out + (*outlen);
2226 inend =
in + (*inlen);
2227 while (
in < inend) {
2230 else if (
d < 0xC0) {
2232 *outlen =
out - outstart;
2235 }
else if (
d < 0xE0) {
c=
d & 0x1F;
trailing= 1; }
2240 *outlen =
out - outstart;
2249 if (((
d= *
in++) & 0xC0) != 0x80) {
2250 *outlen =
out - outstart;
2259 if ((
c < 0x80) && (
c != (
unsigned int) quoteChar) &&
2260 (
c !=
'&') && (
c !=
'<') && (
c !=
'>')) {
2265 const htmlEntityDesc * ent;
2273 ent = htmlEntityValueLookup(
c);
2281 if (outend -
out <
len + 2)
2290 *outlen =
out - outstart;
2301#ifdef LIBXML_PUSH_ENABLED
2309static htmlParserInputPtr
2310htmlNewInputStream(htmlParserCtxtPtr ctxt) {
2311 htmlParserInputPtr
input;
2315 htmlErrMemory(ctxt,
"couldn't allocate a new input stream\n");
2329 input->consumed = 0;
2347static const char *allowPCData[] = {
2348 "a",
"abbr",
"acronym",
"address",
"applet",
"b",
"bdo",
"big",
2349 "blockquote",
"body",
"button",
"caption",
"center",
"cite",
"code",
2350 "dd",
"del",
"dfn",
"div",
"dt",
"em",
"font",
"form",
"h1",
"h2",
2351 "h3",
"h4",
"h5",
"h6",
"i",
"iframe",
"ins",
"kbd",
"label",
"legend",
2352 "li",
"noframes",
"noscript",
"object",
"p",
"pre",
"q",
"s",
"samp",
2353 "small",
"span",
"strike",
"strong",
"td",
"th",
"tt",
"u",
"var"
2370 xmlNodePtr lastChild;
2376 if (
CUR == 0)
return(1);
2377 if (
CUR !=
'<')
return(0);
2378 if (ctxt->name ==
NULL)
2387 dtd = xmlGetIntSubset(ctxt->myDoc);
2388 if (dtd !=
NULL && dtd->ExternalID !=
NULL) {
2395 if (ctxt->node ==
NULL)
return(0);
2396 lastChild = xmlGetLastChild(ctxt->node);
2397 while ((lastChild) && (lastChild->type == XML_COMMENT_NODE))
2398 lastChild = lastChild->prev;
2399 if (lastChild ==
NULL) {
2400 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2401 (ctxt->node->content !=
NULL))
return(0);
2404 for (
i = 0;
i <
sizeof(allowPCData)/
sizeof(allowPCData[0]);
i++ ) {
2409 }
else if (xmlNodeIsText(lastChild)) {
2414 for (
i = 0;
i <
sizeof(allowPCData)/
sizeof(allowPCData[0]);
i++ ) {
2442 htmlErrMemory(
NULL,
"HTML document creation failed\n");
2447 cur->type = XML_HTML_DOCUMENT_NODE;
2456 cur->standalone = 1;
2457 cur->compression = 0;
2462 cur->properties = XML_DOC_HTML | XML_DOC_USERBUILT;
2463 if ((ExternalID !=
NULL) ||
2465 xmlCreateIntSubset(
cur,
BAD_CAST "html", ExternalID, URI);
2482 if ((URI ==
NULL) && (ExternalID ==
NULL))
2483 return(htmlNewDocNoDtD(
2484 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd",
2485 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN"));
2487 return(htmlNewDocNoDtD(URI, ExternalID));
2504static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
2507htmlSkipBogusComment(htmlParserCtxtPtr ctxt) {
2511 "Incorrectly opened comment\n",
NULL,
NULL);
2532htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
2535 xmlChar loc[HTML_PARSER_BUFFER_SIZE];
2540 while ((
i < HTML_PARSER_BUFFER_SIZE) &&
2542 (
CUR ==
':') || (
CUR ==
'-') || (
CUR ==
'_') ||
2544 if ((
CUR >=
'A') && (
CUR <=
'Z')) loc[
i] =
CUR + 0x20;
2553 htmlErrMemory(ctxt,
NULL);
2571htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
2573 xmlChar loc[HTML_PARSER_BUFFER_SIZE];
2576 (
NXT(1) !=
':'))
return(
NULL);
2578 while ((
i < HTML_PARSER_BUFFER_SIZE) &&
2580 (
NXT(1+
i) ==
':') || (
NXT(1+
i) ==
'-') || (
NXT(1+
i) ==
'_'))) {
2581 if ((
NXT(1+
i) >=
'A') && (
NXT(1+
i) <=
'Z')) loc[
i] =
NXT(1+
i) + 0x20;
2582 else loc[
i] =
NXT(1+
i);
2600htmlParseName(htmlParserCtxtPtr ctxt) {
2610 in = ctxt->input->cur;
2611 if (((*
in >= 0x61) && (*
in <= 0x7A)) ||
2612 ((*
in >= 0x41) && (*
in <= 0x5A)) ||
2613 (*
in ==
'_') || (*
in ==
':')) {
2615 while (((*
in >= 0x61) && (*
in <= 0x7A)) ||
2616 ((*
in >= 0x41) && (*
in <= 0x5A)) ||
2617 ((*
in >= 0x30) && (*
in <= 0x39)) ||
2618 (*
in ==
'_') || (*
in ==
'-') ||
2619 (*
in ==
':') || (*
in ==
'.'))
2622 if (
in == ctxt->input->end)
2625 if ((*
in > 0) && (*
in < 0x80)) {
2626 count =
in - ctxt->input->cur;
2628 ctxt->input->cur =
in;
2629 ctxt->input->col +=
count;
2633 return(htmlParseNameComplex(ctxt));
2637htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2649 if ((
c ==
' ') || (
c ==
'>') || (
c ==
'/') ||
2655 while ((
c !=
' ') && (
c !=
'>') && (
c !=
'/') &&
2657 (
c ==
'.') || (
c ==
'-') ||
2658 (
c ==
'_') || (
c ==
':') ||
2668 if (ctxt->input->base !=
base) {
2673 return(htmlParseNameComplex(ctxt));
2679 if (ctxt->input->cur - ctxt->input->base <
len) {
2682 "unexpected change of input buffer",
NULL,
NULL);
2702htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt,
const xmlChar stop) {
2711 const htmlEntityDesc * ent;
2719 htmlErrMemory(ctxt,
"buffer allocation failed\n");
2727 while ((
CUR != 0) && (
CUR != stop)) {
2728 if ((stop == 0) && (
CUR ==
'>'))
break;
2731 if (
NXT(1) ==
'#') {
2735 c = htmlParseCharRef(ctxt);
2739 { *
out++ =((
c >> 6) & 0x1F) | 0xC0;
bits= 0; }
2740 else if (
c < 0x10000)
2741 { *
out++ =((
c >> 12) & 0x0F) | 0xE0;
bits= 6; }
2743 { *
out++ =((
c >> 18) & 0x07) | 0xF0;
bits= 12; }
2746 *
out++ = ((
c >>
bits) & 0x3F) | 0x80;
2756 ent = htmlParseEntityRef(ctxt, &
name);
2765 }
else if (ent ==
NULL) {
2791 { *
out++ =((
c >> 6) & 0x1F) | 0xC0;
bits= 0; }
2792 else if (
c < 0x10000)
2793 { *
out++ =((
c >> 12) & 0x0F) | 0xE0;
bits= 6; }
2795 { *
out++ =((
c >> 18) & 0x07) | 0xF0;
bits= 12; }
2798 *
out++ = ((
c >>
bits) & 0x3F) | 0x80;
2820 { *
out++ =((
c >> 6) & 0x1F) | 0xC0;
bits= 0; }
2821 else if (
c < 0x10000)
2822 { *
out++ =((
c >> 12) & 0x0F) | 0xE0;
bits= 6; }
2824 { *
out++ =((
c >> 18) & 0x07) | 0xF0;
bits= 12; }
2827 *
out++ = ((
c >>
bits) & 0x3F) | 0x80;
2833 "attribute value too long\n",
NULL,
NULL);
2856const htmlEntityDesc *
2857htmlParseEntityRef(htmlParserCtxtPtr ctxt,
const xmlChar **
str) {
2859 const htmlEntityDesc * ent =
NULL;
2862 if ((ctxt ==
NULL) || (ctxt->input ==
NULL))
return(
NULL);
2866 name = htmlParseName(ctxt);
2869 "htmlParseEntityRef: no name\n",
NULL,
NULL);
2879 ent = htmlEntityLookup(
name);
2884 "htmlParseEntityRef: expecting ';'\n",
2907htmlParseAttValue(htmlParserCtxtPtr ctxt) {
2912 ret = htmlParseHTMLAttribute(ctxt,
'"');
2915 "AttValue: \" expected\n",
NULL,
NULL);
2918 }
else if (
CUR ==
'\'') {
2920 ret = htmlParseHTMLAttribute(ctxt,
'\'');
2923 "AttValue: ' expected\n",
NULL,
NULL);
2930 ret = htmlParseHTMLAttribute(ctxt, 0);
2933 "AttValue: no value found\n",
NULL,
NULL);
2951htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
2952 size_t len = 0, startPosition = 0;
2957 if ((
CUR !=
'"') && (
CUR !=
'\'')) {
2959 "SystemLiteral \" or ' expected\n",
NULL,
NULL);
2973 "Invalid char in SystemLiteral 0x%X\n",
CUR);
2981 "Unfinished SystemLiteral\n",
NULL,
NULL);
3003htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
3004 size_t len = 0, startPosition = 0;
3009 if ((
CUR !=
'"') && (
CUR !=
'\'')) {
3011 "PubidLiteral \" or ' expected\n",
NULL,
NULL);
3027 "Invalid char in PubidLiteral 0x%X\n",
CUR);
3036 "Unfinished PubidLiteral\n",
NULL,
NULL);
3068htmlParseScript(htmlParserCtxtPtr ctxt) {
3069 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];
3075 if ((
cur ==
'<') && (
NXT(1) ==
'/')) {
3087 if (ctxt->recovery) {
3094 "Element %s embeds close tag\n",
3098 if (((
NXT(2) >=
'A') && (
NXT(2) <=
'Z')) ||
3099 ((
NXT(2) >=
'a') && (
NXT(2) <=
'z')))
3109 "Invalid char in CDATA 0x%X\n",
cur);
3112 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
3114 if (ctxt->sax->cdataBlock!=
NULL) {
3118 ctxt->sax->cdataBlock(ctxt->userData,
buf, nbchar);
3119 }
else if (ctxt->sax->characters !=
NULL) {
3120 ctxt->sax->characters(ctxt->userData,
buf, nbchar);
3131 if ((nbchar != 0) && (ctxt->sax !=
NULL) && (!ctxt->disableSAX)) {
3133 if (ctxt->sax->cdataBlock!=
NULL) {
3137 ctxt->sax->cdataBlock(ctxt->userData,
buf, nbchar);
3138 }
else if (ctxt->sax->characters !=
NULL) {
3139 ctxt->sax->characters(ctxt->userData,
buf, nbchar);
3157htmlParseCharDataInternal(htmlParserCtxtPtr ctxt,
int readahead) {
3158 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 6];
3163 buf[nbchar++] = readahead;
3166 while (((
cur !=
'<') || (ctxt->token ==
'<')) &&
3167 ((
cur !=
'&') || (ctxt->token ==
'&')) &&
3171 "Invalid char in CDATA 0x%X\n",
cur);
3176 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
3182 if ((ctxt->sax !=
NULL) && (!ctxt->disableSAX)) {
3184 if (ctxt->keepBlanks) {
3185 if (ctxt->sax->characters !=
NULL)
3186 ctxt->sax->characters(ctxt->userData,
buf, nbchar);
3188 if (ctxt->sax->ignorableWhitespace !=
NULL)
3189 ctxt->sax->ignorableWhitespace(ctxt->userData,
3193 htmlCheckParagraph(ctxt);
3194 if (ctxt->sax->characters !=
NULL)
3195 ctxt->sax->characters(ctxt->userData,
buf, nbchar);
3211 if ((ctxt->sax !=
NULL) && (!ctxt->disableSAX)) {
3213 if (ctxt->keepBlanks) {
3214 if (ctxt->sax->characters !=
NULL)
3215 ctxt->sax->characters(ctxt->userData,
buf, nbchar);
3217 if (ctxt->sax->ignorableWhitespace !=
NULL)
3218 ctxt->sax->ignorableWhitespace(ctxt->userData,
3222 htmlCheckParagraph(ctxt);
3223 if (ctxt->sax->characters !=
NULL)
3224 ctxt->sax->characters(ctxt->userData,
buf, nbchar);
3241htmlParseCharData(htmlParserCtxtPtr ctxt) {
3242 htmlParseCharDataInternal(ctxt, 0);
3263htmlParseExternalID(htmlParserCtxtPtr ctxt,
xmlChar **publicID) {
3266 if ((UPPER ==
'S') && (UPP(1) ==
'Y') &&
3267 (UPP(2) ==
'S') && (UPP(3) ==
'T') &&
3268 (UPP(4) ==
'E') && (UPP(5) ==
'M')) {
3272 "Space required after 'SYSTEM'\n",
NULL,
NULL);
3275 URI = htmlParseSystemLiteral(ctxt);
3278 "htmlParseExternalID: SYSTEM, no URI\n",
NULL,
NULL);
3280 }
else if ((UPPER ==
'P') && (UPP(1) ==
'U') &&
3281 (UPP(2) ==
'B') && (UPP(3) ==
'L') &&
3282 (UPP(4) ==
'I') && (UPP(5) ==
'C')) {
3286 "Space required after 'PUBLIC'\n",
NULL,
NULL);
3289 *publicID = htmlParsePubidLiteral(ctxt);
3290 if (*publicID ==
NULL) {
3292 "htmlParseExternalID: PUBLIC, no Public Identifier\n",
3296 if ((
CUR ==
'"') || (
CUR ==
'\'')) {
3297 URI = htmlParseSystemLiteral(ctxt);
3312htmlParsePI(htmlParserCtxtPtr ctxt) {
3315 int size = HTML_PARSER_BUFFER_SIZE;
3323 if ((
RAW ==
'<') && (
NXT(1) ==
'?')) {
3324 state = ctxt->instate;
3335 target = htmlParseName(ctxt);
3343 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3344 (ctxt->sax->processingInstruction !=
NULL))
3345 ctxt->sax->processingInstruction(ctxt->userData,
3347 ctxt->instate =
state;
3352 htmlErrMemory(ctxt,
NULL);
3353 ctxt->instate =
state;
3359 "ParsePI: PI %s space expected\n",
target,
NULL);
3363 while ((
cur != 0) && (
cur !=
'>')) {
3370 htmlErrMemory(ctxt,
NULL);
3372 ctxt->instate =
state;
3381 "Invalid char in processing instruction "
3388 ctxt->instate =
state;
3401 "ParsePI: PI %s never end ...\n",
target,
NULL);
3408 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3409 (ctxt->sax->processingInstruction !=
NULL))
3410 ctxt->sax->processingInstruction(ctxt->userData,
3416 "PI is not started correctly",
NULL,
NULL);
3418 ctxt->instate =
state;
3431htmlParseComment(htmlParserCtxtPtr ctxt) {
3434 int size = HTML_PARSER_BUFFER_SIZE;
3447 if ((
RAW !=
'<') || (
NXT(1) !=
'!') ||
3448 (
NXT(2) !=
'-') || (
NXT(3) !=
'-'))
return;
3450 state = ctxt->instate;
3455 htmlErrMemory(ctxt,
"buffer allocation failed\n");
3456 ctxt->instate =
state;
3473 if (
q ==
'-' &&
r ==
'>') {
3480 while ((
cur != 0) &&
3482 (
r !=
'-') || (
q !=
'-'))) {
3486 if ((
q ==
'-') && (
r ==
'-') && (
cur ==
'!') && (
next ==
'>')) {
3488 "Comment incorrectly closed by '--!>'",
NULL,
NULL);
3500 htmlErrMemory(ctxt,
"growing buffer failed\n");
3501 ctxt->instate =
state;
3510 "Invalid char in comment 0x%X\n",
q);
3516 ctxt->instate =
state;
3535 if ((ctxt->sax !=
NULL) && (ctxt->sax->comment !=
NULL) &&
3536 (!ctxt->disableSAX))
3537 ctxt->sax->comment(ctxt->userData,
buf);
3539 ctxt->instate =
state;
3545 "Comment not terminated \n<!--%.50s\n",
buf,
NULL);
3563htmlParseCharRef(htmlParserCtxtPtr ctxt) {
3566 if ((ctxt ==
NULL) || (ctxt->input ==
NULL)) {
3568 "htmlParseCharRef: context error\n",
3572 if ((
CUR ==
'&') && (
NXT(1) ==
'#') &&
3573 ((
NXT(2) ==
'x') ||
NXT(2) ==
'X')) {
3575 while (
CUR !=
';') {
3576 if ((
CUR >=
'0') && (
CUR <=
'9')) {
3579 }
else if ((
CUR >=
'a') && (
CUR <=
'f')) {
3582 }
else if ((
CUR >=
'A') && (
CUR <=
'F')) {
3587 "htmlParseCharRef: missing semicolon\n",
3595 }
else if ((
CUR ==
'&') && (
NXT(1) ==
'#')) {
3597 while (
CUR !=
';') {
3598 if ((
CUR >=
'0') && (
CUR <=
'9')) {
3603 "htmlParseCharRef: missing semicolon\n",
3613 "htmlParseCharRef: invalid value\n",
NULL,
NULL);
3620 }
else if (
val >= 0x110000) {
3622 "htmlParseCharRef: value too large\n",
NULL,
NULL);
3625 "htmlParseCharRef: invalid xmlChar value %d\n",
3643htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
3658 name = htmlParseName(ctxt);
3661 "htmlParseDocTypeDecl : no DOCTYPE name !\n",
3673 URI = htmlParseExternalID(ctxt, &ExternalID);
3681 "DOCTYPE improperly terminated\n",
NULL,
NULL);
3683 while ((
CUR != 0) && (
CUR !=
'>') &&
3693 if ((ctxt->sax !=
NULL) && (ctxt->sax->internalSubset !=
NULL) &&
3694 (!ctxt->disableSAX))
3695 ctxt->sax->internalSubset(ctxt->userData,
name, ExternalID, URI);
3726htmlParseAttribute(htmlParserCtxtPtr ctxt,
xmlChar **
value) {
3731 name = htmlParseHTMLName(ctxt);
3734 "error parsing attribute name\n",
NULL,
NULL);
3745 val = htmlParseAttValue(ctxt);
3763htmlCheckEncoding(htmlParserCtxtPtr ctxt,
const xmlChar *attvalue) {
3770 if (encoding !=
NULL) {
3778 if (encoding && *encoding ==
'=') {
3792htmlCheckMeta(htmlParserCtxtPtr ctxt,
const xmlChar **
atts) {
3803 while (att !=
NULL) {
3815 htmlCheckEncoding(ctxt,
content);
3840htmlParseStartTag(htmlParserCtxtPtr ctxt) {
3851 if ((ctxt ==
NULL) || (ctxt->input ==
NULL)) {
3853 "htmlParseStartTag: context error\n",
NULL,
NULL);
3858 if (
CUR !=
'<')
return -1;
3862 maxatts = ctxt->maxatts;
3865 name = htmlParseHTMLName(ctxt);
3868 "htmlParseStartTag: invalid element name\n",
3871 while ((
CUR != 0) && (
CUR !=
'>') &&
3882 htmlAutoClose(ctxt,
name);
3887 htmlCheckImplied(ctxt,
name);
3895 "htmlParseStartTag: misplaced <html> tag\n",
3900 if ((ctxt->nameNr != 1) &&
3903 "htmlParseStartTag: misplaced <head> tag\n",
3910 for (indx = 0;indx < ctxt->nameNr;indx++) {
3913 "htmlParseStartTag: misplaced <body> tag\n",
3927 while ((
CUR != 0) &&
3929 ((
CUR !=
'/') || (
NXT(1) !=
'>')) &&
3932 attname = htmlParseAttribute(ctxt, &attvalue);
3933 if (attname !=
NULL) {
3938 for (
i = 0;
i < nbatts;
i += 2) {
3941 "Attribute %s redefined\n", attname,
NULL);
3942 if (attvalue !=
NULL)
3956 htmlErrMemory(ctxt,
NULL);
3957 if (attvalue !=
NULL)
3962 ctxt->maxatts = maxatts;
3963 }
else if (nbatts + 4 > maxatts) {
3968 maxatts *
sizeof(
const xmlChar *));
3970 htmlErrMemory(ctxt,
NULL);
3971 if (attvalue !=
NULL)
3977 ctxt->maxatts = maxatts;
3979 atts[nbatts++] = attname;
3980 atts[nbatts++] = attvalue;
3985 if (attvalue !=
NULL)
3989 while ((
CUR != 0) &&
3991 ((
CUR !=
'/') || (
NXT(1) !=
'>')) &&
4003 if (meta && (nbatts != 0))
4004 htmlCheckMeta(ctxt,
atts);
4010 htmlnamePush(ctxt,
name);
4011 if ((ctxt->sax !=
NULL) && (ctxt->sax->startElement !=
NULL)) {
4013 ctxt->sax->startElement(ctxt->userData,
name,
atts);
4015 ctxt->sax->startElement(ctxt->userData,
name,
NULL);
4020 for (
i = 1;
i < nbatts;
i += 2) {
4045htmlParseEndTag(htmlParserCtxtPtr ctxt)
4051 if ((
CUR !=
'<') || (
NXT(1) !=
'/')) {
4053 "htmlParseEndTag: '</' not found\n",
NULL,
NULL);
4058 name = htmlParseHTMLName(ctxt);
4067 "End tag : expected '>'\n",
NULL,
NULL);
4069 while ((
CUR != 0) && (
CUR !=
'>'))
4079 if ((ctxt->depth > 0) &&
4091 for (
i = (ctxt->nameNr - 1);
i >= 0;
i--) {
4097 "Unexpected end tag : %s\n",
name,
NULL);
4106 htmlAutoCloseOnClose(ctxt,
name);
4115 "Opening and ending tag mismatch: %s and %s\n",
4122 oldname = ctxt->name;
4124 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
4125 ctxt->sax->endElement(ctxt->userData,
name);
4126 htmlNodeInfoPop(ctxt);
4146htmlParseReference(htmlParserCtxtPtr ctxt) {
4147 const htmlEntityDesc * ent;
4150 if (
CUR !=
'&')
return;
4152 if (
NXT(1) ==
'#') {
4156 c = htmlParseCharRef(ctxt);
4161 else if (
c < 0x800) {
out[
i++]=((
c >> 6) & 0x1F) | 0xC0;
bits= 0; }
4162 else if (
c < 0x10000) {
out[
i++]=((
c >> 12) & 0x0F) | 0xE0;
bits= 6; }
4163 else {
out[
i++]=((
c >> 18) & 0x07) | 0xF0;
bits= 12; }
4170 htmlCheckParagraph(ctxt);
4171 if ((ctxt->sax !=
NULL) && (ctxt->sax->characters !=
NULL))
4172 ctxt->sax->characters(ctxt->userData,
out,
i);
4174 ent = htmlParseEntityRef(ctxt, &
name);
4176 htmlCheckParagraph(ctxt);
4177 if ((ctxt->sax !=
NULL) && (ctxt->sax->characters !=
NULL))
4178 ctxt->sax->characters(ctxt->userData,
BAD_CAST "&", 1);
4181 if ((ent ==
NULL) || !(ent->value > 0)) {
4182 htmlCheckParagraph(ctxt);
4183 if ((ctxt->sax !=
NULL) && (ctxt->sax->characters !=
NULL)) {
4184 ctxt->sax->characters(ctxt->userData,
BAD_CAST "&", 1);
4196 {
out[
i++]=((
c >> 6) & 0x1F) | 0xC0;
bits= 0; }
4197 else if (
c < 0x10000)
4198 {
out[
i++]=((
c >> 12) & 0x0F) | 0xE0;
bits= 6; }
4200 {
out[
i++]=((
c >> 18) & 0x07) | 0xF0;
bits= 12; }
4207 htmlCheckParagraph(ctxt);
4208 if ((ctxt->sax !=
NULL) && (ctxt->sax->characters !=
NULL))
4209 ctxt->sax->characters(ctxt->userData,
out,
i);
4223htmlParseContent(htmlParserCtxtPtr ctxt) {
4229 depth = ctxt->nameNr;
4239 if ((
CUR ==
'<') && (
NXT(1) ==
'/')) {
4240 if (htmlParseEndTag(ctxt) &&
4241 ((currentNode !=
NULL) || (ctxt->nameNr == 0))) {
4242 if (currentNode !=
NULL)
4249 else if ((
CUR ==
'<') &&
4251 (
NXT(1) ==
'_') || (
NXT(1) ==
':'))) {
4252 name = htmlParseHTMLName_nonInvasive(ctxt);
4255 "htmlParseStartTag: invalid element name\n",
4258 while ((
CUR != 0) && (
CUR !=
'>'))
4261 if (currentNode !=
NULL)
4266 if (ctxt->name !=
NULL) {
4267 if (htmlCheckAutoClose(
name, ctxt->
name) == 1) {
4268 htmlAutoClose(ctxt,
name);
4278 if ((ctxt->nameNr > 0) && (
depth >= ctxt->nameNr) &&
4290 htmlParseScript(ctxt);
4293 else if ((
CUR ==
'<') && (
NXT(1) ==
'!')) {
4297 if ((UPP(2) ==
'D') && (UPP(3) ==
'O') &&
4298 (UPP(4) ==
'C') && (UPP(5) ==
'T') &&
4299 (UPP(6) ==
'Y') && (UPP(7) ==
'P') &&
4302 "Misplaced DOCTYPE declaration\n",
4304 htmlParseDocTypeDecl(ctxt);
4309 else if ((
NXT(2) ==
'-') && (
NXT(3) ==
'-')) {
4310 htmlParseComment(ctxt);
4313 htmlSkipBogusComment(ctxt);
4320 else if ((
CUR ==
'<') && (
NXT(1) ==
'?')) {
4328 htmlParseElement(ctxt);
4330 else if (
CUR ==
'<') {
4331 if ((ctxt->sax !=
NULL) && (!ctxt->disableSAX) &&
4332 (ctxt->sax->characters !=
NULL))
4333 ctxt->sax->characters(ctxt->userData,
BAD_CAST "<", 1);
4341 else if (
CUR ==
'&') {
4342 htmlParseReference(ctxt);
4348 else if (
CUR == 0) {
4349 htmlAutoCloseOnEnd(ctxt);
4357 htmlParseCharData(ctxt);
4381htmlParseElement(htmlParserCtxtPtr ctxt) {
4384 const htmlElemDesc *
info;
4385 htmlParserNodeInfo node_info;
4390 if ((ctxt ==
NULL) || (ctxt->input ==
NULL)) {
4392 "htmlParseElement: context error\n",
NULL,
NULL);
4400 if (ctxt->record_info) {
4401 node_info.begin_pos = ctxt->input->consumed +
4402 (
CUR_PTR - ctxt->input->base);
4403 node_info.begin_line = ctxt->input->line;
4406 failed = htmlParseStartTag(ctxt);
4408 if ((failed == -1) || (
name ==
NULL)) {
4426 if ((
CUR ==
'/') && (
NXT(1) ==
'>')) {
4428 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
4429 ctxt->sax->endElement(ctxt->userData,
name);
4438 "Couldn't find end of Start Tag %s\n",
name,
NULL);
4451 if (ctxt->record_info) {
4452 node_info.end_pos = ctxt->input->consumed +
4453 (
CUR_PTR - ctxt->input->base);
4454 node_info.end_line = ctxt->input->line;
4455 node_info.node = ctxt->node;
4465 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
4466 ctxt->sax->endElement(ctxt->userData,
name);
4475 depth = ctxt->nameNr;
4477 oldptr = ctxt->input->cur;
4478 htmlParseContent(ctxt);
4479 if (oldptr==ctxt->input->cur)
break;
4480 if (ctxt->nameNr <
depth)
break;
4486 if ( currentNode !=
NULL && ctxt->record_info ) {
4487 node_info.end_pos = ctxt->input->consumed +
4488 (
CUR_PTR - ctxt->input->base);
4489 node_info.end_line = ctxt->input->line;
4490 node_info.node = ctxt->node;
4494 htmlAutoCloseOnEnd(ctxt);
4497 if (currentNode !=
NULL)
4502htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {
4506 if ( ctxt->node !=
NULL && ctxt->record_info ) {
4507 ctxt->nodeInfo->end_pos = ctxt->input->consumed +
4508 (
CUR_PTR - ctxt->input->base);
4509 ctxt->nodeInfo->end_line = ctxt->input->line;
4510 ctxt->nodeInfo->node = ctxt->node;
4512 htmlNodeInfoPop(ctxt);
4515 htmlAutoCloseOnEnd(ctxt);
4531htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
4533 const htmlElemDesc *
info;
4534 htmlParserNodeInfo node_info = {
NULL, 0, 0, 0, 0 };
4537 if ((ctxt ==
NULL) || (ctxt->input ==
NULL)) {
4539 "htmlParseElementInternal: context error\n",
NULL,
NULL);
4547 if (ctxt->record_info) {
4548 node_info.begin_pos = ctxt->input->consumed +
4549 (
CUR_PTR - ctxt->input->base);
4550 node_info.begin_line = ctxt->input->line;
4553 failed = htmlParseStartTag(ctxt);
4555 if ((failed == -1) || (
name ==
NULL)) {
4573 if ((
CUR ==
'/') && (
NXT(1) ==
'>')) {
4575 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
4576 ctxt->sax->endElement(ctxt->userData,
name);
4585 "Couldn't find end of Start Tag %s\n",
name,
NULL);
4595 if (ctxt->record_info)
4596 htmlNodeInfoPush(ctxt, &node_info);
4597 htmlParserFinishElementParsing(ctxt);
4605 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
4606 ctxt->sax->endElement(ctxt->userData,
name);
4611 if (ctxt->record_info)
4612 htmlNodeInfoPush(ctxt, &node_info);
4624htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
4629 depth = ctxt->nameNr;
4634 if (currentNode ==
NULL) {
4635 htmlErrMemory(ctxt,
NULL);
4648 if ((
CUR ==
'<') && (
NXT(1) ==
'/')) {
4649 if (htmlParseEndTag(ctxt) &&
4650 ((currentNode !=
NULL) || (ctxt->nameNr == 0))) {
4651 if (currentNode !=
NULL)
4654 depth = ctxt->nameNr;
4659 if (currentNode ==
NULL) {
4660 htmlErrMemory(ctxt,
NULL);
4668 else if ((
CUR ==
'<') &&
4670 (
NXT(1) ==
'_') || (
NXT(1) ==
':'))) {
4671 name = htmlParseHTMLName_nonInvasive(ctxt);
4674 "htmlParseStartTag: invalid element name\n",
4677 while ((
CUR == 0) && (
CUR !=
'>'))
4680 htmlParserFinishElementParsing(ctxt);
4681 if (currentNode !=
NULL)
4685 if (currentNode ==
NULL) {
4686 htmlErrMemory(ctxt,
NULL);
4689 depth = ctxt->nameNr;
4693 if (ctxt->name !=
NULL) {
4694 if (htmlCheckAutoClose(
name, ctxt->
name) == 1) {
4695 htmlAutoClose(ctxt,
name);
4705 if ((ctxt->nameNr > 0) && (
depth >= ctxt->nameNr) &&
4708 htmlParserFinishElementParsing(ctxt);
4712 if (currentNode ==
NULL) {
4713 htmlErrMemory(ctxt,
NULL);
4716 depth = ctxt->nameNr;
4725 htmlParseScript(ctxt);
4728 else if ((
CUR ==
'<') && (
NXT(1) ==
'!')) {
4732 if ((UPP(2) ==
'D') && (UPP(3) ==
'O') &&
4733 (UPP(4) ==
'C') && (UPP(5) ==
'T') &&
4734 (UPP(6) ==
'Y') && (UPP(7) ==
'P') &&
4737 "Misplaced DOCTYPE declaration\n",
4739 htmlParseDocTypeDecl(ctxt);
4744 else if ((
NXT(2) ==
'-') && (
NXT(3) ==
'-')) {
4745 htmlParseComment(ctxt);
4748 htmlSkipBogusComment(ctxt);
4755 else if ((
CUR ==
'<') && (
NXT(1) ==
'?')) {
4763 htmlParseElementInternal(ctxt);
4767 if (currentNode ==
NULL) {
4768 htmlErrMemory(ctxt,
NULL);
4771 depth = ctxt->nameNr;
4773 else if (
CUR ==
'<') {
4774 if ((ctxt->sax !=
NULL) && (!ctxt->disableSAX) &&
4775 (ctxt->sax->characters !=
NULL))
4776 ctxt->sax->characters(ctxt->userData,
BAD_CAST "<", 1);
4784 else if (
CUR ==
'&') {
4785 htmlParseReference(ctxt);
4791 else if (
CUR == 0) {
4792 htmlAutoCloseOnEnd(ctxt);
4800 htmlParseCharData(ctxt);
4818__htmlParseContent(
void *ctxt) {
4820 htmlParseContentInternal((htmlParserCtxtPtr) ctxt);
4835htmlParseDocument(htmlParserCtxtPtr ctxt) {
4840 if ((ctxt ==
NULL) || (ctxt->input ==
NULL)) {
4842 "htmlParseDocument: context error\n",
NULL,
NULL);
4849 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4868 "Document is empty\n",
NULL,
NULL);
4871 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
4872 ctxt->sax->startDocument(ctxt->userData);
4878 while (((
CUR ==
'<') && (
NXT(1) ==
'!') &&
4879 (
NXT(2) ==
'-') && (
NXT(3) ==
'-')) ||
4880 ((
CUR ==
'<') && (
NXT(1) ==
'?'))) {
4881 htmlParseComment(ctxt);
4891 if ((
CUR ==
'<') && (
NXT(1) ==
'!') &&
4892 (UPP(2) ==
'D') && (UPP(3) ==
'O') &&
4893 (UPP(4) ==
'C') && (UPP(5) ==
'T') &&
4894 (UPP(6) ==
'Y') && (UPP(7) ==
'P') &&
4896 htmlParseDocTypeDecl(ctxt);
4903 while (((
CUR ==
'<') && (
NXT(1) ==
'!') &&
4904 (
NXT(2) ==
'-') && (
NXT(3) ==
'-')) ||
4905 ((
CUR ==
'<') && (
NXT(1) ==
'?'))) {
4906 htmlParseComment(ctxt);
4914 htmlParseContentInternal(ctxt);
4920 htmlAutoCloseOnEnd(ctxt);
4926 if ((ctxt->sax) && (ctxt->sax->endDocument !=
NULL))
4927 ctxt->sax->endDocument(ctxt->userData);
4929 if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc !=
NULL)) {
4930 dtd = xmlGetIntSubset(ctxt->myDoc);
4932 ctxt->myDoc->intSubset =
4933 xmlCreateIntSubset(ctxt->myDoc,
BAD_CAST "html",
4934 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
4935 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
4937 if (! ctxt->wellFormed)
return(-1);
4960htmlInitParserCtxt(htmlParserCtxtPtr ctxt,
const htmlSAXHandler *sax,
4963 if (ctxt ==
NULL)
return(-1);
4964 memset(ctxt, 0,
sizeof(htmlParserCtxt));
4967 if (ctxt->dict ==
NULL) {
4968 htmlErrMemory(
NULL,
"htmlInitParserCtxt: out of memory\n");
4972 if (ctxt->sax ==
NULL)
4973 ctxt->sax = (htmlSAXHandler *)
xmlMalloc(
sizeof(htmlSAXHandler));
4974 if (ctxt->sax ==
NULL) {
4975 htmlErrMemory(
NULL,
"htmlInitParserCtxt: out of memory\n");
4979 memset(ctxt->sax, 0,
sizeof(htmlSAXHandler));
4980 xmlSAX2InitHtmlDefaultSAXHandler(ctxt->sax);
4981 ctxt->userData = ctxt;
4983 memcpy(ctxt->sax, sax,
sizeof(htmlSAXHandler));
4984 ctxt->userData = userData ? userData : ctxt;
4988 ctxt->inputTab = (htmlParserInputPtr *)
4989 xmlMalloc(5 *
sizeof(htmlParserInputPtr));
4990 if (ctxt->inputTab ==
NULL) {
4991 htmlErrMemory(
NULL,
"htmlInitParserCtxt: out of memory\n");
5000 ctxt->version =
NULL;
5001 ctxt->encoding =
NULL;
5002 ctxt->standalone = -1;
5006 ctxt->nodeTab = (htmlNodePtr *)
xmlMalloc(10 *
sizeof(htmlNodePtr));
5007 if (ctxt->nodeTab ==
NULL) {
5008 htmlErrMemory(
NULL,
"htmlInitParserCtxt: out of memory\n");
5023 if (ctxt->nameTab ==
NULL) {
5024 htmlErrMemory(
NULL,
"htmlInitParserCtxt: out of memory\n");
5040 ctxt->nodeInfoTab =
NULL;
5041 ctxt->nodeInfoNr = 0;
5042 ctxt->nodeInfoMax = 0;
5045 ctxt->wellFormed = 1;
5046 ctxt->replaceEntities = 0;
5051 ctxt->vctxt.userData = ctxt;
5054 ctxt->record_info = 0;
5056 ctxt->checkIndex = 0;
5057 ctxt->catalogs =
NULL;
5071htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
5085htmlNewParserCtxt(
void)
5087 return(htmlNewSAXParserCtxt(
NULL,
NULL));
5102htmlNewSAXParserCtxt(
const htmlSAXHandler *sax,
void *userData)
5104 xmlParserCtxtPtr ctxt;
5106 ctxt = (xmlParserCtxtPtr)
xmlMalloc(
sizeof(xmlParserCtxt));
5108 htmlErrMemory(
NULL,
"NewParserCtxt: out of memory\n");
5111 memset(ctxt, 0,
sizeof(xmlParserCtxt));
5112 if (htmlInitParserCtxt(ctxt, sax, userData) < 0) {
5113 htmlFreeParserCtxt(ctxt);
5129htmlCreateMemoryParserCtxt(
const char *
buffer,
int size) {
5130 xmlParserCtxtPtr ctxt;
5131 xmlParserInputPtr
input;
5132 xmlParserInputBufferPtr
buf;
5139 ctxt = htmlNewParserCtxt();
5175static htmlParserCtxtPtr
5176htmlCreateDocParserCtxt(
const xmlChar *
str,
const char *encoding) {
5177 xmlParserCtxtPtr ctxt;
5178 xmlParserInputPtr
input;
5179 xmlParserInputBufferPtr
buf;
5184 ctxt = htmlNewParserCtxt();
5207 if (encoding !=
NULL) {
5219 "Unsupported encoding %s\n",
5231 "Unsupported encoding %s\n",
5240#ifdef LIBXML_PUSH_ENABLED
5266htmlParseLookupSequence(htmlParserCtxtPtr ctxt,
xmlChar first,
5270 htmlParserInputPtr
in;
5278 base = ctxt->checkIndex;
5279 quote = ctxt->endCheckState;
5291 ctxt->checkIndex = 0;
5292 ctxt->endCheckState = 0;
5295 if (ignoreattrval) {
5310 }
else if (
next != 0) {
5314 ctxt->checkIndex = 0;
5315 ctxt->endCheckState = 0;
5319 ctxt->checkIndex =
base;
5320 ctxt->endCheckState =
quote;
5339htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
5345 mark = htmlParseLookupSequence(ctxt,
'-',
'-', 0, 0);
5348 if ((
NXT(mark+2) ==
'>') ||
5349 ((
NXT(mark+2) ==
'!') && (
NXT(mark+3) ==
'>'))) {
5350 ctxt->checkIndex = 0;
5354 if (mark +
offset >= ctxt->input->end - ctxt->input->cur) {
5355 ctxt->checkIndex = mark;
5358 ctxt->checkIndex = mark + 1;
5374htmlParseTryOrFinish(htmlParserCtxtPtr ctxt,
int terminate) {
5376 htmlParserInputPtr
in;
5380 htmlParserNodeInfo node_info;
5388 htmlAutoCloseOnEnd(ctxt);
5394 if ((ctxt->sax) && (ctxt->sax->endDocument !=
NULL))
5395 ctxt->sax->endDocument(ctxt->userData);
5412 switch (ctxt->instate) {
5437 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
5438 ctxt->sax->setDocumentLocator(ctxt->userData,
5440 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
5441 (!ctxt->disableSAX))
5442 ctxt->sax->startDocument(ctxt->userData);
5448 if ((
cur ==
'<') && (
next ==
'!') &&
5449 (UPP(2) ==
'D') && (UPP(3) ==
'O') &&
5450 (UPP(4) ==
'C') && (UPP(5) ==
'T') &&
5451 (UPP(6) ==
'Y') && (UPP(7) ==
'P') &&
5454 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 1) < 0))
5456 htmlParseDocTypeDecl(ctxt);
5484 if ((
cur ==
'<') && (
next ==
'!') &&
5485 (
in->cur[2] ==
'-') && (
in->cur[3] ==
'-')) {
5486 if ((!
terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
5488 htmlParseComment(ctxt);
5492 }
else if ((
cur ==
'<') && (
next ==
'?')) {
5494 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 0) < 0))
5500 }
else if ((
cur ==
'<') && (
next ==
'!') &&
5501 (UPP(2) ==
'D') && (UPP(3) ==
'O') &&
5502 (UPP(4) ==
'C') && (UPP(5) ==
'T') &&
5503 (UPP(6) ==
'Y') && (UPP(7) ==
'P') &&
5506 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 1) < 0))
5508 htmlParseDocTypeDecl(ctxt);
5512 }
else if ((
cur ==
'<') && (
next ==
'!') &&
5526 if ((
cur ==
'<') && (
next ==
'!') &&
5527 (
in->cur[2] ==
'-') && (
in->cur[3] ==
'-')) {
5528 if ((!
terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
5530 htmlParseComment(ctxt);
5534 }
else if ((
cur ==
'<') && (
next ==
'?')) {
5536 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 0) < 0))
5542 }
else if ((
cur ==
'<') && (
next ==
'!') &&
5555 htmlParseCharData(ctxt);
5561 if ((
cur ==
'<') && (
next ==
'!') &&
5562 (
in->cur[2] ==
'-') && (
in->cur[3] ==
'-')) {
5563 if ((!
terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
5565 htmlParseComment(ctxt);
5569 }
else if ((
cur ==
'<') && (
next ==
'?')) {
5571 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 0) < 0))
5577 }
else if ((
cur ==
'<') && (
next ==
'!') &&
5582 ctxt->wellFormed = 0;
5584 if ((ctxt->sax) && (ctxt->sax->endDocument !=
NULL))
5585 ctxt->sax->endDocument(ctxt->userData);
5592 const htmlElemDesc *
info;
5617 ctxt->checkIndex = 0;
5621 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 1) < 0))
5625 if (ctxt->record_info) {
5626 node_info.begin_pos = ctxt->input->consumed +
5627 (
CUR_PTR - ctxt->input->base);
5628 node_info.begin_line = ctxt->input->line;
5632 failed = htmlParseStartTag(ctxt);
5634 if ((failed == -1) ||
5653 if ((
CUR ==
'/') && (
NXT(1) ==
'>')) {
5655 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
5656 ctxt->sax->endElement(ctxt->userData,
name);
5668 "Couldn't find end of Start Tag %s\n",
5679 if (ctxt->record_info)
5680 htmlNodeInfoPush(ctxt, &node_info);
5692 if ((ctxt->sax !=
NULL) && (ctxt->sax->endElement !=
NULL))
5693 ctxt->sax->endElement(ctxt->userData,
name);
5697 if (ctxt->record_info)
5698 htmlNodeInfoPush(ctxt, &node_info);
5711 if (ctxt->token != 0) {
5712 chr[0] = ctxt->token;
5713 htmlCheckParagraph(ctxt);
5714 if ((ctxt->sax !=
NULL) && (ctxt->sax->characters !=
NULL))
5715 ctxt->sax->characters(ctxt->userData, chr, 1);
5717 ctxt->checkIndex = 0;
5721 if ((
cur !=
'<') && (
cur !=
'&')) {
5722 if (ctxt->sax !=
NULL) {
5725 if (ctxt->keepBlanks) {
5726 if (ctxt->sax->characters !=
NULL)
5727 ctxt->sax->characters(
5728 ctxt->userData, chr, 1);
5730 if (ctxt->sax->ignorableWhitespace !=
NULL)
5731 ctxt->sax->ignorableWhitespace(
5732 ctxt->userData, chr, 1);
5735 htmlCheckParagraph(ctxt);
5736 if (ctxt->sax->characters !=
NULL)
5737 ctxt->sax->characters(
5738 ctxt->userData, chr, 1);
5742 ctxt->checkIndex = 0;
5760 idx = htmlParseLookupSequence(ctxt,
'<',
'/', 0, 0);
5769 ctxt->checkIndex =
idx;
5773 htmlParseScript(ctxt);
5776 if ((
cur ==
'<') && (
next ==
'/')) {
5778 ctxt->checkIndex = 0;
5781 }
else if ((
cur ==
'<') && (
next ==
'!')) {
5787 if ((UPP(2) ==
'D') && (UPP(3) ==
'O') &&
5788 (UPP(4) ==
'C') && (UPP(5) ==
'T') &&
5789 (UPP(6) ==
'Y') && (UPP(7) ==
'P') &&
5792 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 1) < 0))
5795 "Misplaced DOCTYPE declaration\n",
5797 htmlParseDocTypeDecl(ctxt);
5798 }
else if ((
in->cur[2] ==
'-') && (
in->cur[3] ==
'-')) {
5800 (htmlParseLookupCommentEnd(ctxt) < 0))
5802 htmlParseComment(ctxt);
5808 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 0) < 0))
5810 htmlSkipBogusComment(ctxt);
5812 }
else if ((
cur ==
'<') && (
next ==
'?')) {
5814 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 0) < 0))
5820 }
else if ((
cur ==
'<') && (
next ==
'/')) {
5822 ctxt->checkIndex = 0;
5828 ctxt->checkIndex = 0;
5830 }
else if (
cur ==
'<') {
5831 if ((ctxt->sax !=
NULL) && (!ctxt->disableSAX) &&
5832 (ctxt->sax->characters !=
NULL))
5833 ctxt->sax->characters(ctxt->userData,
5844 (htmlParseLookupSequence(ctxt,
'<', 0, 0, 0) < 0))
5846 ctxt->checkIndex = 0;
5848 (
cur !=
'<') && (
in->cur <
in->end)) {
5850 htmlParseReference(ctxt);
5852 htmlParseCharData(ctxt);
5864 (htmlParseLookupSequence(ctxt,
'>', 0, 0, 0) < 0))
5866 htmlParseEndTag(ctxt);
5869 if (ctxt->nameNr == 0) {
5874 ctxt->checkIndex = 0;
5878 "HPP: internal error\n",
NULL,
NULL);
5885 htmlAutoCloseOnEnd(ctxt);
5891 if ((ctxt->sax) && (ctxt->sax->endDocument !=
NULL))
5892 ctxt->sax->endDocument(ctxt->userData);
5895 if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc !=
NULL) &&
5899 dtd = xmlGetIntSubset(ctxt->myDoc);
5901 ctxt->myDoc->intSubset =
5902 xmlCreateIntSubset(ctxt->myDoc,
BAD_CAST "html",
5903 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
5904 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
5921htmlParseChunk(htmlParserCtxtPtr ctxt,
const char *
chunk,
int size,
5923 if ((ctxt ==
NULL) || (ctxt->input ==
NULL)) {
5925 "htmlParseChunk: context error\n",
NULL,
NULL);
5930 size_t pos = ctxt->input->cur - ctxt->input->base;
5936 htmlParseErr(ctxt, ctxt->input->buf->error,
5937 "xmlParserInputBufferPush failed",
NULL,
NULL);
5939 return (ctxt->errNo);
5948 ctxt->wellFormed = 0;
5951 if ((ctxt->sax) && (ctxt->sax->endDocument !=
NULL))
5952 ctxt->sax->endDocument(ctxt->userData);
5981htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
void *user_data,
5984 htmlParserCtxtPtr ctxt;
5985 htmlParserInputPtr inputStream;
5986 xmlParserInputBufferPtr
buf;
5993 ctxt = htmlNewSAXParserCtxt(sax, user_data);
5999 ctxt->directory =
NULL;
6004 inputStream = htmlNewInputStream(ctxt);
6005 if (inputStream ==
NULL) {
6012 inputStream->filename =
NULL;
6014 inputStream->filename = (
char *)
6016 inputStream->buf =
buf;
6025 (ctxt->input->buf !=
NULL)) {
6026 size_t pos = ctxt->input->cur - ctxt->input->base;
6032 htmlParseErr(ctxt, ctxt->input->buf->error,
6033 "xmlParserInputBufferPush failed\n",
NULL,
NULL);
6037 ctxt->progressive = 1;
6061htmlSAXParseDoc(
const xmlChar *
cur,
const char *encoding,
6062 htmlSAXHandlerPtr sax,
void *userData) {
6064 htmlParserCtxtPtr ctxt;
6071 ctxt = htmlCreateDocParserCtxt(
cur, encoding);
6076 ctxt->userData = userData;
6079 htmlParseDocument(ctxt);
6083 ctxt->userData =
NULL;
6085 htmlFreeParserCtxt(ctxt);
6101htmlParseDoc(
const xmlChar *
cur,
const char *encoding) {
6102 return(htmlSAXParseDoc(
cur, encoding,
NULL,
NULL));
6118htmlCreateFileParserCtxt(
const char *
filename,
const char *encoding)
6120 htmlParserCtxtPtr ctxt;
6121 htmlParserInputPtr inputStream;
6122 char *canonicFilename;
6127 ctxt = htmlNewParserCtxt();
6132 if (canonicFilename ==
NULL) {
6139 if (inputStream ==
NULL) {
6178htmlSAXParseFile(
const char *
filename,
const char *encoding, htmlSAXHandlerPtr sax,
6181 htmlParserCtxtPtr ctxt;
6182 htmlSAXHandlerPtr oldsax =
NULL;
6186 ctxt = htmlCreateFileParserCtxt(
filename, encoding);
6191 ctxt->userData = userData;
6194 htmlParseDocument(ctxt);
6199 ctxt->userData =
NULL;
6201 htmlFreeParserCtxt(ctxt);
6218htmlParseFile(
const char *
filename,
const char *encoding) {
6232htmlHandleOmittedElem(
int val) {
6233 int old = htmlOmittedDefaultValue;
6235 htmlOmittedDefaultValue =
val;
6250htmlElementAllowedHere(
const htmlElemDesc*
parent,
const xmlChar* elt) {
6273htmlElementStatusHere(
const htmlElemDesc*
parent,
const htmlElemDesc* elt) {
6275 return HTML_INVALID ;
6276 if ( ! htmlElementAllowedHere(
parent, (
const xmlChar*) elt->name ) )
6277 return HTML_INVALID ;
6279 return ( elt->dtd == 0 ) ? HTML_VALID : HTML_DEPRECATED ;
6296 if ( !elt || !
attr )
6297 return HTML_INVALID ;
6299 if ( elt->attrs_req )
6300 for (
p = elt->attrs_req; *
p; ++
p)
6302 return HTML_REQUIRED ;
6304 if ( elt->attrs_opt )
6305 for (
p = elt->attrs_opt; *
p; ++
p)
6309 if (
legacy && elt->attrs_depr )
6310 for (
p = elt->attrs_depr; *
p; ++
p)
6312 return HTML_DEPRECATED ;
6314 return HTML_INVALID ;
6331htmlNodeStatus(
const htmlNodePtr
node,
int legacy) {
6333 return HTML_INVALID ;
6335 switch (
node->type ) {
6336 case XML_ELEMENT_NODE:
6338 ? ( htmlElementAllowedHere (
6339 htmlTagLookup(
node->parent->name) ,
node->name
6340 ) ? HTML_VALID : HTML_INVALID )
6341 : htmlElementStatusHere(
6342 htmlTagLookup(
node->parent->name) ,
6343 htmlTagLookup(
node->name) )
6345 case XML_ATTRIBUTE_NODE:
6346 return htmlAttrAllowed(
6348 default:
return HTML_NA ;
6363#define DICT_FREE(str) \
6364 if ((str) && ((!dict) || \
6365 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
6366 xmlFree((char *)(str));
6375htmlCtxtReset(htmlParserCtxtPtr ctxt)
6377 xmlParserInputPtr
input;
6393 if (ctxt->spaceTab !=
NULL) {
6394 ctxt->spaceTab[0] = -1;
6395 ctxt->space = &ctxt->spaceTab[0];
6410 ctxt->version =
NULL;
6412 ctxt->encoding =
NULL;
6414 ctxt->directory =
NULL;
6416 ctxt->extSubURI =
NULL;
6418 ctxt->extSubSystem =
NULL;
6419 if (ctxt->myDoc !=
NULL)
6420 xmlFreeDoc(ctxt->myDoc);
6423 ctxt->standalone = -1;
6424 ctxt->hasExternalSubset = 0;
6425 ctxt->hasPErefs = 0;
6431 ctxt->wellFormed = 1;
6432 ctxt->nsWellFormed = 1;
6433 ctxt->disableSAX = 0;
6435 ctxt->vctxt.userData = ctxt;
6439 ctxt->record_info = 0;
6440 ctxt->checkIndex = 0;
6441 ctxt->endCheckState = 0;
6445 ctxt->catalogs =
NULL;
6448 if (ctxt->attsDefault !=
NULL) {
6450 ctxt->attsDefault =
NULL;
6452 if (ctxt->attsSpecial !=
NULL) {
6454 ctxt->attsSpecial =
NULL;
6458 ctxt->nbWarnings = 0;
6474htmlCtxtUseOptions(htmlParserCtxtPtr ctxt,
int options)
6479 if (
options & HTML_PARSE_NOWARNING) {
6480 ctxt->sax->warning =
NULL;
6481 ctxt->vctxt.warning =
NULL;
6485 if (
options & HTML_PARSE_NOERROR) {
6486 ctxt->sax->error =
NULL;
6487 ctxt->vctxt.error =
NULL;
6488 ctxt->sax->fatalError =
NULL;
6492 if (
options & HTML_PARSE_PEDANTIC) {
6499 ctxt->keepBlanks = 0;
6504 ctxt->keepBlanks = 1;
6505 if (
options & HTML_PARSE_RECOVER) {
6507 options -= HTML_PARSE_RECOVER;
6510 if (
options & HTML_PARSE_COMPACT) {
6511 ctxt->options |= HTML_PARSE_COMPACT;
6512 options -= HTML_PARSE_COMPACT;
6518 if (
options & HTML_PARSE_NODEFDTD) {
6519 ctxt->options |= HTML_PARSE_NODEFDTD;
6520 options -= HTML_PARSE_NODEFDTD;
6522 if (
options & HTML_PARSE_IGNORE_ENC) {
6523 ctxt->options |= HTML_PARSE_IGNORE_ENC;
6524 options -= HTML_PARSE_IGNORE_ENC;
6526 if (
options & HTML_PARSE_NOIMPLIED) {
6527 ctxt->options |= HTML_PARSE_NOIMPLIED;
6528 options -= HTML_PARSE_NOIMPLIED;
6530 ctxt->dictNames = 0;
6531 ctxt->linenumbers = 1;
6548htmlDoRead(htmlParserCtxtPtr ctxt,
const char *URL,
const char *encoding,
6553 htmlCtxtUseOptions(ctxt,
options);
6555 if (encoding !=
NULL) {
6563 if ((URL !=
NULL) && (ctxt->input !=
NULL) &&
6564 (ctxt->input->filename ==
NULL))
6566 htmlParseDocument(ctxt);
6570 if ((ctxt->dictNames) &&
6572 (
ret->dict == ctxt->dict))
6591htmlReadDoc(
const xmlChar *
cur,
const char *URL,
const char *encoding,
int options)
6593 htmlParserCtxtPtr ctxt;
6599 ctxt = htmlCreateDocParserCtxt(
cur,
NULL);
6602 return (htmlDoRead(ctxt, URL, encoding,
options, 0));
6618 htmlParserCtxtPtr ctxt;
6621 ctxt = htmlCreateFileParserCtxt(
filename, encoding);
6640htmlReadMemory(
const char *
buffer,
int size,
const char *URL,
const char *encoding,
int options)
6642 htmlParserCtxtPtr ctxt;
6645 ctxt = htmlCreateMemoryParserCtxt(
buffer,
size);
6648 return (htmlDoRead(ctxt, URL, encoding,
options, 0));
6665htmlReadFd(
int fd,
const char *URL,
const char *encoding,
int options)
6667 htmlParserCtxtPtr ctxt;
6668 xmlParserInputBufferPtr
input;
6669 htmlParserInputPtr
stream;
6679 ctxt = htmlNewParserCtxt();
6687 htmlFreeParserCtxt(ctxt);
6691 return (htmlDoRead(ctxt, URL, encoding,
options, 0));
6709 void *ioctx,
const char *URL,
const char *encoding,
int options)
6711 htmlParserCtxtPtr ctxt;
6712 xmlParserInputBufferPtr
input;
6713 xmlParserInputPtr
stream;
6722 if (ioclose !=
NULL)
6726 ctxt = htmlNewParserCtxt();
6738 return (htmlDoRead(ctxt, URL, encoding,
options, 0));
6755htmlCtxtReadDoc(htmlParserCtxtPtr ctxt,
const xmlChar *
str,
6756 const char *URL,
const char *encoding,
int options)
6758 xmlParserInputBufferPtr
input;
6759 xmlParserInputPtr
stream;
6767 htmlCtxtReset(ctxt);
6781 return (htmlDoRead(ctxt, URL, encoding,
options, 1));
6797htmlCtxtReadFile(htmlParserCtxtPtr ctxt,
const char *
filename,
6798 const char *encoding,
int options)
6800 xmlParserInputPtr
stream;
6808 htmlCtxtReset(ctxt);
6815 return (htmlDoRead(ctxt,
NULL, encoding,
options, 1));
6833htmlCtxtReadMemory(htmlParserCtxtPtr ctxt,
const char *
buffer,
int size,
6834 const char *URL,
const char *encoding,
int options)
6836 xmlParserInputBufferPtr
input;
6837 xmlParserInputPtr
stream;
6845 htmlCtxtReset(ctxt);
6860 return (htmlDoRead(ctxt, URL, encoding,
options, 1));
6877htmlCtxtReadFd(htmlParserCtxtPtr ctxt,
int fd,
6878 const char *URL,
const char *encoding,
int options)
6880 xmlParserInputBufferPtr
input;
6881 xmlParserInputPtr
stream;
6889 htmlCtxtReset(ctxt);
6901 return (htmlDoRead(ctxt, URL, encoding,
options, 1));
6923 const char *encoding,
int options)
6925 xmlParserInputBufferPtr
input;
6926 xmlParserInputPtr
stream;
6934 htmlCtxtReset(ctxt);
6939 if (ioclose !=
NULL)
6949 return (htmlDoRead(ctxt, URL, encoding,
options, 1));
XMLPUBFUN void xmlSAX2IgnorableWhitespace(void *ctx, const xmlChar *ch, int len)
_In_ uint16_t _Out_ ULONG * atts
int xmlBufResetInput(xmlBufPtr buf, xmlParserInputPtr input)
int xmlBufUpdateInput(xmlBufPtr buf, xmlParserInputPtr input, size_t pos)
static const WCHAR quote[]
UINT(* handler)(MSIPACKAGE *)
void CDECL terminate(void)
_ACRTIMP size_t __cdecl strlen(const char *)
_ACRTIMP int __cdecl strcmp(const char *, const char *)
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name)
xmlCharEncoding xmlParseCharEncoding(const char *name)
@ XML_CHAR_ENCODING_ERROR
@ XML_CHAR_ENCODING_8859_1
GLint GLint GLsizei GLsizei GLsizei depth
GLuint GLuint GLsizei count
GLdouble GLdouble GLdouble r
GLdouble GLdouble GLdouble GLdouble q
GLenum GLuint GLenum GLsizei const GLchar * buf
GLenum GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * bits
GLenum GLenum GLenum input
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
#define memcpy(s1, s2, n)
D3D11_SHADER_VARIABLE_DESC desc
#define XML_MAX_TEXT_LENGTH
#define XML_MAX_NAME_LENGTH
XML_DEPRECATED XMLPUBFUN xmlNodePtr nodePop(xmlParserCtxtPtr ctxt)
XMLPUBFUN int xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
#define IS_PUBIDCHAR_CH(c)
#define IS_ASCII_DIGIT(c)
XMLPUBFUN int xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
XMLPUBFUN void xmlFreeInputStream(xmlParserInputPtr input)
XMLPUBFUN xmlParserInputPtr inputPop(xmlParserCtxtPtr ctxt)
#define XML_MAX_HUGE_LENGTH
#define IS_ASCII_LETTER(c)
XMLPUBFUN xmlParserInputPtr xmlNewInputStream(xmlParserCtxtPtr ctxt)
XMLPUBFUN int inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
static unsigned __int64 next
xmlDictPtr xmlDictCreate(void)
const xmlChar * xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len)
xmlReallocFunc xmlRealloc
xmlRegisterNodeFunc xmlRegisterNodeDefaultValue
xmlSAXLocator xmlDefaultSAXLocator
xmlMallocFunc xmlMallocAtomic
int xmlKeepBlanksDefaultValue
int xmlLineNumbersDefaultValue
void xmlHashFree(xmlHashTablePtr hash, xmlHashDeallocator dealloc)
void xmlHashDefaultDeallocator(void *entry, const xmlChar *key ATTRIBUTE_UNUSED)
XMLPUBFUN xmlParserInputPtr xmlLoadExternalEntity(const char *URL, const char *ID, xmlParserCtxtPtr ctxt)
XML_GLOBALS_PARSER XMLPUBFUN void xmlInitParser(void)
XMLPUBFUN void xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
XMLPUBFUN void xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, const xmlParserNodeInfoPtr info)
XMLPUBFUN void xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
XMLPUBFUN xmlParserInputPtr xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, xmlCharEncoding enc)
XML_HIDDEN void __xmlRaiseError(xmlStructuredErrorFunc schannel, xmlGenericErrorFunc channel, void *data, void *ctx, void *nod, int domain, int code, xmlErrorLevel level, const char *file, int line, const char *str1, const char *str2, const char *str3, int int1, int col, const char *msg,...) LIBXML_ATTR_FORMAT(16
XML_HIDDEN void xmlParserInputBufferPtr xmlParserInputBufferCreateString(const xmlChar *str)
XML_HIDDEN int xmlParserGrow(xmlParserCtxtPtr ctxt)
XML_HIDDEN void xmlDetectEncoding(xmlParserCtxtPtr ctxt)
XML_HIDDEN void xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding)
XML_HIDDEN void xmlParserErrors const char const xmlChar const xmlChar * str2
#define XML_INPUT_HAS_ENCODING
XML_HIDDEN void xmlParserErrors const char const xmlChar * str1
#define XML_VCTXT_USE_PCTXT
XML_HIDDEN void XML_HIDDEN void xmlHaltParser(xmlParserCtxtPtr ctxt)
XML_HIDDEN int __xmlRegisterCallbacks
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, int blank_chars)
#define growBuffer(buffer, n)
#define COPY_BUF(b, i, v)
wchar_t const *const size_t const buffer_size
static int processed(const type_t *type)
XMLPUBFUN xmlChar * xmlCanonicPath(const xmlChar *path)
wchar_t tm const _CrtWcstime_Writes_and_advances_ptr_ count wchar_t ** out
XMLPUBFUN xmlParserInputBufferPtr xmlParserInputBufferCreateMem(const char *mem, int size, xmlCharEncoding enc)
XMLPUBFUN xmlParserInputBufferPtr xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc)
XMLPUBFUN void xmlFreeParserInputBuffer(xmlParserInputBufferPtr in)
XMLPUBFUN xmlParserInputBufferPtr xmlAllocParserInputBuffer(xmlCharEncoding enc)
XMLPUBFUN char * xmlParserGetDirectory(const char *filename)
XMLPUBFUN xmlParserInputBufferPtr xmlParserInputBufferCreateStatic(const char *mem, int size, xmlCharEncoding enc)
int(* xmlInputReadCallback)(void *context, char *buffer, int len)
int(* xmlInputCloseCallback)(void *context)
XMLPUBFUN xmlParserInputBufferPtr xmlParserInputBufferCreateIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, void *ioctx, xmlCharEncoding enc)
XMLPUBFUN int xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf)
XMLPUBFUN void xmlResetError(xmlErrorPtr err)
XMLPUBFUN void XMLPUBFUN void XMLPUBFUN void xmlParserValidityError(void *ctx, const char *msg,...) LIBXML_ATTR_FORMAT(2
@ XML_ERR_ATTRIBUTE_NOT_FINISHED
@ XML_HTML_INCORRECTLY_OPENED_COMMENT
@ XML_ERR_ENTITYREF_SEMICOL_MISSING
@ XML_ERR_LITERAL_NOT_FINISHED
@ XML_ERR_LTSLASH_REQUIRED
@ XML_ERR_DOCTYPE_NOT_FINISHED
@ XML_ERR_PI_NOT_FINISHED
@ XML_ERR_ATTRIBUTE_REDEFINED
@ XML_ERR_TAG_NAME_MISMATCH
@ XML_ERR_INVALID_ENCODING
@ XML_ERR_INVALID_CHARREF
@ XML_HTML_STRUCURE_ERROR
@ XML_ERR_INVALID_DEC_CHARREF
@ XML_ERR_LITERAL_NOT_STARTED
@ XML_ERR_COMMENT_NOT_FINISHED
@ XML_ERR_ATTRIBUTE_WITHOUT_VALUE
@ XML_ERR_INVALID_HEX_CHARREF
@ XML_ERR_COMMENT_ABRUPTLY_ENDED
@ XML_ERR_UNSUPPORTED_ENCODING
XMLPUBFUN void XMLPUBFUN void XMLPUBFUN void XMLPUBFUN void xmlParserValidityWarning(void *ctx, const char *msg,...) LIBXML_ATTR_FORMAT(2
XMLPUBFUN xmlChar * xmlStrndup(const xmlChar *cur, int len)
XMLPUBFUN const xmlChar * xmlStrcasestr(const xmlChar *str, const xmlChar *val)
XMLPUBFUN int xmlStrlen(const xmlChar *str)
XMLPUBFUN int xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2)
XMLPUBFUN int xmlStrcmp(const xmlChar *str1, const xmlChar *str2)
XMLPUBFUN int xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len)
XMLPUBFUN int xmlStrEqual(const xmlChar *str1, const xmlChar *str2)
XMLPUBFUN int xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len)
XMLPUBFUN xmlChar * xmlStrdup(const xmlChar *cur)
#define LIBXML_ATTR_FORMAT(fmt, args)