ReactOS 0.4.16-dev-2232-gc2aaa52
HTMLtree.c
Go to the documentation of this file.
1/*
2 * HTMLtree.c : implementation of access function for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
6 * daniel@veillard.com
7 */
8
9
10#define IN_LIBXML
11#include "libxml.h"
12#ifdef LIBXML_HTML_ENABLED
13
14#include <string.h> /* for memset() only ! */
15#include <ctype.h>
16#include <stdlib.h>
17
18#include <libxml/xmlmemory.h>
19#include <libxml/HTMLparser.h>
20#include <libxml/HTMLtree.h>
21#include <libxml/entities.h>
22#include <libxml/xmlerror.h>
24#include <libxml/uri.h>
25
26#include "private/buf.h"
27#include "private/error.h"
28#include "private/io.h"
29#include "private/save.h"
30
31/************************************************************************
32 * *
33 * Getting/Setting encoding meta tags *
34 * *
35 ************************************************************************/
36
45const xmlChar *
46htmlGetMetaEncoding(htmlDocPtr doc) {
47 htmlNodePtr cur;
48 const xmlChar *content;
49 const xmlChar *encoding;
50
51 if (doc == NULL)
52 return(NULL);
53 cur = doc->children;
54
55 /*
56 * Search the html
57 */
58 while (cur != NULL) {
59 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
60 if (xmlStrEqual(cur->name, BAD_CAST"html"))
61 break;
62 if (xmlStrEqual(cur->name, BAD_CAST"head"))
63 goto found_head;
64 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
65 goto found_meta;
66 }
67 cur = cur->next;
68 }
69 if (cur == NULL)
70 return(NULL);
71 cur = cur->children;
72
73 /*
74 * Search the head
75 */
76 while (cur != NULL) {
77 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
78 if (xmlStrEqual(cur->name, BAD_CAST"head"))
79 break;
80 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
81 goto found_meta;
82 }
83 cur = cur->next;
84 }
85 if (cur == NULL)
86 return(NULL);
87found_head:
88 cur = cur->children;
89
90 /*
91 * Search the meta elements
92 */
93found_meta:
94 while (cur != NULL) {
95 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
96 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
97 xmlAttrPtr attr = cur->properties;
98 int http;
99 const xmlChar *value;
100
101 content = NULL;
102 http = 0;
103 while (attr != NULL) {
104 if ((attr->children != NULL) &&
105 (attr->children->type == XML_TEXT_NODE) &&
106 (attr->children->next == NULL)) {
107 value = attr->children->content;
108 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
109 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
110 http = 1;
111 else if ((value != NULL)
112 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
113 content = value;
114 if ((http != 0) && (content != NULL))
115 goto found_content;
116 }
117 attr = attr->next;
118 }
119 }
120 }
121 cur = cur->next;
122 }
123 return(NULL);
124
125found_content:
126 encoding = xmlStrstr(content, BAD_CAST"charset=");
127 if (encoding == NULL)
128 encoding = xmlStrstr(content, BAD_CAST"Charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
131 if (encoding != NULL) {
132 encoding += 8;
133 } else {
134 encoding = xmlStrstr(content, BAD_CAST"charset =");
135 if (encoding == NULL)
136 encoding = xmlStrstr(content, BAD_CAST"Charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
139 if (encoding != NULL)
140 encoding += 9;
141 }
142 if (encoding != NULL) {
143 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
144 }
145 return(encoding);
146}
147
159int
160htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
161 htmlNodePtr cur, meta = NULL, head = NULL;
162 const xmlChar *content = NULL;
163 char newcontent[100];
164
165 newcontent[0] = 0;
166
167 if (doc == NULL)
168 return(-1);
169
170 /* html isn't a real encoding it's just libxml2 way to get entities */
171 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
172 return(-1);
173
174 if (encoding != NULL) {
175 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
176 (char *)encoding);
177 newcontent[sizeof(newcontent) - 1] = 0;
178 }
179
180 cur = doc->children;
181
182 /*
183 * Search the html
184 */
185 while (cur != NULL) {
186 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
187 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
188 break;
189 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
190 goto found_head;
191 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
192 goto found_meta;
193 }
194 cur = cur->next;
195 }
196 if (cur == NULL)
197 return(-1);
198 cur = cur->children;
199
200 /*
201 * Search the head
202 */
203 while (cur != NULL) {
204 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
205 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
206 break;
207 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
208 head = cur->parent;
209 goto found_meta;
210 }
211 }
212 cur = cur->next;
213 }
214 if (cur == NULL)
215 return(-1);
216found_head:
217 head = cur;
218 if (cur->children == NULL)
219 goto create;
220 cur = cur->children;
221
222found_meta:
223 /*
224 * Search and update all the remaining the meta elements carrying
225 * encoding information
226 */
227 while (cur != NULL) {
228 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
229 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
230 xmlAttrPtr attr = cur->properties;
231 int http;
232 const xmlChar *value;
233
234 content = NULL;
235 http = 0;
236 while (attr != NULL) {
237 if ((attr->children != NULL) &&
238 (attr->children->type == XML_TEXT_NODE) &&
239 (attr->children->next == NULL)) {
240 value = attr->children->content;
241 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
242 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
243 http = 1;
244 else
245 {
246 if ((value != NULL) &&
247 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
248 content = value;
249 }
250 if ((http != 0) && (content != NULL))
251 break;
252 }
253 attr = attr->next;
254 }
255 if ((http != 0) && (content != NULL)) {
256 meta = cur;
257 break;
258 }
259
260 }
261 }
262 cur = cur->next;
263 }
264create:
265 if (meta == NULL) {
266 if ((encoding != NULL) && (head != NULL)) {
267 /*
268 * Create a new Meta element with the right attributes
269 */
270
271 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
272 if (head->children == NULL)
273 xmlAddChild(head, meta);
274 else
275 xmlAddPrevSibling(head->children, meta);
276 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
277 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
278 }
279 } else {
280 /* remove the meta tag if NULL is passed */
281 if (encoding == NULL) {
282 xmlUnlinkNode(meta);
283 xmlFreeNode(meta);
284 }
285 /* change the document only if there is a real encoding change */
286 else if (xmlStrcasestr(content, encoding) == NULL) {
287 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
288 }
289 }
290
291
292 return(0);
293}
294
303static const char* const htmlBooleanAttrs[] = {
304 "checked", "compact", "declare", "defer", "disabled", "ismap",
305 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
306 "selected", NULL
307};
308
309
318int
319htmlIsBooleanAttr(const xmlChar *name)
320{
321 int i = 0;
322
323 while (htmlBooleanAttrs[i] != NULL) {
324 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
325 return 1;
326 i++;
327 }
328 return 0;
329}
330
331#ifdef LIBXML_OUTPUT_ENABLED
332/************************************************************************
333 * *
334 * Output error handlers *
335 * *
336 ************************************************************************/
343static void
344htmlSaveErrMemory(const char *extra)
345{
347}
348
357static void
358htmlSaveErr(int code, xmlNodePtr node, const char *extra)
359{
360 const char *msg = NULL;
361
362 switch(code) {
364 msg = "string is not in UTF-8\n";
365 break;
367 msg = "invalid character value\n";
368 break;
370 msg = "unknown encoding %s\n";
371 break;
373 msg = "HTML has no DOCTYPE\n";
374 break;
375 default:
376 msg = "unexpected error number\n";
377 }
379}
380
381/************************************************************************
382 * *
383 * Dumping HTML tree content to a simple buffer *
384 * *
385 ************************************************************************/
386
398static size_t
399htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
400 int format) {
401 size_t use;
402 int ret;
403 xmlOutputBufferPtr outbuf;
404
405 if (cur == NULL) {
406 return (-1);
407 }
408 if (buf == NULL) {
409 return (-1);
410 }
411 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
412 if (outbuf == NULL) {
413 htmlSaveErrMemory("allocating HTML output buffer");
414 return (-1);
415 }
416 memset(outbuf, 0, sizeof(xmlOutputBuffer));
417 outbuf->buffer = buf;
418 outbuf->encoder = NULL;
419 outbuf->writecallback = NULL;
420 outbuf->closecallback = NULL;
421 outbuf->context = NULL;
422 outbuf->written = 0;
423
424 use = xmlBufUse(buf);
425 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
426 xmlFree(outbuf);
427 ret = xmlBufUse(buf) - use;
428 return (ret);
429}
430
442int
443htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
444 xmlBufPtr buffer;
445 size_t ret;
446
447 if ((buf == NULL) || (cur == NULL))
448 return(-1);
449
452 if (buffer == NULL)
453 return(-1);
454
455 ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
456
458
459 if (ret > INT_MAX)
460 return(-1);
461 return((int) ret);
462}
463
478int
479htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
480 xmlNodePtr cur, const char *encoding, int format) {
481 xmlOutputBufferPtr buf;
483 int ret;
484
486
487 if (encoding != NULL) {
488 xmlCharEncoding enc;
489
490 enc = xmlParseCharEncoding(encoding);
491 if (enc != XML_CHAR_ENCODING_UTF8) {
493 if (handler == NULL)
494 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
495 }
496 } else {
497 /*
498 * Fallback to HTML or ASCII when the encoding is unspecified
499 */
500 if (handler == NULL)
502 if (handler == NULL)
504 }
505
506 /*
507 * save the content to a temp buffer.
508 */
509 buf = xmlOutputBufferCreateFile(out, handler);
510 if (buf == NULL) return(0);
511
512 htmlNodeDumpFormatOutput(buf, doc, cur, NULL, format);
513
514 ret = xmlOutputBufferClose(buf);
515 return(ret);
516}
517
527void
528htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
529 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
530}
531
542void
543htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
544 xmlOutputBufferPtr buf;
546 const char *encoding;
547
549
550 if ((mem == NULL) || (size == NULL))
551 return;
552 if (cur == NULL) {
553 *mem = NULL;
554 *size = 0;
555 return;
556 }
557
558 encoding = (const char *) htmlGetMetaEncoding(cur);
559
560 if (encoding != NULL) {
561 xmlCharEncoding enc;
562
563 enc = xmlParseCharEncoding(encoding);
564 if (enc != XML_CHAR_ENCODING_UTF8) {
566 if (handler == NULL)
567 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
568
569 }
570 } else {
571 /*
572 * Fallback to HTML or ASCII when the encoding is unspecified
573 */
574 if (handler == NULL)
576 if (handler == NULL)
578 }
579
580 buf = xmlAllocOutputBufferInternal(handler);
581 if (buf == NULL) {
582 *mem = NULL;
583 *size = 0;
584 return;
585 }
586
587 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
588
589 xmlOutputBufferFlush(buf);
590 if (buf->conv != NULL) {
591 *size = xmlBufUse(buf->conv);
592 *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
593 } else {
594 *size = xmlBufUse(buf->buffer);
595 *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
596 }
597 (void)xmlOutputBufferClose(buf);
598}
599
609void
610htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
611 htmlDocDumpMemoryFormat(cur, mem, size, 1);
612}
613
614
615/************************************************************************
616 * *
617 * Dumping HTML tree content to an I/O output buffer *
618 * *
619 ************************************************************************/
620
631static void
632htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
633 const char *encoding ATTRIBUTE_UNUSED) {
634 xmlDtdPtr cur = doc->intSubset;
635
636 if (cur == NULL) {
637 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
638 return;
639 }
640 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
641 xmlOutputBufferWriteString(buf, (const char *)cur->name);
642 if (cur->ExternalID != NULL) {
643 xmlOutputBufferWriteString(buf, " PUBLIC ");
644 xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
645 if (cur->SystemID != NULL) {
646 xmlOutputBufferWriteString(buf, " ");
647 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
648 }
649 } else if (cur->SystemID != NULL &&
650 xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
651 xmlOutputBufferWriteString(buf, " SYSTEM ");
652 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
653 }
654 xmlOutputBufferWriteString(buf, ">\n");
655}
656
665static void
666htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
667 xmlChar *value;
668
669 /*
670 * The html output method should not escape a & character
671 * occurring in an attribute value immediately followed by
672 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
673 * This is implemented in xmlEncodeEntitiesReentrant
674 */
675
676 if (cur == NULL) {
677 return;
678 }
679 xmlOutputBufferWriteString(buf, " ");
680 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
681 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
682 xmlOutputBufferWriteString(buf, ":");
683 }
684 xmlOutputBufferWriteString(buf, (const char *)cur->name);
685 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
686 value = xmlNodeListGetString(doc, cur->children, 0);
687 if (value) {
688 xmlOutputBufferWriteString(buf, "=");
689 if ((cur->ns == NULL) && (cur->parent != NULL) &&
690 (cur->parent->ns == NULL) &&
691 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
692 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
693 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
694 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
695 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
696 xmlChar *escaped;
697 xmlChar *tmp = value;
698
699 while (IS_BLANK_CH(*tmp)) tmp++;
700
701 /*
702 * Angle brackets are technically illegal in URIs, but they're
703 * used in server side includes, for example. Curly brackets
704 * are illegal as well and often used in templates.
705 * Don't escape non-whitespace, printable ASCII chars for
706 * improved interoperability. Only escape space, control
707 * and non-ASCII chars.
708 */
709 escaped = xmlURIEscapeStr(tmp,
710 BAD_CAST "\"#$%&+,/:;<=>?@[\\]^`{|}");
711 if (escaped != NULL) {
712 xmlBufWriteQuotedString(buf->buffer, escaped);
713 xmlFree(escaped);
714 } else {
716 }
717 } else {
719 }
720 xmlFree(value);
721 } else {
722 xmlOutputBufferWriteString(buf, "=\"\"");
723 }
724 }
725}
726
737void
738htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
739 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
740 int format) {
741 xmlNodePtr root, parent;
742 xmlAttrPtr attr;
743 const htmlElemDesc * info;
744
746
747 if ((cur == NULL) || (buf == NULL)) {
748 return;
749 }
750
751 root = cur;
752 parent = cur->parent;
753 while (1) {
754 switch (cur->type) {
755 case XML_HTML_DOCUMENT_NODE:
756 case XML_DOCUMENT_NODE:
757 if (((xmlDocPtr) cur)->intSubset != NULL) {
758 htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
759 }
760 if (cur->children != NULL) {
761 /* Always validate cur->parent when descending. */
762 if (cur->parent == parent) {
763 parent = cur;
764 cur = cur->children;
765 continue;
766 }
767 } else {
768 xmlOutputBufferWriteString(buf, "\n");
769 }
770 break;
771
772 case XML_ELEMENT_NODE:
773 /*
774 * Some users like lxml are known to pass nodes with a corrupted
775 * tree structure. Fall back to a recursive call to handle this
776 * case.
777 */
778 if ((cur->parent != parent) && (cur->children != NULL)) {
779 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
780 break;
781 }
782
783 /*
784 * Get specific HTML info for that node.
785 */
786 if (cur->ns == NULL)
787 info = htmlTagLookup(cur->name);
788 else
789 info = NULL;
790
791 xmlOutputBufferWriteString(buf, "<");
792 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
793 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
794 xmlOutputBufferWriteString(buf, ":");
795 }
796 xmlOutputBufferWriteString(buf, (const char *)cur->name);
797 if (cur->nsDef)
798 xmlNsListDumpOutput(buf, cur->nsDef);
799 attr = cur->properties;
800 while (attr != NULL) {
801 htmlAttrDumpOutput(buf, doc, attr);
802 attr = attr->next;
803 }
804
805 if ((info != NULL) && (info->empty)) {
806 xmlOutputBufferWriteString(buf, ">");
807 } else if (cur->children == NULL) {
808 if ((info != NULL) && (info->saveEndTag != 0) &&
809 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
810 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
811 xmlOutputBufferWriteString(buf, ">");
812 } else {
813 xmlOutputBufferWriteString(buf, "></");
814 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
815 xmlOutputBufferWriteString(buf,
816 (const char *)cur->ns->prefix);
817 xmlOutputBufferWriteString(buf, ":");
818 }
819 xmlOutputBufferWriteString(buf, (const char *)cur->name);
820 xmlOutputBufferWriteString(buf, ">");
821 }
822 } else {
823 xmlOutputBufferWriteString(buf, ">");
824 if ((format) && (info != NULL) && (!info->isinline) &&
825 (cur->children->type != HTML_TEXT_NODE) &&
826 (cur->children->type != HTML_ENTITY_REF_NODE) &&
827 (cur->children != cur->last) &&
828 (cur->name != NULL) &&
829 (cur->name[0] != 'p')) /* p, pre, param */
830 xmlOutputBufferWriteString(buf, "\n");
831 parent = cur;
832 cur = cur->children;
833 continue;
834 }
835
836 if ((format) && (cur->next != NULL) &&
837 (info != NULL) && (!info->isinline)) {
838 if ((cur->next->type != HTML_TEXT_NODE) &&
839 (cur->next->type != HTML_ENTITY_REF_NODE) &&
840 (parent != NULL) &&
841 (parent->name != NULL) &&
842 (parent->name[0] != 'p')) /* p, pre, param */
843 xmlOutputBufferWriteString(buf, "\n");
844 }
845
846 break;
847
848 case XML_ATTRIBUTE_NODE:
849 htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur);
850 break;
851
852 case HTML_TEXT_NODE:
853 if (cur->content == NULL)
854 break;
855 if (((cur->name == (const xmlChar *)xmlStringText) ||
856 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
857 ((parent == NULL) ||
858 ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
859 (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
861
862 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
863 if (buffer != NULL) {
864 xmlOutputBufferWriteString(buf, (const char *)buffer);
866 }
867 } else {
868 xmlOutputBufferWriteString(buf, (const char *)cur->content);
869 }
870 break;
871
872 case HTML_COMMENT_NODE:
873 if (cur->content != NULL) {
874 xmlOutputBufferWriteString(buf, "<!--");
875 xmlOutputBufferWriteString(buf, (const char *)cur->content);
876 xmlOutputBufferWriteString(buf, "-->");
877 }
878 break;
879
880 case HTML_PI_NODE:
881 if (cur->name != NULL) {
882 xmlOutputBufferWriteString(buf, "<?");
883 xmlOutputBufferWriteString(buf, (const char *)cur->name);
884 if (cur->content != NULL) {
885 xmlOutputBufferWriteString(buf, " ");
886 xmlOutputBufferWriteString(buf,
887 (const char *)cur->content);
888 }
889 xmlOutputBufferWriteString(buf, ">");
890 }
891 break;
892
893 case HTML_ENTITY_REF_NODE:
894 xmlOutputBufferWriteString(buf, "&");
895 xmlOutputBufferWriteString(buf, (const char *)cur->name);
896 xmlOutputBufferWriteString(buf, ";");
897 break;
898
899 case HTML_PRESERVE_NODE:
900 if (cur->content != NULL) {
901 xmlOutputBufferWriteString(buf, (const char *)cur->content);
902 }
903 break;
904
905 default:
906 break;
907 }
908
909 while (1) {
910 if (cur == root)
911 return;
912 if (cur->next != NULL) {
913 cur = cur->next;
914 break;
915 }
916
917 cur = parent;
918 /* cur->parent was validated when descending. */
919 parent = cur->parent;
920
921 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
922 (cur->type == XML_DOCUMENT_NODE)) {
923 xmlOutputBufferWriteString(buf, "\n");
924 } else {
925 if ((format) && (cur->ns == NULL))
926 info = htmlTagLookup(cur->name);
927 else
928 info = NULL;
929
930 if ((format) && (info != NULL) && (!info->isinline) &&
931 (cur->last->type != HTML_TEXT_NODE) &&
932 (cur->last->type != HTML_ENTITY_REF_NODE) &&
933 (cur->children != cur->last) &&
934 (cur->name != NULL) &&
935 (cur->name[0] != 'p')) /* p, pre, param */
936 xmlOutputBufferWriteString(buf, "\n");
937
938 xmlOutputBufferWriteString(buf, "</");
939 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
940 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
941 xmlOutputBufferWriteString(buf, ":");
942 }
943 xmlOutputBufferWriteString(buf, (const char *)cur->name);
944 xmlOutputBufferWriteString(buf, ">");
945
946 if ((format) && (info != NULL) && (!info->isinline) &&
947 (cur->next != NULL)) {
948 if ((cur->next->type != HTML_TEXT_NODE) &&
949 (cur->next->type != HTML_ENTITY_REF_NODE) &&
950 (parent != NULL) &&
951 (parent->name != NULL) &&
952 (parent->name[0] != 'p')) /* p, pre, param */
953 xmlOutputBufferWriteString(buf, "\n");
954 }
955 }
956 }
957 }
958}
959
970void
971htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
972 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) {
973 htmlNodeDumpFormatOutput(buf, doc, cur, NULL, 1);
974}
975
985void
986htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
987 const char *encoding ATTRIBUTE_UNUSED,
988 int format) {
989 int type = 0;
990 if (cur) {
991 type = cur->type;
992 cur->type = XML_HTML_DOCUMENT_NODE;
993 }
994 htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, format);
995 if (cur)
996 cur->type = (xmlElementType) type;
997}
998
1007void
1008htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1009 const char *encoding ATTRIBUTE_UNUSED) {
1010 htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, 1);
1011}
1012
1013/************************************************************************
1014 * *
1015 * Saving functions front-ends *
1016 * *
1017 ************************************************************************/
1018
1028int
1029htmlDocDump(FILE *f, xmlDocPtr cur) {
1030 xmlOutputBufferPtr buf;
1032 const char *encoding;
1033 int ret;
1034
1035 xmlInitParser();
1036
1037 if ((cur == NULL) || (f == NULL)) {
1038 return(-1);
1039 }
1040
1041 encoding = (const char *) htmlGetMetaEncoding(cur);
1042
1043 if (encoding != NULL) {
1044 xmlCharEncoding enc;
1045
1046 enc = xmlParseCharEncoding(encoding);
1047 if (enc != XML_CHAR_ENCODING_UTF8) {
1049 if (handler == NULL)
1050 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1051 }
1052 } else {
1053 /*
1054 * Fallback to HTML or ASCII when the encoding is unspecified
1055 */
1056 if (handler == NULL)
1058 if (handler == NULL)
1060 }
1061
1062 buf = xmlOutputBufferCreateFile(f, handler);
1063 if (buf == NULL) return(-1);
1064 htmlDocContentDumpOutput(buf, cur, NULL);
1065
1066 ret = xmlOutputBufferClose(buf);
1067 return(ret);
1068}
1069
1079int
1080htmlSaveFile(const char *filename, xmlDocPtr cur) {
1081 xmlOutputBufferPtr buf;
1083 const char *encoding;
1084 int ret;
1085
1086 if ((cur == NULL) || (filename == NULL))
1087 return(-1);
1088
1089 xmlInitParser();
1090
1091 encoding = (const char *) htmlGetMetaEncoding(cur);
1092
1093 if (encoding != NULL) {
1094 xmlCharEncoding enc;
1095
1096 enc = xmlParseCharEncoding(encoding);
1097 if (enc != XML_CHAR_ENCODING_UTF8) {
1099 if (handler == NULL)
1100 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1101 }
1102 } else {
1103 /*
1104 * Fallback to HTML or ASCII when the encoding is unspecified
1105 */
1106 if (handler == NULL)
1108 if (handler == NULL)
1110 }
1111
1112 /*
1113 * save the content to a temp buffer.
1114 */
1115 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1116 if (buf == NULL) return(0);
1117
1118 htmlDocContentDumpOutput(buf, cur, NULL);
1119
1120 ret = xmlOutputBufferClose(buf);
1121 return(ret);
1122}
1123
1135int
1136htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1137 const char *encoding, int format) {
1138 xmlOutputBufferPtr buf;
1140 int ret;
1141
1142 if ((cur == NULL) || (filename == NULL))
1143 return(-1);
1144
1145 xmlInitParser();
1146
1147 if (encoding != NULL) {
1148 xmlCharEncoding enc;
1149
1150 enc = xmlParseCharEncoding(encoding);
1151 if (enc != XML_CHAR_ENCODING_UTF8) {
1153 if (handler == NULL)
1154 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1155 }
1156 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1157 } else {
1158 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1159
1160 /*
1161 * Fallback to HTML or ASCII when the encoding is unspecified
1162 */
1163 if (handler == NULL)
1165 if (handler == NULL)
1167 }
1168
1169 /*
1170 * save the content to a temp buffer.
1171 */
1172 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1173 if (buf == NULL) return(0);
1174
1175 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1176
1177 ret = xmlOutputBufferClose(buf);
1178 return(ret);
1179}
1180
1192int
1193htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1194 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1195}
1196
1197#endif /* LIBXML_OUTPUT_ENABLED */
1198
1199#endif /* LIBXML_HTML_ENABLED */
struct outqueuenode * head
Definition: adnsresfilter.c:66
#define msg(x)
Definition: auth_time.c:54
struct _root root
xmlBufPtr xmlBufFromBuffer(xmlBufferPtr buffer)
Definition: buf.c:902
xmlChar * xmlBufContent(const xmlBuf *buf)
Definition: buf.c:490
int xmlBufWriteQuotedString(xmlBufPtr buf, const xmlChar *string)
Definition: buf.c:852
xmlBufferPtr xmlBufBackToBuffer(xmlBufPtr buf)
Definition: buf.c:938
size_t xmlBufUse(const xmlBufPtr buf)
Definition: buf.c:570
#define NULL
Definition: types.h:112
content
Definition: atl_ax.c:994
UINT(* handler)(MSIPACKAGE *)
Definition: action.c:7512
#define INT_MAX
Definition: limits.h:26
return ret
Definition: mutex.c:146
r parent
Definition: btrfs.c:3010
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name)
Definition: encoding.c:1677
xmlCharEncoding xmlParseCharEncoding(const char *name)
Definition: encoding.c:1148
xmlCharEncoding
Definition: encoding.h:65
@ XML_CHAR_ENCODING_UTF8
Definition: encoding.h:68
FxCollectionEntry * cur
GLuint GLuint GLsizei GLenum type
Definition: gl.h:1545
GLuint buffer
Definition: glext.h:5915
GLsizeiptr size
Definition: glext.h:5919
GLfloat f
Definition: glext.h:7540
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glext.h:7751
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define ATTRIBUTE_UNUSED
Definition: i386-dis.c:36
@ extra
Definition: id3.c:95
const char * filename
Definition: ioapi.h:137
XMLPUBFUN xmlChar * xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input)
static const struct access_res create[16]
Definition: package.c:7505
static WCHAR http[]
Definition: url.c:30
XMLPUBVAR const xmlChar xmlStringTextNoenc[]
XMLPUBVAR const xmlChar xmlStringText[]
#define IS_BLANK_CH(c)
xmlFreeFunc xmlFree
Definition: globals.c:184
xmlMallocFunc xmlMalloc
Definition: globals.c:193
XML_GLOBALS_PARSER XMLPUBFUN void xmlInitParser(void)
Definition: threads.c:569
XML_HIDDEN void XML_HIDDEN void __xmlSimpleError(int domain, int code, struct _xmlNode *node, const char *msg, const char *extra) LIBXML_ATTR_FORMAT(4
#define memset(x, y, z)
Definition: compat.h:39
Definition: cookie.c:202
WCHAR * name
Definition: cookie.c:203
Definition: inflate.c:139
Definition: format.c:58
Definition: mem.c:349
Definition: name.c:39
Definition: dlist.c:348
Definition: pdh_main.c:96
XMLPUBFUN xmlChar * xmlURIEscapeStr(const xmlChar *str, const xmlChar *list)
Definition: uri.c:1689
wchar_t tm const _CrtWcstime_Writes_and_advances_ptr_ count wchar_t ** out
Definition: wcsftime.cpp:383
#define snprintf
Definition: wintirpc.h:48
@ XML_FROM_OUTPUT
Definition: xmlerror.h:44
@ XML_SAVE_CHAR_INVALID
Definition: xmlerror.h:415
@ XML_SAVE_NOT_UTF8
Definition: xmlerror.h:414
@ XML_SAVE_NO_DOCTYPE
Definition: xmlerror.h:416
@ XML_SAVE_UNKNOWN_ENCODING
Definition: xmlerror.h:417
@ XML_ERR_NO_MEMORY
Definition: xmlerror.h:102
XMLPUBFUN xmlChar * xmlStrndup(const xmlChar *cur, int len)
Definition: xmlstring.c:45
XMLPUBFUN const xmlChar * xmlStrcasestr(const xmlChar *str, const xmlChar *val)
Definition: xmlstring.c:375
XMLPUBFUN int xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2)
Definition: xmlstring.c:277
XMLPUBFUN int xmlStrcmp(const xmlChar *str1, const xmlChar *str2)
Definition: xmlstring.c:135
XMLPUBFUN const xmlChar * xmlStrstr(const xmlChar *str, const xmlChar *val)
Definition: xmlstring.c:347
#define BAD_CAST
Definition: xmlstring.h:35
XMLPUBFUN int xmlStrEqual(const xmlChar *str1, const xmlChar *str2)
Definition: xmlstring.c:162
unsigned char xmlChar
Definition: xmlstring.h:28