Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > HTMLtree.c
Go to the documentation of this file.
00001 /* 00002 * HTMLtree.c : implementation of access function for an HTML tree. 00003 * 00004 * See Copyright for the status of this software. 00005 * 00006 * daniel@veillard.com 00007 */ 00008 00009 00010 #define IN_LIBXML 00011 #include "libxml.h" 00012 #ifdef LIBXML_HTML_ENABLED 00013 00014 #include <string.h> /* for memset() only ! */ 00015 00016 #ifdef HAVE_CTYPE_H 00017 #include <ctype.h> 00018 #endif 00019 #ifdef HAVE_STDLIB_H 00020 #include <stdlib.h> 00021 #endif 00022 00023 #include <libxml/xmlmemory.h> 00024 #include <libxml/HTMLparser.h> 00025 #include <libxml/HTMLtree.h> 00026 #include <libxml/entities.h> 00027 #include <libxml/valid.h> 00028 #include <libxml/xmlerror.h> 00029 #include <libxml/parserInternals.h> 00030 #include <libxml/globals.h> 00031 #include <libxml/uri.h> 00032 00033 /************************************************************************ 00034 * * 00035 * Getting/Setting encoding meta tags * 00036 * * 00037 ************************************************************************/ 00038 00047 const xmlChar * 00048 htmlGetMetaEncoding(htmlDocPtr doc) { 00049 htmlNodePtr cur; 00050 const xmlChar *content; 00051 const xmlChar *encoding; 00052 00053 if (doc == NULL) 00054 return(NULL); 00055 cur = doc->children; 00056 00057 /* 00058 * Search the html 00059 */ 00060 while (cur != NULL) { 00061 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { 00062 if (xmlStrEqual(cur->name, BAD_CAST"html")) 00063 break; 00064 if (xmlStrEqual(cur->name, BAD_CAST"head")) 00065 goto found_head; 00066 if (xmlStrEqual(cur->name, BAD_CAST"meta")) 00067 goto found_meta; 00068 } 00069 cur = cur->next; 00070 } 00071 if (cur == NULL) 00072 return(NULL); 00073 cur = cur->children; 00074 00075 /* 00076 * Search the head 00077 */ 00078 while (cur != NULL) { 00079 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { 00080 if (xmlStrEqual(cur->name, BAD_CAST"head")) 00081 break; 00082 if (xmlStrEqual(cur->name, BAD_CAST"meta")) 00083 goto 
found_meta; 00084 } 00085 cur = cur->next; 00086 } 00087 if (cur == NULL) 00088 return(NULL); 00089 found_head: 00090 cur = cur->children; 00091 00092 /* 00093 * Search the meta elements 00094 */ 00095 found_meta: 00096 while (cur != NULL) { 00097 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { 00098 if (xmlStrEqual(cur->name, BAD_CAST"meta")) { 00099 xmlAttrPtr attr = cur->properties; 00100 int http; 00101 const xmlChar *value; 00102 00103 content = NULL; 00104 http = 0; 00105 while (attr != NULL) { 00106 if ((attr->children != NULL) && 00107 (attr->children->type == XML_TEXT_NODE) && 00108 (attr->children->next == NULL)) { 00109 value = attr->children->content; 00110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) 00111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) 00112 http = 1; 00113 else if ((value != NULL) 00114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) 00115 content = value; 00116 if ((http != 0) && (content != NULL)) 00117 goto found_content; 00118 } 00119 attr = attr->next; 00120 } 00121 } 00122 } 00123 cur = cur->next; 00124 } 00125 return(NULL); 00126 00127 found_content: 00128 encoding = xmlStrstr(content, BAD_CAST"charset="); 00129 if (encoding == NULL) 00130 encoding = xmlStrstr(content, BAD_CAST"Charset="); 00131 if (encoding == NULL) 00132 encoding = xmlStrstr(content, BAD_CAST"CHARSET="); 00133 if (encoding != NULL) { 00134 encoding += 8; 00135 } else { 00136 encoding = xmlStrstr(content, BAD_CAST"charset ="); 00137 if (encoding == NULL) 00138 encoding = xmlStrstr(content, BAD_CAST"Charset ="); 00139 if (encoding == NULL) 00140 encoding = xmlStrstr(content, BAD_CAST"CHARSET ="); 00141 if (encoding != NULL) 00142 encoding += 9; 00143 } 00144 if (encoding != NULL) { 00145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++; 00146 } 00147 return(encoding); 00148 } 00149 00161 int 00162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) { 00163 htmlNodePtr cur, meta = NULL, head = NULL; 00164 
const xmlChar *content = NULL; 00165 char newcontent[100]; 00166 00167 00168 if (doc == NULL) 00169 return(-1); 00170 00171 /* html isn't a real encoding it's just libxml2 way to get entities */ 00172 if (!xmlStrcasecmp(encoding, BAD_CAST "html")) 00173 return(-1); 00174 00175 if (encoding != NULL) { 00176 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s", 00177 (char *)encoding); 00178 newcontent[sizeof(newcontent) - 1] = 0; 00179 } 00180 00181 cur = doc->children; 00182 00183 /* 00184 * Search the html 00185 */ 00186 while (cur != NULL) { 00187 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { 00188 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0) 00189 break; 00190 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) 00191 goto found_head; 00192 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) 00193 goto found_meta; 00194 } 00195 cur = cur->next; 00196 } 00197 if (cur == NULL) 00198 return(-1); 00199 cur = cur->children; 00200 00201 /* 00202 * Search the head 00203 */ 00204 while (cur != NULL) { 00205 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { 00206 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) 00207 break; 00208 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) { 00209 head = cur->parent; 00210 goto found_meta; 00211 } 00212 } 00213 cur = cur->next; 00214 } 00215 if (cur == NULL) 00216 return(-1); 00217 found_head: 00218 head = cur; 00219 if (cur->children == NULL) 00220 goto create; 00221 cur = cur->children; 00222 00223 found_meta: 00224 /* 00225 * Search and update all the remaining the meta elements carrying 00226 * encoding informations 00227 */ 00228 while (cur != NULL) { 00229 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { 00230 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) { 00231 xmlAttrPtr attr = cur->properties; 00232 int http; 00233 const xmlChar *value; 00234 00235 content = NULL; 00236 http = 0; 00237 while (attr != NULL) { 00238 if ((attr->children != NULL) && 00239 
(attr->children->type == XML_TEXT_NODE) && 00240 (attr->children->next == NULL)) { 00241 value = attr->children->content; 00242 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) 00243 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) 00244 http = 1; 00245 else 00246 { 00247 if ((value != NULL) && 00248 (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) 00249 content = value; 00250 } 00251 if ((http != 0) && (content != NULL)) 00252 break; 00253 } 00254 attr = attr->next; 00255 } 00256 if ((http != 0) && (content != NULL)) { 00257 meta = cur; 00258 break; 00259 } 00260 00261 } 00262 } 00263 cur = cur->next; 00264 } 00265 create: 00266 if (meta == NULL) { 00267 if ((encoding != NULL) && (head != NULL)) { 00268 /* 00269 * Create a new Meta element with the right attributes 00270 */ 00271 00272 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); 00273 if (head->children == NULL) 00274 xmlAddChild(head, meta); 00275 else 00276 xmlAddPrevSibling(head->children, meta); 00277 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); 00278 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); 00279 } 00280 } else { 00281 /* change the document only if there is a real encoding change */ 00282 if (xmlStrcasestr(content, encoding) == NULL) { 00283 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent); 00284 } 00285 } 00286 00287 00288 return(0); 00289 } 00290 00299 static const char* htmlBooleanAttrs[] = { 00300 "checked", "compact", "declare", "defer", "disabled", "ismap", 00301 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", 00302 "selected", NULL 00303 }; 00304 00305 00314 int 00315 htmlIsBooleanAttr(const xmlChar *name) 00316 { 00317 int i = 0; 00318 00319 while (htmlBooleanAttrs[i] != NULL) { 00320 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0) 00321 return 1; 00322 i++; 00323 } 00324 return 0; 00325 } 00326 00327 #ifdef LIBXML_OUTPUT_ENABLED 00328 /* 00329 * private routine exported from xmlIO.c 00330 */ 00331 
xmlOutputBufferPtr 00332 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder); 00333 /************************************************************************ 00334 * * 00335 * Output error handlers * 00336 * * 00337 ************************************************************************/ 00344 static void 00345 htmlSaveErrMemory(const char *extra) 00346 { 00347 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra); 00348 } 00349 00358 static void 00359 htmlSaveErr(int code, xmlNodePtr node, const char *extra) 00360 { 00361 const char *msg = NULL; 00362 00363 switch(code) { 00364 case XML_SAVE_NOT_UTF8: 00365 msg = "string is not in UTF-8\n"; 00366 break; 00367 case XML_SAVE_CHAR_INVALID: 00368 msg = "invalid character value\n"; 00369 break; 00370 case XML_SAVE_UNKNOWN_ENCODING: 00371 msg = "unknown encoding %s\n"; 00372 break; 00373 case XML_SAVE_NO_DOCTYPE: 00374 msg = "HTML has no DOCTYPE\n"; 00375 break; 00376 default: 00377 msg = "unexpected error number\n"; 00378 } 00379 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra); 00380 } 00381 00382 /************************************************************************ 00383 * * 00384 * Dumping HTML tree content to a simple buffer * 00385 * * 00386 ************************************************************************/ 00387 00388 static int 00389 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, 00390 int format); 00391 00403 static int 00404 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, 00405 int format) { 00406 unsigned int use; 00407 int ret; 00408 xmlOutputBufferPtr outbuf; 00409 00410 if (cur == NULL) { 00411 return (-1); 00412 } 00413 if (buf == NULL) { 00414 return (-1); 00415 } 00416 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); 00417 if (outbuf == NULL) { 00418 htmlSaveErrMemory("allocating HTML output buffer"); 00419 return (-1); 00420 } 00421 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer)); 00422 
outbuf->buffer = buf; 00423 outbuf->encoder = NULL; 00424 outbuf->writecallback = NULL; 00425 outbuf->closecallback = NULL; 00426 outbuf->context = NULL; 00427 outbuf->written = 0; 00428 00429 use = buf->use; 00430 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); 00431 xmlFree(outbuf); 00432 ret = buf->use - use; 00433 return (ret); 00434 } 00435 00447 int 00448 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { 00449 xmlInitParser(); 00450 00451 return(htmlNodeDumpFormat(buf, doc, cur, 1)); 00452 } 00453 00468 int 00469 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, 00470 xmlNodePtr cur, const char *encoding, int format) { 00471 xmlOutputBufferPtr buf; 00472 xmlCharEncodingHandlerPtr handler = NULL; 00473 int ret; 00474 00475 xmlInitParser(); 00476 00477 if (encoding != NULL) { 00478 xmlCharEncoding enc; 00479 00480 enc = xmlParseCharEncoding(encoding); 00481 if (enc != XML_CHAR_ENCODING_UTF8) { 00482 handler = xmlFindCharEncodingHandler(encoding); 00483 if (handler == NULL) 00484 return(-1); 00485 } 00486 } 00487 00488 /* 00489 * Fallback to HTML or ASCII when the encoding is unspecified 00490 */ 00491 if (handler == NULL) 00492 handler = xmlFindCharEncodingHandler("HTML"); 00493 if (handler == NULL) 00494 handler = xmlFindCharEncodingHandler("ascii"); 00495 00496 /* 00497 * save the content to a temp buffer. 
00498 */ 00499 buf = xmlOutputBufferCreateFile(out, handler); 00500 if (buf == NULL) return(0); 00501 00502 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); 00503 00504 ret = xmlOutputBufferClose(buf); 00505 return(ret); 00506 } 00507 00517 void 00518 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { 00519 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1); 00520 } 00521 00532 void 00533 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { 00534 xmlOutputBufferPtr buf; 00535 xmlCharEncodingHandlerPtr handler = NULL; 00536 const char *encoding; 00537 00538 xmlInitParser(); 00539 00540 if ((mem == NULL) || (size == NULL)) 00541 return; 00542 if (cur == NULL) { 00543 *mem = NULL; 00544 *size = 0; 00545 return; 00546 } 00547 00548 encoding = (const char *) htmlGetMetaEncoding(cur); 00549 00550 if (encoding != NULL) { 00551 xmlCharEncoding enc; 00552 00553 enc = xmlParseCharEncoding(encoding); 00554 if (enc != cur->charset) { 00555 if (cur->charset != XML_CHAR_ENCODING_UTF8) { 00556 /* 00557 * Not supported yet 00558 */ 00559 *mem = NULL; 00560 *size = 0; 00561 return; 00562 } 00563 00564 handler = xmlFindCharEncodingHandler(encoding); 00565 if (handler == NULL) { 00566 *mem = NULL; 00567 *size = 0; 00568 return; 00569 } 00570 } else { 00571 handler = xmlFindCharEncodingHandler(encoding); 00572 } 00573 } 00574 00575 /* 00576 * Fallback to HTML or ASCII when the encoding is unspecified 00577 */ 00578 if (handler == NULL) 00579 handler = xmlFindCharEncodingHandler("HTML"); 00580 if (handler == NULL) 00581 handler = xmlFindCharEncodingHandler("ascii"); 00582 00583 buf = xmlAllocOutputBufferInternal(handler); 00584 if (buf == NULL) { 00585 *mem = NULL; 00586 *size = 0; 00587 return; 00588 } 00589 00590 htmlDocContentDumpFormatOutput(buf, cur, NULL, format); 00591 00592 xmlOutputBufferFlush(buf); 00593 if (buf->conv != NULL) { 00594 *size = buf->conv->use; 00595 *mem = xmlStrndup(buf->conv->content, *size); 00596 } else { 00597 *size 
= buf->buffer->use; 00598 *mem = xmlStrndup(buf->buffer->content, *size); 00599 } 00600 (void)xmlOutputBufferClose(buf); 00601 } 00602 00612 void 00613 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { 00614 htmlDocDumpMemoryFormat(cur, mem, size, 1); 00615 } 00616 00617 00618 /************************************************************************ 00619 * * 00620 * Dumping HTML tree content to an I/O output buffer * 00621 * * 00622 ************************************************************************/ 00623 00624 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur); 00625 00636 static void 00637 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, 00638 const char *encoding ATTRIBUTE_UNUSED) { 00639 xmlDtdPtr cur = doc->intSubset; 00640 00641 if (cur == NULL) { 00642 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL); 00643 return; 00644 } 00645 xmlOutputBufferWriteString(buf, "<!DOCTYPE "); 00646 xmlOutputBufferWriteString(buf, (const char *)cur->name); 00647 if (cur->ExternalID != NULL) { 00648 xmlOutputBufferWriteString(buf, " PUBLIC "); 00649 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID); 00650 if (cur->SystemID != NULL) { 00651 xmlOutputBufferWriteString(buf, " "); 00652 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); 00653 } 00654 } else if (cur->SystemID != NULL) { 00655 xmlOutputBufferWriteString(buf, " SYSTEM "); 00656 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); 00657 } 00658 xmlOutputBufferWriteString(buf, ">\n"); 00659 } 00660 00670 static void 00671 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, 00672 const char *encoding ATTRIBUTE_UNUSED) { 00673 xmlChar *value; 00674 00675 /* 00676 * TODO: The html output method should not escape a & character 00677 * occurring in an attribute value immediately followed by 00678 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). 
00679 */ 00680 00681 if (cur == NULL) { 00682 return; 00683 } 00684 xmlOutputBufferWriteString(buf, " "); 00685 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { 00686 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); 00687 xmlOutputBufferWriteString(buf, ":"); 00688 } 00689 xmlOutputBufferWriteString(buf, (const char *)cur->name); 00690 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) { 00691 value = xmlNodeListGetString(doc, cur->children, 0); 00692 if (value) { 00693 xmlOutputBufferWriteString(buf, "="); 00694 if ((cur->ns == NULL) && (cur->parent != NULL) && 00695 (cur->parent->ns == NULL) && 00696 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) || 00697 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) || 00698 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) || 00699 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && 00700 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) { 00701 xmlChar *escaped; 00702 xmlChar *tmp = value; 00703 00704 while (IS_BLANK_CH(*tmp)) tmp++; 00705 00706 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); 00707 if (escaped != NULL) { 00708 xmlBufferWriteQuotedString(buf->buffer, escaped); 00709 xmlFree(escaped); 00710 } else { 00711 xmlBufferWriteQuotedString(buf->buffer, value); 00712 } 00713 } else { 00714 xmlBufferWriteQuotedString(buf->buffer, value); 00715 } 00716 xmlFree(value); 00717 } else { 00718 xmlOutputBufferWriteString(buf, "=\"\""); 00719 } 00720 } 00721 } 00722 00732 static void 00733 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) { 00734 if (cur == NULL) { 00735 return; 00736 } 00737 while (cur != NULL) { 00738 htmlAttrDumpOutput(buf, doc, cur, encoding); 00739 cur = cur->next; 00740 } 00741 } 00742 00743 00744 00755 static void 00756 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, 00757 xmlNodePtr cur, const char *encoding, int format) { 00758 if (cur == NULL) { 00759 return; 00760 } 00761 while (cur != NULL) { 00762 
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); 00763 cur = cur->next; 00764 } 00765 } 00766 00777 void 00778 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, 00779 xmlNodePtr cur, const char *encoding, int format) { 00780 const htmlElemDesc * info; 00781 00782 xmlInitParser(); 00783 00784 if ((cur == NULL) || (buf == NULL)) { 00785 return; 00786 } 00787 /* 00788 * Special cases. 00789 */ 00790 if (cur->type == XML_DTD_NODE) 00791 return; 00792 if ((cur->type == XML_HTML_DOCUMENT_NODE) || 00793 (cur->type == XML_DOCUMENT_NODE)){ 00794 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding); 00795 return; 00796 } 00797 if (cur->type == XML_ATTRIBUTE_NODE) { 00798 htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding); 00799 return; 00800 } 00801 if (cur->type == HTML_TEXT_NODE) { 00802 if (cur->content != NULL) { 00803 if (((cur->name == (const xmlChar *)xmlStringText) || 00804 (cur->name != (const xmlChar *)xmlStringTextNoenc)) && 00805 ((cur->parent == NULL) || 00806 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) && 00807 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) { 00808 xmlChar *buffer; 00809 00810 buffer = xmlEncodeEntitiesReentrant(doc, cur->content); 00811 if (buffer != NULL) { 00812 xmlOutputBufferWriteString(buf, (const char *)buffer); 00813 xmlFree(buffer); 00814 } 00815 } else { 00816 xmlOutputBufferWriteString(buf, (const char *)cur->content); 00817 } 00818 } 00819 return; 00820 } 00821 if (cur->type == HTML_COMMENT_NODE) { 00822 if (cur->content != NULL) { 00823 xmlOutputBufferWriteString(buf, "<!--"); 00824 xmlOutputBufferWriteString(buf, (const char *)cur->content); 00825 xmlOutputBufferWriteString(buf, "-->"); 00826 } 00827 return; 00828 } 00829 if (cur->type == HTML_PI_NODE) { 00830 if (cur->name == NULL) 00831 return; 00832 xmlOutputBufferWriteString(buf, "<?"); 00833 xmlOutputBufferWriteString(buf, (const char *)cur->name); 00834 if (cur->content != NULL) { 00835 xmlOutputBufferWriteString(buf, " 
"); 00836 xmlOutputBufferWriteString(buf, (const char *)cur->content); 00837 } 00838 xmlOutputBufferWriteString(buf, ">"); 00839 return; 00840 } 00841 if (cur->type == HTML_ENTITY_REF_NODE) { 00842 xmlOutputBufferWriteString(buf, "&"); 00843 xmlOutputBufferWriteString(buf, (const char *)cur->name); 00844 xmlOutputBufferWriteString(buf, ";"); 00845 return; 00846 } 00847 if (cur->type == HTML_PRESERVE_NODE) { 00848 if (cur->content != NULL) { 00849 xmlOutputBufferWriteString(buf, (const char *)cur->content); 00850 } 00851 return; 00852 } 00853 00854 /* 00855 * Get specific HTML info for that node. 00856 */ 00857 if (cur->ns == NULL) 00858 info = htmlTagLookup(cur->name); 00859 else 00860 info = NULL; 00861 00862 xmlOutputBufferWriteString(buf, "<"); 00863 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { 00864 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); 00865 xmlOutputBufferWriteString(buf, ":"); 00866 } 00867 xmlOutputBufferWriteString(buf, (const char *)cur->name); 00868 if (cur->nsDef) 00869 xmlNsListDumpOutput(buf, cur->nsDef); 00870 if (cur->properties != NULL) 00871 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding); 00872 00873 if ((info != NULL) && (info->empty)) { 00874 xmlOutputBufferWriteString(buf, ">"); 00875 if ((format) && (!info->isinline) && (cur->next != NULL)) { 00876 if ((cur->next->type != HTML_TEXT_NODE) && 00877 (cur->next->type != HTML_ENTITY_REF_NODE) && 00878 (cur->parent != NULL) && 00879 (cur->parent->name != NULL) && 00880 (cur->parent->name[0] != 'p')) /* p, pre, param */ 00881 xmlOutputBufferWriteString(buf, "\n"); 00882 } 00883 return; 00884 } 00885 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) && 00886 (cur->children == NULL)) { 00887 if ((info != NULL) && (info->saveEndTag != 0) && 00888 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) && 00889 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) { 00890 xmlOutputBufferWriteString(buf, ">"); 00891 } else { 00892 
xmlOutputBufferWriteString(buf, "></"); 00893 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { 00894 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); 00895 xmlOutputBufferWriteString(buf, ":"); 00896 } 00897 xmlOutputBufferWriteString(buf, (const char *)cur->name); 00898 xmlOutputBufferWriteString(buf, ">"); 00899 } 00900 if ((format) && (cur->next != NULL) && 00901 (info != NULL) && (!info->isinline)) { 00902 if ((cur->next->type != HTML_TEXT_NODE) && 00903 (cur->next->type != HTML_ENTITY_REF_NODE) && 00904 (cur->parent != NULL) && 00905 (cur->parent->name != NULL) && 00906 (cur->parent->name[0] != 'p')) /* p, pre, param */ 00907 xmlOutputBufferWriteString(buf, "\n"); 00908 } 00909 return; 00910 } 00911 xmlOutputBufferWriteString(buf, ">"); 00912 if ((cur->type != XML_ELEMENT_NODE) && 00913 (cur->content != NULL)) { 00914 /* 00915 * Uses the OutputBuffer property to automatically convert 00916 * invalids to charrefs 00917 */ 00918 00919 xmlOutputBufferWriteString(buf, (const char *) cur->content); 00920 } 00921 if (cur->children != NULL) { 00922 if ((format) && (info != NULL) && (!info->isinline) && 00923 (cur->children->type != HTML_TEXT_NODE) && 00924 (cur->children->type != HTML_ENTITY_REF_NODE) && 00925 (cur->children != cur->last) && 00926 (cur->name != NULL) && 00927 (cur->name[0] != 'p')) /* p, pre, param */ 00928 xmlOutputBufferWriteString(buf, "\n"); 00929 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format); 00930 if ((format) && (info != NULL) && (!info->isinline) && 00931 (cur->last->type != HTML_TEXT_NODE) && 00932 (cur->last->type != HTML_ENTITY_REF_NODE) && 00933 (cur->children != cur->last) && 00934 (cur->name != NULL) && 00935 (cur->name[0] != 'p')) /* p, pre, param */ 00936 xmlOutputBufferWriteString(buf, "\n"); 00937 } 00938 xmlOutputBufferWriteString(buf, "</"); 00939 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { 00940 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); 00941 
xmlOutputBufferWriteString(buf, ":"); 00942 } 00943 xmlOutputBufferWriteString(buf, (const char *)cur->name); 00944 xmlOutputBufferWriteString(buf, ">"); 00945 if ((format) && (info != NULL) && (!info->isinline) && 00946 (cur->next != NULL)) { 00947 if ((cur->next->type != HTML_TEXT_NODE) && 00948 (cur->next->type != HTML_ENTITY_REF_NODE) && 00949 (cur->parent != NULL) && 00950 (cur->parent->name != NULL) && 00951 (cur->parent->name[0] != 'p')) /* p, pre, param */ 00952 xmlOutputBufferWriteString(buf, "\n"); 00953 } 00954 } 00955 00966 void 00967 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, 00968 xmlNodePtr cur, const char *encoding) { 00969 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1); 00970 } 00971 00981 void 00982 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, 00983 const char *encoding, int format) { 00984 int type; 00985 00986 xmlInitParser(); 00987 00988 if ((buf == NULL) || (cur == NULL)) 00989 return; 00990 00991 /* 00992 * force to output the stuff as HTML, especially for entities 00993 */ 00994 type = cur->type; 00995 cur->type = XML_HTML_DOCUMENT_NODE; 00996 if (cur->intSubset != NULL) { 00997 htmlDtdDumpOutput(buf, cur, NULL); 00998 } 00999 if (cur->children != NULL) { 01000 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format); 01001 } 01002 xmlOutputBufferWriteString(buf, "\n"); 01003 cur->type = (xmlElementType) type; 01004 } 01005 01014 void 01015 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, 01016 const char *encoding) { 01017 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1); 01018 } 01019 01020 /************************************************************************ 01021 * * 01022 * Saving functions front-ends * 01023 * * 01024 ************************************************************************/ 01025 01035 int 01036 htmlDocDump(FILE *f, xmlDocPtr cur) { 01037 xmlOutputBufferPtr buf; 01038 xmlCharEncodingHandlerPtr handler = NULL; 01039 const char *encoding; 
01040 int ret; 01041 01042 xmlInitParser(); 01043 01044 if ((cur == NULL) || (f == NULL)) { 01045 return(-1); 01046 } 01047 01048 encoding = (const char *) htmlGetMetaEncoding(cur); 01049 01050 if (encoding != NULL) { 01051 xmlCharEncoding enc; 01052 01053 enc = xmlParseCharEncoding(encoding); 01054 if (enc != cur->charset) { 01055 if (cur->charset != XML_CHAR_ENCODING_UTF8) { 01056 /* 01057 * Not supported yet 01058 */ 01059 return(-1); 01060 } 01061 01062 handler = xmlFindCharEncodingHandler(encoding); 01063 if (handler == NULL) 01064 return(-1); 01065 } else { 01066 handler = xmlFindCharEncodingHandler(encoding); 01067 } 01068 } 01069 01070 /* 01071 * Fallback to HTML or ASCII when the encoding is unspecified 01072 */ 01073 if (handler == NULL) 01074 handler = xmlFindCharEncodingHandler("HTML"); 01075 if (handler == NULL) 01076 handler = xmlFindCharEncodingHandler("ascii"); 01077 01078 buf = xmlOutputBufferCreateFile(f, handler); 01079 if (buf == NULL) return(-1); 01080 htmlDocContentDumpOutput(buf, cur, NULL); 01081 01082 ret = xmlOutputBufferClose(buf); 01083 return(ret); 01084 } 01085 01095 int 01096 htmlSaveFile(const char *filename, xmlDocPtr cur) { 01097 xmlOutputBufferPtr buf; 01098 xmlCharEncodingHandlerPtr handler = NULL; 01099 const char *encoding; 01100 int ret; 01101 01102 if ((cur == NULL) || (filename == NULL)) 01103 return(-1); 01104 01105 xmlInitParser(); 01106 01107 encoding = (const char *) htmlGetMetaEncoding(cur); 01108 01109 if (encoding != NULL) { 01110 xmlCharEncoding enc; 01111 01112 enc = xmlParseCharEncoding(encoding); 01113 if (enc != cur->charset) { 01114 if (cur->charset != XML_CHAR_ENCODING_UTF8) { 01115 /* 01116 * Not supported yet 01117 */ 01118 return(-1); 01119 } 01120 01121 handler = xmlFindCharEncodingHandler(encoding); 01122 if (handler == NULL) 01123 return(-1); 01124 } 01125 } 01126 01127 /* 01128 * Fallback to HTML or ASCII when the encoding is unspecified 01129 */ 01130 if (handler == NULL) 01131 handler = 
xmlFindCharEncodingHandler("HTML"); 01132 if (handler == NULL) 01133 handler = xmlFindCharEncodingHandler("ascii"); 01134 01135 /* 01136 * save the content to a temp buffer. 01137 */ 01138 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); 01139 if (buf == NULL) return(0); 01140 01141 htmlDocContentDumpOutput(buf, cur, NULL); 01142 01143 ret = xmlOutputBufferClose(buf); 01144 return(ret); 01145 } 01146 01158 int 01159 htmlSaveFileFormat(const char *filename, xmlDocPtr cur, 01160 const char *encoding, int format) { 01161 xmlOutputBufferPtr buf; 01162 xmlCharEncodingHandlerPtr handler = NULL; 01163 int ret; 01164 01165 if ((cur == NULL) || (filename == NULL)) 01166 return(-1); 01167 01168 xmlInitParser(); 01169 01170 if (encoding != NULL) { 01171 xmlCharEncoding enc; 01172 01173 enc = xmlParseCharEncoding(encoding); 01174 if (enc != cur->charset) { 01175 if (cur->charset != XML_CHAR_ENCODING_UTF8) { 01176 /* 01177 * Not supported yet 01178 */ 01179 return(-1); 01180 } 01181 01182 handler = xmlFindCharEncodingHandler(encoding); 01183 if (handler == NULL) 01184 return(-1); 01185 } 01186 htmlSetMetaEncoding(cur, (const xmlChar *) encoding); 01187 } else { 01188 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8"); 01189 } 01190 01191 /* 01192 * Fallback to HTML or ASCII when the encoding is unspecified 01193 */ 01194 if (handler == NULL) 01195 handler = xmlFindCharEncodingHandler("HTML"); 01196 if (handler == NULL) 01197 handler = xmlFindCharEncodingHandler("ascii"); 01198 01199 /* 01200 * save the content to a temp buffer. 
01201 */ 01202 buf = xmlOutputBufferCreateFilename(filename, handler, 0); 01203 if (buf == NULL) return(0); 01204 01205 htmlDocContentDumpFormatOutput(buf, cur, encoding, format); 01206 01207 ret = xmlOutputBufferClose(buf); 01208 return(ret); 01209 } 01210 01222 int 01223 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) { 01224 return(htmlSaveFileFormat(filename, cur, encoding, 1)); 01225 } 01226 01227 #endif /* LIBXML_OUTPUT_ENABLED */ 01228 01229 #define bottom_HTMLtree 01230 #include "elfgcchack.h" 01231 #endif /* LIBXML_HTML_ENABLED */ Generated on Mon May 28 2012 04:34:16 for ReactOS by
1.7.6.1
|