ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

HTMLtree.c
Go to the documentation of this file.
00001 /*
00002  * HTMLtree.c : implementation of access functions for an HTML tree.
00003  *
00004  * See Copyright for the status of this software.
00005  *
00006  * daniel@veillard.com
00007  */
00008 
00009 
00010 #define IN_LIBXML
00011 #include "libxml.h"
00012 #ifdef LIBXML_HTML_ENABLED
00013 
00014 #include <string.h> /* for memset() only ! */
00015 
00016 #ifdef HAVE_CTYPE_H
00017 #include <ctype.h>
00018 #endif
00019 #ifdef HAVE_STDLIB_H
00020 #include <stdlib.h>
00021 #endif
00022 
00023 #include <libxml/xmlmemory.h>
00024 #include <libxml/HTMLparser.h>
00025 #include <libxml/HTMLtree.h>
00026 #include <libxml/entities.h>
00027 #include <libxml/valid.h>
00028 #include <libxml/xmlerror.h>
00029 #include <libxml/parserInternals.h>
00030 #include <libxml/globals.h>
00031 #include <libxml/uri.h>
00032 
00033 /************************************************************************
00034  *                                  *
00035  *          Getting/Setting encoding meta tags          *
00036  *                                  *
00037  ************************************************************************/
00038 
00047 const xmlChar *
00048 htmlGetMetaEncoding(htmlDocPtr doc) {
00049     htmlNodePtr cur;
00050     const xmlChar *content;
00051     const xmlChar *encoding;
00052 
00053     if (doc == NULL)
00054     return(NULL);
00055     cur = doc->children;
00056 
00057     /*
00058      * Search the html
00059      */
00060     while (cur != NULL) {
00061     if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
00062         if (xmlStrEqual(cur->name, BAD_CAST"html"))
00063         break;
00064         if (xmlStrEqual(cur->name, BAD_CAST"head"))
00065         goto found_head;
00066         if (xmlStrEqual(cur->name, BAD_CAST"meta"))
00067         goto found_meta;
00068     }
00069     cur = cur->next;
00070     }
00071     if (cur == NULL)
00072     return(NULL);
00073     cur = cur->children;
00074 
00075     /*
00076      * Search the head
00077      */
00078     while (cur != NULL) {
00079     if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
00080         if (xmlStrEqual(cur->name, BAD_CAST"head"))
00081         break;
00082         if (xmlStrEqual(cur->name, BAD_CAST"meta"))
00083         goto found_meta;
00084     }
00085     cur = cur->next;
00086     }
00087     if (cur == NULL)
00088     return(NULL);
00089 found_head:
00090     cur = cur->children;
00091 
00092     /*
00093      * Search the meta elements
00094      */
00095 found_meta:
00096     while (cur != NULL) {
00097     if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
00098         if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
00099         xmlAttrPtr attr = cur->properties;
00100         int http;
00101         const xmlChar *value;
00102 
00103         content = NULL;
00104         http = 0;
00105         while (attr != NULL) {
00106             if ((attr->children != NULL) &&
00107                 (attr->children->type == XML_TEXT_NODE) &&
00108                 (attr->children->next == NULL)) {
00109             value = attr->children->content;
00110             if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
00111              && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
00112                 http = 1;
00113             else if ((value != NULL)
00114              && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
00115                 content = value;
00116             if ((http != 0) && (content != NULL))
00117                 goto found_content;
00118             }
00119             attr = attr->next;
00120         }
00121         }
00122     }
00123     cur = cur->next;
00124     }
00125     return(NULL);
00126 
00127 found_content:
00128     encoding = xmlStrstr(content, BAD_CAST"charset=");
00129     if (encoding == NULL) 
00130     encoding = xmlStrstr(content, BAD_CAST"Charset=");
00131     if (encoding == NULL) 
00132     encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
00133     if (encoding != NULL) {
00134     encoding += 8;
00135     } else {
00136     encoding = xmlStrstr(content, BAD_CAST"charset =");
00137     if (encoding == NULL) 
00138         encoding = xmlStrstr(content, BAD_CAST"Charset =");
00139     if (encoding == NULL) 
00140         encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
00141     if (encoding != NULL)
00142         encoding += 9;
00143     }
00144     if (encoding != NULL) {
00145     while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
00146     }
00147     return(encoding);
00148 }
00149 
00161 int
00162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
00163     htmlNodePtr cur, meta = NULL, head = NULL;
00164     const xmlChar *content = NULL;
00165     char newcontent[100];
00166 
00167 
00168     if (doc == NULL)
00169     return(-1);
00170 
00171     /* html isn't a real encoding it's just libxml2 way to get entities */
00172     if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
00173         return(-1);
00174 
00175     if (encoding != NULL) {
00176     snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
00177                 (char *)encoding);
00178     newcontent[sizeof(newcontent) - 1] = 0;
00179     }
00180 
00181     cur = doc->children;
00182 
00183     /*
00184      * Search the html
00185      */
00186     while (cur != NULL) {
00187     if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
00188         if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
00189         break;
00190         if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
00191         goto found_head;
00192         if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
00193         goto found_meta;
00194     }
00195     cur = cur->next;
00196     }
00197     if (cur == NULL)
00198     return(-1);
00199     cur = cur->children;
00200 
00201     /*
00202      * Search the head
00203      */
00204     while (cur != NULL) {
00205     if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
00206         if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
00207         break;
00208         if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
00209                 head = cur->parent;
00210         goto found_meta;
00211             }
00212     }
00213     cur = cur->next;
00214     }
00215     if (cur == NULL)
00216     return(-1);
00217 found_head:
00218     head = cur;
00219     if (cur->children == NULL)
00220         goto create;
00221     cur = cur->children;
00222 
00223 found_meta:
00224     /*
00225      * Search and update all the remaining the meta elements carrying
00226      * encoding informations
00227      */
00228     while (cur != NULL) {
00229     if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
00230         if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
00231         xmlAttrPtr attr = cur->properties;
00232         int http;
00233         const xmlChar *value;
00234 
00235         content = NULL;
00236         http = 0;
00237         while (attr != NULL) {
00238             if ((attr->children != NULL) &&
00239                 (attr->children->type == XML_TEXT_NODE) &&
00240                 (attr->children->next == NULL)) {
00241             value = attr->children->content;
00242             if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
00243              && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
00244                 http = 1;
00245             else
00246                         {
00247                            if ((value != NULL) && 
00248                                (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
00249                    content = value;
00250                         }
00251                 if ((http != 0) && (content != NULL))
00252                 break;
00253             }
00254             attr = attr->next;
00255         }
00256         if ((http != 0) && (content != NULL)) {
00257             meta = cur;
00258             break;
00259         }
00260 
00261         }
00262     }
00263     cur = cur->next;
00264     }
00265 create:
00266     if (meta == NULL) {
00267         if ((encoding != NULL) && (head != NULL)) {
00268             /*
00269              * Create a new Meta element with the right attributes
00270              */
00271 
00272             meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
00273             if (head->children == NULL)
00274                 xmlAddChild(head, meta);
00275             else
00276                 xmlAddPrevSibling(head->children, meta);
00277             xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
00278             xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
00279         }
00280     } else {
00281         /* change the document only if there is a real encoding change */
00282         if (xmlStrcasestr(content, encoding) == NULL) {
00283             xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
00284         }
00285     }
00286 
00287 
00288     return(0);
00289 }
00290 
/*
 * htmlBooleanAttrs:
 *
 * NULL-terminated table of the attribute names that htmlIsBooleanAttr()
 * reports as boolean attributes.
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
00304 
00305 
00314 int
00315 htmlIsBooleanAttr(const xmlChar *name)
00316 {
00317     int i = 0;
00318 
00319     while (htmlBooleanAttrs[i] != NULL) {
00320         if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
00321             return 1;
00322         i++;
00323     }
00324     return 0;
00325 }
00326 
00327 #ifdef LIBXML_OUTPUT_ENABLED
00328 /*
00329  * private routine exported from xmlIO.c
00330  */
00331 xmlOutputBufferPtr
00332 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
00333 /************************************************************************
00334  *                                  *
00335  *          Output error handlers               *
00336  *                                  *
00337  ************************************************************************/
00344 static void
00345 htmlSaveErrMemory(const char *extra)
00346 {
00347     __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
00348 }
00349 
00358 static void
00359 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
00360 {
00361     const char *msg = NULL;
00362 
00363     switch(code) {
00364         case XML_SAVE_NOT_UTF8:
00365         msg = "string is not in UTF-8\n";
00366         break;
00367     case XML_SAVE_CHAR_INVALID:
00368         msg = "invalid character value\n";
00369         break;
00370     case XML_SAVE_UNKNOWN_ENCODING:
00371         msg = "unknown encoding %s\n";
00372         break;
00373     case XML_SAVE_NO_DOCTYPE:
00374         msg = "HTML has no DOCTYPE\n";
00375         break;
00376     default:
00377         msg = "unexpected error number\n";
00378     }
00379     __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
00380 }
00381 
00382 /************************************************************************
00383  *                                  *
00384  *          Dumping HTML tree content to a simple buffer        *
00385  *                                  *
00386  ************************************************************************/
00387 
00388 static int
00389 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
00390                int format);
00391 
00403 static int
00404 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
00405                int format) {
00406     unsigned int use;
00407     int ret;
00408     xmlOutputBufferPtr outbuf;
00409 
00410     if (cur == NULL) {
00411     return (-1);
00412     }
00413     if (buf == NULL) {
00414     return (-1);
00415     }
00416     outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
00417     if (outbuf == NULL) {
00418         htmlSaveErrMemory("allocating HTML output buffer");
00419     return (-1);
00420     }
00421     memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
00422     outbuf->buffer = buf;
00423     outbuf->encoder = NULL;
00424     outbuf->writecallback = NULL;
00425     outbuf->closecallback = NULL;
00426     outbuf->context = NULL;
00427     outbuf->written = 0;
00428 
00429     use = buf->use;
00430     htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
00431     xmlFree(outbuf);
00432     ret = buf->use - use;
00433     return (ret);
00434 }
00435 
00447 int
00448 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
00449     xmlInitParser();
00450 
00451     return(htmlNodeDumpFormat(buf, doc, cur, 1));
00452 }
00453 
00468 int
00469 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
00470                    xmlNodePtr cur, const char *encoding, int format) {
00471     xmlOutputBufferPtr buf;
00472     xmlCharEncodingHandlerPtr handler = NULL;
00473     int ret;
00474 
00475     xmlInitParser();
00476 
00477     if (encoding != NULL) {
00478     xmlCharEncoding enc;
00479 
00480     enc = xmlParseCharEncoding(encoding);
00481     if (enc != XML_CHAR_ENCODING_UTF8) {
00482         handler = xmlFindCharEncodingHandler(encoding);
00483         if (handler == NULL)
00484         return(-1);
00485     }
00486     }
00487 
00488     /*
00489      * Fallback to HTML or ASCII when the encoding is unspecified
00490      */
00491     if (handler == NULL)
00492     handler = xmlFindCharEncodingHandler("HTML");
00493     if (handler == NULL)
00494     handler = xmlFindCharEncodingHandler("ascii");
00495 
00496     /* 
00497      * save the content to a temp buffer.
00498      */
00499     buf = xmlOutputBufferCreateFile(out, handler);
00500     if (buf == NULL) return(0);
00501 
00502     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
00503 
00504     ret = xmlOutputBufferClose(buf);
00505     return(ret);
00506 }
00507 
00517 void
00518 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
00519     htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
00520 }
00521 
00532 void
00533 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
00534     xmlOutputBufferPtr buf;
00535     xmlCharEncodingHandlerPtr handler = NULL;
00536     const char *encoding;
00537 
00538     xmlInitParser();
00539 
00540     if ((mem == NULL) || (size == NULL))
00541         return;
00542     if (cur == NULL) {
00543     *mem = NULL;
00544     *size = 0;
00545     return;
00546     }
00547 
00548     encoding = (const char *) htmlGetMetaEncoding(cur);
00549 
00550     if (encoding != NULL) {
00551     xmlCharEncoding enc;
00552 
00553     enc = xmlParseCharEncoding(encoding);
00554     if (enc != cur->charset) {
00555         if (cur->charset != XML_CHAR_ENCODING_UTF8) {
00556         /*
00557          * Not supported yet
00558          */
00559         *mem = NULL;
00560         *size = 0;
00561         return;
00562         }
00563 
00564         handler = xmlFindCharEncodingHandler(encoding);
00565         if (handler == NULL) {
00566         *mem = NULL;
00567         *size = 0;
00568         return;
00569         }
00570     } else {
00571         handler = xmlFindCharEncodingHandler(encoding);
00572     }
00573     }
00574 
00575     /*
00576      * Fallback to HTML or ASCII when the encoding is unspecified
00577      */
00578     if (handler == NULL)
00579     handler = xmlFindCharEncodingHandler("HTML");
00580     if (handler == NULL)
00581     handler = xmlFindCharEncodingHandler("ascii");
00582 
00583     buf = xmlAllocOutputBufferInternal(handler);
00584     if (buf == NULL) {
00585     *mem = NULL;
00586     *size = 0;
00587     return;
00588     }
00589 
00590     htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
00591 
00592     xmlOutputBufferFlush(buf);
00593     if (buf->conv != NULL) {
00594     *size = buf->conv->use;
00595     *mem = xmlStrndup(buf->conv->content, *size);
00596     } else {
00597     *size = buf->buffer->use;
00598     *mem = xmlStrndup(buf->buffer->content, *size);
00599     }
00600     (void)xmlOutputBufferClose(buf);
00601 }
00602 
00612 void
00613 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
00614     htmlDocDumpMemoryFormat(cur, mem, size, 1);
00615 }
00616 
00617 
00618 /************************************************************************
00619  *                                  *
00620  *          Dumping HTML tree content to an I/O output buffer   *
00621  *                                  *
00622  ************************************************************************/
00623 
00624 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
00625 
00636 static void
00637 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
00638               const char *encoding ATTRIBUTE_UNUSED) {
00639     xmlDtdPtr cur = doc->intSubset;
00640 
00641     if (cur == NULL) {
00642     htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
00643     return;
00644     }
00645     xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
00646     xmlOutputBufferWriteString(buf, (const char *)cur->name);
00647     if (cur->ExternalID != NULL) {
00648     xmlOutputBufferWriteString(buf, " PUBLIC ");
00649     xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
00650     if (cur->SystemID != NULL) {
00651         xmlOutputBufferWriteString(buf, " ");
00652         xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
00653     } 
00654     }  else if (cur->SystemID != NULL) {
00655     xmlOutputBufferWriteString(buf, " SYSTEM ");
00656     xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
00657     }
00658     xmlOutputBufferWriteString(buf, ">\n");
00659 }
00660 
00670 static void
00671 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
00672                const char *encoding ATTRIBUTE_UNUSED) {
00673     xmlChar *value;
00674 
00675     /*
00676      * TODO: The html output method should not escape a & character
00677      *       occurring in an attribute value immediately followed by
00678      *       a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
00679      */
00680 
00681     if (cur == NULL) {
00682     return;
00683     }
00684     xmlOutputBufferWriteString(buf, " ");
00685     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
00686         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
00687     xmlOutputBufferWriteString(buf, ":");
00688     }
00689     xmlOutputBufferWriteString(buf, (const char *)cur->name);
00690     if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
00691     value = xmlNodeListGetString(doc, cur->children, 0);
00692     if (value) {
00693         xmlOutputBufferWriteString(buf, "=");
00694         if ((cur->ns == NULL) && (cur->parent != NULL) &&
00695         (cur->parent->ns == NULL) &&
00696         ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
00697              (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
00698          (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
00699          ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
00700           (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
00701         xmlChar *escaped;
00702         xmlChar *tmp = value;
00703 
00704         while (IS_BLANK_CH(*tmp)) tmp++;
00705 
00706         escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
00707         if (escaped != NULL) {
00708             xmlBufferWriteQuotedString(buf->buffer, escaped);
00709             xmlFree(escaped);
00710         } else {
00711             xmlBufferWriteQuotedString(buf->buffer, value);
00712         }
00713         } else {
00714         xmlBufferWriteQuotedString(buf->buffer, value);
00715         }
00716         xmlFree(value);
00717     } else  {
00718         xmlOutputBufferWriteString(buf, "=\"\"");
00719     }
00720     }
00721 }
00722 
00732 static void
00733 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
00734     if (cur == NULL) {
00735     return;
00736     }
00737     while (cur != NULL) {
00738         htmlAttrDumpOutput(buf, doc, cur, encoding);
00739     cur = cur->next;
00740     }
00741 }
00742 
00743 
00744 
00755 static void
00756 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
00757                    xmlNodePtr cur, const char *encoding, int format) {
00758     if (cur == NULL) {
00759     return;
00760     }
00761     while (cur != NULL) {
00762         htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
00763     cur = cur->next;
00764     }
00765 }
00766 
/*
 * htmlNodeDumpFormatOutput:
 * buf:  the HTML buffer output
 * doc:  the document, used for entity encoding and attribute values
 * cur:  the current node
 * encoding:  the encoding string, forwarded to the helpers
 * format:  should formatting newlines be added
 *
 * Dump an HTML node, recursive behaviour, children are printed too.
 * Nothing is written when cur or buf is NULL. DTD nodes are skipped,
 * document, attribute, text, comment, PI, entity reference and preserve
 * nodes are handled as special cases; everything else is written as an
 * element with its namespace definitions, attributes and children.
 */
00777 void
00778 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
00779                      xmlNodePtr cur, const char *encoding, int format) {
00780     const htmlElemDesc * info;
00781 
00782     xmlInitParser();
00783 
00784     if ((cur == NULL) || (buf == NULL)) {
00785     return;
00786     }
00787     /*
00788      * Special cases.
00789      */
00790     if (cur->type == XML_DTD_NODE)
00791     return;
/* NOTE: the format argument is not forwarded on this path; the document
 * is dumped through htmlDocContentDumpOutput(). */
00792     if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
00793         (cur->type == XML_DOCUMENT_NODE)){
00794     htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
00795     return;
00796     }
00797     if (cur->type == XML_ATTRIBUTE_NODE) {
00798         htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
00799     return;
00800     }
/* Text is entity-encoded unless the node is named xmlStringTextNoenc or
 * its parent is a script or style element, in which case it is written
 * verbatim.
 * NOTE(review): the first operand of the || below looks redundant,
 * assuming xmlStringText and xmlStringTextNoenc are distinct objects -
 * confirm. */
00801     if (cur->type == HTML_TEXT_NODE) {
00802     if (cur->content != NULL) {
00803         if (((cur->name == (const xmlChar *)xmlStringText) ||
00804          (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
00805         ((cur->parent == NULL) ||
00806          ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
00807           (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
00808         xmlChar *buffer;
00809 
00810         buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
00811         if (buffer != NULL) {
00812             xmlOutputBufferWriteString(buf, (const char *)buffer);
00813             xmlFree(buffer);
00814         }
00815         } else {
00816         xmlOutputBufferWriteString(buf, (const char *)cur->content);
00817         }
00818     }
00819     return;
00820     }
00821     if (cur->type == HTML_COMMENT_NODE) {
00822     if (cur->content != NULL) {
00823         xmlOutputBufferWriteString(buf, "<!--");
00824         xmlOutputBufferWriteString(buf, (const char *)cur->content);
00825         xmlOutputBufferWriteString(buf, "-->");
00826     }
00827     return;
00828     }
/* Processing instructions are closed with a plain ">" here, not "?>". */
00829     if (cur->type == HTML_PI_NODE) {
00830     if (cur->name == NULL)
00831         return;
00832     xmlOutputBufferWriteString(buf, "<?");
00833     xmlOutputBufferWriteString(buf, (const char *)cur->name);
00834     if (cur->content != NULL) {
00835         xmlOutputBufferWriteString(buf, " ");
00836         xmlOutputBufferWriteString(buf, (const char *)cur->content);
00837     }
00838     xmlOutputBufferWriteString(buf, ">");
00839     return;
00840     }
00841     if (cur->type == HTML_ENTITY_REF_NODE) {
00842         xmlOutputBufferWriteString(buf, "&");
00843     xmlOutputBufferWriteString(buf, (const char *)cur->name);
00844         xmlOutputBufferWriteString(buf, ";");
00845     return;
00846     }
00847     if (cur->type == HTML_PRESERVE_NODE) {
00848     if (cur->content != NULL) {
00849         xmlOutputBufferWriteString(buf, (const char *)cur->content);
00850     }
00851     return;
00852     }
00853 
00854     /*
00855      * Get specific HTML info for that node.
00856      */
/* Element descriptions are only looked up for un-namespaced elements. */
00857     if (cur->ns == NULL)
00858     info = htmlTagLookup(cur->name);
00859     else
00860     info = NULL;
00861 
00862     xmlOutputBufferWriteString(buf, "<");
00863     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
00864         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
00865     xmlOutputBufferWriteString(buf, ":");
00866     }
00867     xmlOutputBufferWriteString(buf, (const char *)cur->name);
00868     if (cur->nsDef)
00869     xmlNsListDumpOutput(buf, cur->nsDef);
00870     if (cur->properties != NULL)
00871         htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
00872 
/* Elements described as empty get no end tag and their children are not
 * visited. */
00873     if ((info != NULL) && (info->empty)) {
00874         xmlOutputBufferWriteString(buf, ">");
00875     if ((format) && (!info->isinline) && (cur->next != NULL)) {
00876         if ((cur->next->type != HTML_TEXT_NODE) &&
00877         (cur->next->type != HTML_ENTITY_REF_NODE) &&
00878         (cur->parent != NULL) &&
00879         (cur->parent->name != NULL) &&
00880         (cur->parent->name[0] != 'p')) /* p, pre, param */
00881         xmlOutputBufferWriteString(buf, "\n");
00882     }
00883     return;
00884     }
/* Childless element: the end tag is omitted when the element description
 * has saveEndTag set, except for html and body which always get one. */
00885     if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
00886     (cur->children == NULL)) {
00887         if ((info != NULL) && (info->saveEndTag != 0) &&
00888         (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
00889         (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
00890         xmlOutputBufferWriteString(buf, ">");
00891     } else {
00892         xmlOutputBufferWriteString(buf, "></");
00893             if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
00894                 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
00895                 xmlOutputBufferWriteString(buf, ":");
00896             }
00897         xmlOutputBufferWriteString(buf, (const char *)cur->name);
00898         xmlOutputBufferWriteString(buf, ">");
00899     }
00900     if ((format) && (cur->next != NULL) &&
00901             (info != NULL) && (!info->isinline)) {
00902         if ((cur->next->type != HTML_TEXT_NODE) &&
00903         (cur->next->type != HTML_ENTITY_REF_NODE) &&
00904         (cur->parent != NULL) &&
00905         (cur->parent->name != NULL) &&
00906         (cur->parent->name[0] != 'p')) /* p, pre, param */
00907         xmlOutputBufferWriteString(buf, "\n");
00908     }
00909     return;
00910     }
00911     xmlOutputBufferWriteString(buf, ">");
00912     if ((cur->type != XML_ELEMENT_NODE) &&
00913     (cur->content != NULL)) {
00914         /*
00915          * Uses the OutputBuffer property to automatically convert
00916          * invalids to charrefs
00917          */
00918 
00919             xmlOutputBufferWriteString(buf, (const char *) cur->content);
00920     }
/* Formatting newlines are written before and after the children only for
 * described, non-inline elements with more than one child, when the
 * adjacent child is neither text nor an entity reference and the element
 * name does not start with 'p'. */
00921     if (cur->children != NULL) {
00922         if ((format) && (info != NULL) && (!info->isinline) &&
00923         (cur->children->type != HTML_TEXT_NODE) &&
00924         (cur->children->type != HTML_ENTITY_REF_NODE) &&
00925         (cur->children != cur->last) &&
00926         (cur->name != NULL) &&
00927         (cur->name[0] != 'p')) /* p, pre, param */
00928         xmlOutputBufferWriteString(buf, "\n");
00929     htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
00930         if ((format) && (info != NULL) && (!info->isinline) &&
00931         (cur->last->type != HTML_TEXT_NODE) &&
00932         (cur->last->type != HTML_ENTITY_REF_NODE) &&
00933         (cur->children != cur->last) &&
00934         (cur->name != NULL) &&
00935         (cur->name[0] != 'p')) /* p, pre, param */
00936         xmlOutputBufferWriteString(buf, "\n");
00937     }
00938     xmlOutputBufferWriteString(buf, "</");
00939     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
00940         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
00941     xmlOutputBufferWriteString(buf, ":");
00942     }
00943     xmlOutputBufferWriteString(buf, (const char *)cur->name);
00944     xmlOutputBufferWriteString(buf, ">");
00945     if ((format) && (info != NULL) && (!info->isinline) &&
00946     (cur->next != NULL)) {
00947         if ((cur->next->type != HTML_TEXT_NODE) &&
00948         (cur->next->type != HTML_ENTITY_REF_NODE) &&
00949         (cur->parent != NULL) &&
00950         (cur->parent->name != NULL) &&
00951         (cur->parent->name[0] != 'p')) /* p, pre, param */
00952         xmlOutputBufferWriteString(buf, "\n");
00953     }
00954 }
00955 
00966 void
00967 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
00968                xmlNodePtr cur, const char *encoding) {
00969     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
00970 }
00971 
00981 void
00982 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
00983                            const char *encoding, int format) {
00984     int type;
00985 
00986     xmlInitParser();
00987 
00988     if ((buf == NULL) || (cur == NULL))
00989         return;
00990 
00991     /*
00992      * force to output the stuff as HTML, especially for entities
00993      */
00994     type = cur->type;
00995     cur->type = XML_HTML_DOCUMENT_NODE;
00996     if (cur->intSubset != NULL) {
00997         htmlDtdDumpOutput(buf, cur, NULL);
00998     }
00999     if (cur->children != NULL) {
01000         htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
01001     }
01002     xmlOutputBufferWriteString(buf, "\n");
01003     cur->type = (xmlElementType) type;
01004 }
01005 
01014 void
01015 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
01016                      const char *encoding) {
01017     htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
01018 }
01019 
01020 /************************************************************************
01021  *                                  *
01022  *      Saving functions front-ends             *
01023  *                                  *
01024  ************************************************************************/
01025 
01035 int
01036 htmlDocDump(FILE *f, xmlDocPtr cur) {
01037     xmlOutputBufferPtr buf;
01038     xmlCharEncodingHandlerPtr handler = NULL;
01039     const char *encoding;
01040     int ret;
01041 
01042     xmlInitParser();
01043 
01044     if ((cur == NULL) || (f == NULL)) {
01045     return(-1);
01046     }
01047 
01048     encoding = (const char *) htmlGetMetaEncoding(cur);
01049 
01050     if (encoding != NULL) {
01051     xmlCharEncoding enc;
01052 
01053     enc = xmlParseCharEncoding(encoding);
01054     if (enc != cur->charset) {
01055         if (cur->charset != XML_CHAR_ENCODING_UTF8) {
01056         /*
01057          * Not supported yet
01058          */
01059         return(-1);
01060         }
01061 
01062         handler = xmlFindCharEncodingHandler(encoding);
01063         if (handler == NULL)
01064         return(-1);
01065     } else {
01066         handler = xmlFindCharEncodingHandler(encoding);
01067     }
01068     }
01069 
01070     /*
01071      * Fallback to HTML or ASCII when the encoding is unspecified
01072      */
01073     if (handler == NULL)
01074     handler = xmlFindCharEncodingHandler("HTML");
01075     if (handler == NULL)
01076     handler = xmlFindCharEncodingHandler("ascii");
01077 
01078     buf = xmlOutputBufferCreateFile(f, handler);
01079     if (buf == NULL) return(-1);
01080     htmlDocContentDumpOutput(buf, cur, NULL);
01081 
01082     ret = xmlOutputBufferClose(buf);
01083     return(ret);
01084 }
01085 
01095 int
01096 htmlSaveFile(const char *filename, xmlDocPtr cur) {
01097     xmlOutputBufferPtr buf;
01098     xmlCharEncodingHandlerPtr handler = NULL;
01099     const char *encoding;
01100     int ret;
01101 
01102     if ((cur == NULL) || (filename == NULL))
01103         return(-1);
01104        
01105     xmlInitParser();
01106 
01107     encoding = (const char *) htmlGetMetaEncoding(cur);
01108 
01109     if (encoding != NULL) {
01110     xmlCharEncoding enc;
01111 
01112     enc = xmlParseCharEncoding(encoding);
01113     if (enc != cur->charset) {
01114         if (cur->charset != XML_CHAR_ENCODING_UTF8) {
01115         /*
01116          * Not supported yet
01117          */
01118         return(-1);
01119         }
01120 
01121         handler = xmlFindCharEncodingHandler(encoding);
01122         if (handler == NULL)
01123         return(-1);
01124     }
01125     }
01126 
01127     /*
01128      * Fallback to HTML or ASCII when the encoding is unspecified
01129      */
01130     if (handler == NULL)
01131     handler = xmlFindCharEncodingHandler("HTML");
01132     if (handler == NULL)
01133     handler = xmlFindCharEncodingHandler("ascii");
01134 
01135     /* 
01136      * save the content to a temp buffer.
01137      */
01138     buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
01139     if (buf == NULL) return(0);
01140 
01141     htmlDocContentDumpOutput(buf, cur, NULL);
01142 
01143     ret = xmlOutputBufferClose(buf);
01144     return(ret);
01145 }
01146 
01158 int
01159 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
01160                const char *encoding, int format) {
01161     xmlOutputBufferPtr buf;
01162     xmlCharEncodingHandlerPtr handler = NULL;
01163     int ret;
01164 
01165     if ((cur == NULL) || (filename == NULL))
01166         return(-1);
01167 
01168     xmlInitParser();
01169 
01170     if (encoding != NULL) {
01171     xmlCharEncoding enc;
01172 
01173     enc = xmlParseCharEncoding(encoding);
01174     if (enc != cur->charset) {
01175         if (cur->charset != XML_CHAR_ENCODING_UTF8) {
01176         /*
01177          * Not supported yet
01178          */
01179         return(-1);
01180         }
01181 
01182         handler = xmlFindCharEncodingHandler(encoding);
01183         if (handler == NULL)
01184         return(-1);
01185     }
01186         htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
01187     } else {
01188     htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
01189     }
01190 
01191     /*
01192      * Fallback to HTML or ASCII when the encoding is unspecified
01193      */
01194     if (handler == NULL)
01195     handler = xmlFindCharEncodingHandler("HTML");
01196     if (handler == NULL)
01197     handler = xmlFindCharEncodingHandler("ascii");
01198 
01199     /* 
01200      * save the content to a temp buffer.
01201      */
01202     buf = xmlOutputBufferCreateFilename(filename, handler, 0);
01203     if (buf == NULL) return(0);
01204 
01205     htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
01206 
01207     ret = xmlOutputBufferClose(buf);
01208     return(ret);
01209 }
01210 
01222 int
01223 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
01224     return(htmlSaveFileFormat(filename, cur, encoding, 1));
01225 }
01226 
01227 #endif /* LIBXML_OUTPUT_ENABLED */
01228 
01229 #define bottom_HTMLtree
01230 #include "elfgcchack.h"
01231 #endif /* LIBXML_HTML_ENABLED */

Generated on Mon May 28 2012 04:34:16 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.