Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > testHTML.c
Go to the documentation of this file.
00001 /* 00002 * testHTML.c : a small tester program for HTML input. 00003 * 00004 * See Copyright for the status of this software. 00005 * 00006 * daniel@veillard.com 00007 */ 00008 00009 #include "libxml.h" 00010 00011 #ifdef LIBXML_HTML_ENABLED 00012 00013 #include <string.h> 00014 #include <stdarg.h> 00015 00016 00017 #ifdef HAVE_SYS_TYPES_H 00018 #include <sys/types.h> 00019 #endif 00020 #ifdef HAVE_SYS_STAT_H 00021 #include <sys/stat.h> 00022 #endif 00023 #ifdef HAVE_FCNTL_H 00024 #include <fcntl.h> 00025 #endif 00026 #ifdef HAVE_UNISTD_H 00027 #include <unistd.h> 00028 #endif 00029 #ifdef HAVE_STDLIB_H 00030 #include <stdlib.h> 00031 #endif 00032 00033 #include <libxml/xmlmemory.h> 00034 #include <libxml/HTMLparser.h> 00035 #include <libxml/HTMLtree.h> 00036 #include <libxml/debugXML.h> 00037 #include <libxml/xmlerror.h> 00038 #include <libxml/globals.h> 00039 00040 #ifdef LIBXML_DEBUG_ENABLED 00041 static int debug = 0; 00042 #endif 00043 static int copy = 0; 00044 static int sax = 0; 00045 static int repeat = 0; 00046 static int noout = 0; 00047 #ifdef LIBXML_PUSH_ENABLED 00048 static int push = 0; 00049 #endif /* LIBXML_PUSH_ENABLED */ 00050 static char *encoding = NULL; 00051 static int options = 0; 00052 00053 static xmlSAXHandler emptySAXHandlerStruct = { 00054 NULL, /* internalSubset */ 00055 NULL, /* isStandalone */ 00056 NULL, /* hasInternalSubset */ 00057 NULL, /* hasExternalSubset */ 00058 NULL, /* resolveEntity */ 00059 NULL, /* getEntity */ 00060 NULL, /* entityDecl */ 00061 NULL, /* notationDecl */ 00062 NULL, /* attributeDecl */ 00063 NULL, /* elementDecl */ 00064 NULL, /* unparsedEntityDecl */ 00065 NULL, /* setDocumentLocator */ 00066 NULL, /* startDocument */ 00067 NULL, /* endDocument */ 00068 NULL, /* startElement */ 00069 NULL, /* endElement */ 00070 NULL, /* reference */ 00071 NULL, /* characters */ 00072 NULL, /* ignorableWhitespace */ 00073 NULL, /* processingInstruction */ 00074 NULL, /* comment */ 00075 NULL, /* xmlParserWarning */ 
00076 NULL, /* xmlParserError */ 00077 NULL, /* xmlParserError */ 00078 NULL, /* getParameterEntity */ 00079 NULL, /* cdataBlock */ 00080 NULL, /* externalSubset */ 00081 1, /* initialized */ 00082 NULL, /* private */ 00083 NULL, /* startElementNsSAX2Func */ 00084 NULL, /* endElementNsSAX2Func */ 00085 NULL /* xmlStructuredErrorFunc */ 00086 }; 00087 00088 static xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct; 00089 extern xmlSAXHandlerPtr debugSAXHandler; 00090 00091 /************************************************************************ 00092 * * 00093 * Debug Handlers * 00094 * * 00095 ************************************************************************/ 00096 00105 static int 00106 isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED) 00107 { 00108 fprintf(stdout, "SAX.isStandalone()\n"); 00109 return(0); 00110 } 00111 00120 static int 00121 hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED) 00122 { 00123 fprintf(stdout, "SAX.hasInternalSubset()\n"); 00124 return(0); 00125 } 00126 00135 static int 00136 hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED) 00137 { 00138 fprintf(stdout, "SAX.hasExternalSubset()\n"); 00139 return(0); 00140 } 00141 00148 static void 00149 internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, 00150 const xmlChar *ExternalID, const xmlChar *SystemID) 00151 { 00152 fprintf(stdout, "SAX.internalSubset(%s,", name); 00153 if (ExternalID == NULL) 00154 fprintf(stdout, " ,"); 00155 else 00156 fprintf(stdout, " %s,", ExternalID); 00157 if (SystemID == NULL) 00158 fprintf(stdout, " )\n"); 00159 else 00160 fprintf(stdout, " %s)\n", SystemID); 00161 } 00162 00177 static xmlParserInputPtr 00178 resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId) 00179 { 00180 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */ 00181 00182 00183 fprintf(stdout, "SAX.resolveEntity("); 00184 if (publicId != NULL) 00185 fprintf(stdout, "%s", (char *)publicId); 00186 else 00187 fprintf(stdout, " 
"); 00188 if (systemId != NULL) 00189 fprintf(stdout, ", %s)\n", (char *)systemId); 00190 else 00191 fprintf(stdout, ", )\n"); 00192 /********* 00193 if (systemId != NULL) { 00194 return(xmlNewInputFromFile(ctxt, (char *) systemId)); 00195 } 00196 *********/ 00197 return(NULL); 00198 } 00199 00209 static xmlEntityPtr 00210 getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name) 00211 { 00212 fprintf(stdout, "SAX.getEntity(%s)\n", name); 00213 return(NULL); 00214 } 00215 00225 static xmlEntityPtr 00226 getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name) 00227 { 00228 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name); 00229 return(NULL); 00230 } 00231 00232 00244 static void 00245 entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type, 00246 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content) 00247 { 00248 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n", 00249 name, type, publicId, systemId, content); 00250 } 00251 00260 static void 00261 attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name, 00262 int type, int def, const xmlChar *defaultValue, 00263 xmlEnumerationPtr tree ATTRIBUTE_UNUSED) 00264 { 00265 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n", 00266 elem, name, type, def, defaultValue); 00267 } 00268 00278 static void 00279 elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type, 00280 xmlElementContentPtr content ATTRIBUTE_UNUSED) 00281 { 00282 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n", 00283 name, type); 00284 } 00285 00295 static void 00296 notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, 00297 const xmlChar *publicId, const xmlChar *systemId) 00298 { 00299 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n", 00300 (char *) name, (char *) publicId, (char *) systemId); 00301 } 00302 00313 static void 00314 unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, 00315 const 
xmlChar *publicId, const xmlChar *systemId, 00316 const xmlChar *notationName) 00317 { 00318 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n", 00319 (char *) name, (char *) publicId, (char *) systemId, 00320 (char *) notationName); 00321 } 00322 00331 static void 00332 setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED) 00333 { 00334 fprintf(stdout, "SAX.setDocumentLocator()\n"); 00335 } 00336 00343 static void 00344 startDocumentDebug(void *ctx ATTRIBUTE_UNUSED) 00345 { 00346 fprintf(stdout, "SAX.startDocument()\n"); 00347 } 00348 00355 static void 00356 endDocumentDebug(void *ctx ATTRIBUTE_UNUSED) 00357 { 00358 fprintf(stdout, "SAX.endDocument()\n"); 00359 } 00360 00368 static void 00369 startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts) 00370 { 00371 int i; 00372 00373 fprintf(stdout, "SAX.startElement(%s", (char *) name); 00374 if (atts != NULL) { 00375 for (i = 0;(atts[i] != NULL);i++) { 00376 fprintf(stdout, ", %s", atts[i++]); 00377 if (atts[i] != NULL) { 00378 unsigned char output[40]; 00379 const unsigned char *att = atts[i]; 00380 int outlen, attlen; 00381 fprintf(stdout, "='"); 00382 while ((attlen = strlen((char*)att)) > 0) { 00383 outlen = sizeof output - 1; 00384 htmlEncodeEntities(output, &outlen, att, &attlen, '\''); 00385 output[outlen] = 0; 00386 fprintf(stdout, "%s", (char *) output); 00387 att += attlen; 00388 } 00389 fprintf(stdout, "'"); 00390 } 00391 } 00392 } 00393 fprintf(stdout, ")\n"); 00394 } 00395 00403 static void 00404 endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name) 00405 { 00406 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name); 00407 } 00408 00418 static void 00419 charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len) 00420 { 00421 unsigned char output[40]; 00422 int inlen = len, outlen = 30; 00423 00424 htmlEncodeEntities(output, &outlen, ch, &inlen, 0); 00425 output[outlen] = 0; 00426 00427 fprintf(stdout, 
"SAX.characters(%s, %d)\n", output, len); 00428 } 00429 00439 static void 00440 cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len) 00441 { 00442 unsigned char output[40]; 00443 int inlen = len, outlen = 30; 00444 00445 htmlEncodeEntities(output, &outlen, ch, &inlen, 0); 00446 output[outlen] = 0; 00447 00448 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len); 00449 } 00450 00458 static void 00459 referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name) 00460 { 00461 fprintf(stdout, "SAX.reference(%s)\n", name); 00462 } 00463 00474 static void 00475 ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len) 00476 { 00477 char output[40]; 00478 int i; 00479 00480 for (i = 0;(i<len) && (i < 30);i++) 00481 output[i] = ch[i]; 00482 output[i] = 0; 00483 00484 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len); 00485 } 00486 00496 static void 00497 processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target, 00498 const xmlChar *data) 00499 { 00500 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n", 00501 (char *) target, (char *) data); 00502 } 00503 00511 static void 00512 commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value) 00513 { 00514 fprintf(stdout, "SAX.comment(%s)\n", value); 00515 } 00516 00526 static void XMLCDECL 00527 warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) 00528 { 00529 va_list args; 00530 00531 va_start(args, msg); 00532 fprintf(stdout, "SAX.warning: "); 00533 vfprintf(stdout, msg, args); 00534 va_end(args); 00535 } 00536 00546 static void XMLCDECL 00547 errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) 00548 { 00549 va_list args; 00550 00551 va_start(args, msg); 00552 fprintf(stdout, "SAX.error: "); 00553 vfprintf(stdout, msg, args); 00554 va_end(args); 00555 } 00556 00566 static void XMLCDECL 00567 fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) 
00568 { 00569 va_list args; 00570 00571 va_start(args, msg); 00572 fprintf(stdout, "SAX.fatalError: "); 00573 vfprintf(stdout, msg, args); 00574 va_end(args); 00575 } 00576 00577 static xmlSAXHandler debugSAXHandlerStruct = { 00578 internalSubsetDebug, 00579 isStandaloneDebug, 00580 hasInternalSubsetDebug, 00581 hasExternalSubsetDebug, 00582 resolveEntityDebug, 00583 getEntityDebug, 00584 entityDeclDebug, 00585 notationDeclDebug, 00586 attributeDeclDebug, 00587 elementDeclDebug, 00588 unparsedEntityDeclDebug, 00589 setDocumentLocatorDebug, 00590 startDocumentDebug, 00591 endDocumentDebug, 00592 startElementDebug, 00593 endElementDebug, 00594 referenceDebug, 00595 charactersDebug, 00596 ignorableWhitespaceDebug, 00597 processingInstructionDebug, 00598 commentDebug, 00599 warningDebug, 00600 errorDebug, 00601 fatalErrorDebug, 00602 getParameterEntityDebug, 00603 cdataDebug, 00604 NULL, 00605 1, 00606 NULL, 00607 NULL, 00608 NULL, 00609 NULL 00610 }; 00611 00612 xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct; 00613 /************************************************************************ 00614 * * 00615 * Debug * 00616 * * 00617 ************************************************************************/ 00618 00619 static void 00620 parseSAXFile(char *filename) { 00621 htmlDocPtr doc = NULL; 00622 00623 /* 00624 * Empty callbacks for checking 00625 */ 00626 #ifdef LIBXML_PUSH_ENABLED 00627 if (push) { 00628 FILE *f; 00629 00630 #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__) 00631 f = fopen(filename, "rb"); 00632 #else 00633 f = fopen(filename, "r"); 00634 #endif 00635 if (f != NULL) { 00636 int res, size = 3; 00637 char chars[4096]; 00638 htmlParserCtxtPtr ctxt; 00639 00640 /* if (repeat) */ 00641 size = 4096; 00642 res = fread(chars, 1, 4, f); 00643 if (res > 0) { 00644 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL, 00645 chars, res, filename, XML_CHAR_ENCODING_NONE); 00646 while ((res = fread(chars, 1, size, f)) > 0) { 00647 
htmlParseChunk(ctxt, chars, res, 0); 00648 } 00649 htmlParseChunk(ctxt, chars, 0, 1); 00650 doc = ctxt->myDoc; 00651 htmlFreeParserCtxt(ctxt); 00652 } 00653 if (doc != NULL) { 00654 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); 00655 xmlFreeDoc(doc); 00656 } 00657 fclose(f); 00658 } 00659 if (!noout) { 00660 #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__) 00661 f = fopen(filename, "rb"); 00662 #else 00663 f = fopen(filename, "r"); 00664 #endif 00665 if (f != NULL) { 00666 int res, size = 3; 00667 char chars[4096]; 00668 htmlParserCtxtPtr ctxt; 00669 00670 /* if (repeat) */ 00671 size = 4096; 00672 res = fread(chars, 1, 4, f); 00673 if (res > 0) { 00674 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL, 00675 chars, res, filename, XML_CHAR_ENCODING_NONE); 00676 while ((res = fread(chars, 1, size, f)) > 0) { 00677 htmlParseChunk(ctxt, chars, res, 0); 00678 } 00679 htmlParseChunk(ctxt, chars, 0, 1); 00680 doc = ctxt->myDoc; 00681 htmlFreeParserCtxt(ctxt); 00682 } 00683 if (doc != NULL) { 00684 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); 00685 xmlFreeDoc(doc); 00686 } 00687 fclose(f); 00688 } 00689 } 00690 } else { 00691 #endif /* LIBXML_PUSH_ENABLED */ 00692 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL); 00693 if (doc != NULL) { 00694 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); 00695 xmlFreeDoc(doc); 00696 } 00697 00698 if (!noout) { 00699 /* 00700 * Debug callback 00701 */ 00702 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL); 00703 if (doc != NULL) { 00704 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); 00705 xmlFreeDoc(doc); 00706 } 00707 } 00708 #ifdef LIBXML_PUSH_ENABLED 00709 } 00710 #endif /* LIBXML_PUSH_ENABLED */ 00711 } 00712 00713 static void 00714 parseAndPrintFile(char *filename) { 00715 htmlDocPtr doc = NULL; 00716 00717 /* 00718 * build an HTML tree from a string; 00719 */ 00720 #ifdef LIBXML_PUSH_ENABLED 00721 if (push) { 00722 FILE *f; 00723 00724 #if 
defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__) 00725 f = fopen(filename, "rb"); 00726 #else 00727 f = fopen(filename, "r"); 00728 #endif 00729 if (f != NULL) { 00730 int res, size = 3; 00731 char chars[4096]; 00732 htmlParserCtxtPtr ctxt; 00733 00734 /* if (repeat) */ 00735 size = 4096; 00736 res = fread(chars, 1, 4, f); 00737 if (res > 0) { 00738 ctxt = htmlCreatePushParserCtxt(NULL, NULL, 00739 chars, res, filename, XML_CHAR_ENCODING_NONE); 00740 while ((res = fread(chars, 1, size, f)) > 0) { 00741 htmlParseChunk(ctxt, chars, res, 0); 00742 } 00743 htmlParseChunk(ctxt, chars, 0, 1); 00744 doc = ctxt->myDoc; 00745 htmlFreeParserCtxt(ctxt); 00746 } 00747 fclose(f); 00748 } 00749 } else { 00750 doc = htmlReadFile(filename, NULL, options); 00751 } 00752 #else 00753 doc = htmlReadFile(filename,NULL,options); 00754 #endif 00755 if (doc == NULL) { 00756 xmlGenericError(xmlGenericErrorContext, 00757 "Could not parse %s\n", filename); 00758 } 00759 00760 #ifdef LIBXML_TREE_ENABLED 00761 /* 00762 * test intermediate copy if needed. 00763 */ 00764 if (copy) { 00765 htmlDocPtr tmp; 00766 00767 tmp = doc; 00768 doc = xmlCopyDoc(doc, 1); 00769 xmlFreeDoc(tmp); 00770 } 00771 #endif 00772 00773 #ifdef LIBXML_OUTPUT_ENABLED 00774 /* 00775 * print it. 00776 */ 00777 if (!noout) { 00778 #ifdef LIBXML_DEBUG_ENABLED 00779 if (!debug) { 00780 if (encoding) 00781 htmlSaveFileEnc("-", doc, encoding); 00782 else 00783 htmlDocDump(stdout, doc); 00784 } else 00785 xmlDebugDumpDocument(stdout, doc); 00786 #else 00787 if (encoding) 00788 htmlSaveFileEnc("-", doc, encoding); 00789 else 00790 htmlDocDump(stdout, doc); 00791 #endif 00792 } 00793 #endif /* LIBXML_OUTPUT_ENABLED */ 00794 00795 /* 00796 * free it. 
00797 */ 00798 xmlFreeDoc(doc); 00799 } 00800 00801 int main(int argc, char **argv) { 00802 int i, count; 00803 int files = 0; 00804 00805 for (i = 1; i < argc ; i++) { 00806 #ifdef LIBXML_DEBUG_ENABLED 00807 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug"))) 00808 debug++; 00809 else 00810 #endif 00811 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy"))) 00812 copy++; 00813 #ifdef LIBXML_PUSH_ENABLED 00814 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push"))) 00815 push++; 00816 #endif /* LIBXML_PUSH_ENABLED */ 00817 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax"))) 00818 sax++; 00819 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout"))) 00820 noout++; 00821 else if ((!strcmp(argv[i], "-repeat")) || 00822 (!strcmp(argv[i], "--repeat"))) 00823 repeat++; 00824 else if ((!strcmp(argv[i], "-encode")) || 00825 (!strcmp(argv[i], "--encode"))) { 00826 i++; 00827 encoding = argv[i]; 00828 } 00829 } 00830 for (i = 1; i < argc ; i++) { 00831 if ((!strcmp(argv[i], "-encode")) || 00832 (!strcmp(argv[i], "--encode"))) { 00833 i++; 00834 continue; 00835 } 00836 if (argv[i][0] != '-') { 00837 if (repeat) { 00838 for (count = 0;count < 100 * repeat;count++) { 00839 if (sax) 00840 parseSAXFile(argv[i]); 00841 else 00842 parseAndPrintFile(argv[i]); 00843 } 00844 } else { 00845 if (sax) 00846 parseSAXFile(argv[i]); 00847 else 00848 parseAndPrintFile(argv[i]); 00849 } 00850 files ++; 00851 } 00852 } 00853 if (files == 0) { 00854 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n", 00855 argv[0]); 00856 printf("\tParse the HTML files and output the result of the parsing\n"); 00857 #ifdef LIBXML_DEBUG_ENABLED 00858 printf("\t--debug : dump a debug tree of the in-memory document\n"); 00859 #endif 00860 printf("\t--copy : used to test the internal copy implementation\n"); 00861 printf("\t--sax : debug the sequence of SAX callbacks\n"); 00862 printf("\t--repeat : parse the file 100 times, for 
timing\n"); 00863 printf("\t--noout : do not print the result\n"); 00864 #ifdef LIBXML_PUSH_ENABLED 00865 printf("\t--push : use the push mode parser\n"); 00866 #endif /* LIBXML_PUSH_ENABLED */ 00867 printf("\t--encode encoding : output in the given encoding\n"); 00868 } 00869 xmlCleanupParser(); 00870 xmlMemoryDump(); 00871 00872 return(0); 00873 } 00874 #else /* !LIBXML_HTML_ENABLED */ 00875 #include <stdio.h> 00876 int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) { 00877 printf("%s : HTML support not compiled in\n", argv[0]); 00878 return(0); 00879 } 00880 #endif Generated on Fri May 25 2012 04:33:10 for ReactOS by
1.7.6.1
|