Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > xmlstring.c
Go to the documentation of this file.
00001 /* 00002 * string.c : an XML string utilities module 00003 * 00004 * This module provides various utility functions for manipulating 00005 * the xmlChar* type. All functions named xmlStr* have been moved here 00006 * from the parser.c file (their original home). 00007 * 00008 * See Copyright for the status of this software. 00009 * 00010 * UTF8 string routines from: 00011 * William Brack <wbrack@mmm.com.hk> 00012 * 00013 * daniel@veillard.com 00014 */ 00015 00016 #define IN_LIBXML 00017 #include "libxml.h" 00018 00019 #include <stdlib.h> 00020 #include <string.h> 00021 #include <libxml/xmlmemory.h> 00022 #include <libxml/parserInternals.h> 00023 #include <libxml/xmlstring.h> 00024 00025 /************************************************************************ 00026 * * 00027 * Commodity functions to handle xmlChars * 00028 * * 00029 ************************************************************************/ 00030 00040 xmlChar * 00041 xmlStrndup(const xmlChar *cur, int len) { 00042 xmlChar *ret; 00043 00044 if ((cur == NULL) || (len < 0)) return(NULL); 00045 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar)); 00046 if (ret == NULL) { 00047 xmlErrMemory(NULL, NULL); 00048 return(NULL); 00049 } 00050 memcpy(ret, cur, len * sizeof(xmlChar)); 00051 ret[len] = 0; 00052 return(ret); 00053 } 00054 00065 xmlChar * 00066 xmlStrdup(const xmlChar *cur) { 00067 const xmlChar *p = cur; 00068 00069 if (cur == NULL) return(NULL); 00070 while (*p != 0) p++; /* non input consuming */ 00071 return(xmlStrndup(cur, p - cur)); 00072 } 00073 00084 xmlChar * 00085 xmlCharStrndup(const char *cur, int len) { 00086 int i; 00087 xmlChar *ret; 00088 00089 if ((cur == NULL) || (len < 0)) return(NULL); 00090 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar)); 00091 if (ret == NULL) { 00092 xmlErrMemory(NULL, NULL); 00093 return(NULL); 00094 } 00095 for (i = 0;i < len;i++) { 00096 ret[i] = (xmlChar) cur[i]; 00097 if (ret[i] == 0) return(ret); 00098 } 00099 ret[len] = 
0; 00100 return(ret); 00101 } 00102 00112 xmlChar * 00113 xmlCharStrdup(const char *cur) { 00114 const char *p = cur; 00115 00116 if (cur == NULL) return(NULL); 00117 while (*p != '\0') p++; /* non input consuming */ 00118 return(xmlCharStrndup(cur, p - cur)); 00119 } 00120 00131 int 00132 xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { 00133 register int tmp; 00134 00135 if (str1 == str2) return(0); 00136 if (str1 == NULL) return(-1); 00137 if (str2 == NULL) return(1); 00138 do { 00139 tmp = *str1++ - *str2; 00140 if (tmp != 0) return(tmp); 00141 } while (*str2++ != 0); 00142 return 0; 00143 } 00144 00156 int 00157 xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { 00158 if (str1 == str2) return(1); 00159 if (str1 == NULL) return(0); 00160 if (str2 == NULL) return(0); 00161 do { 00162 if (*str1++ != *str2) return(0); 00163 } while (*str2++); 00164 return(1); 00165 } 00166 00178 int 00179 xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) { 00180 if (pref == NULL) return(xmlStrEqual(name, str)); 00181 if (name == NULL) return(0); 00182 if (str == NULL) return(0); 00183 00184 do { 00185 if (*pref++ != *str) return(0); 00186 } while ((*str++) && (*pref)); 00187 if (*str++ != ':') return(0); 00188 do { 00189 if (*name++ != *str) return(0); 00190 } while (*str++); 00191 return(1); 00192 } 00193 00205 int 00206 xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { 00207 register int tmp; 00208 00209 if (len <= 0) return(0); 00210 if (str1 == str2) return(0); 00211 if (str1 == NULL) return(-1); 00212 if (str2 == NULL) return(1); 00213 #ifdef __GNUC__ 00214 tmp = strncmp((const char *)str1, (const char *)str2, len); 00215 return tmp; 00216 #else 00217 do { 00218 tmp = *str1++ - *str2; 00219 if (tmp != 0 || --len == 0) return(tmp); 00220 } while (*str2++ != 0); 00221 return 0; 00222 #endif 00223 } 00224 00225 static const xmlChar casemap[256] = { 00226 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 00227 
0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 00228 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 00229 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 00230 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 00231 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 00232 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 00233 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 00234 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 00235 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 00236 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 00237 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, 00238 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 00239 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 00240 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 00241 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 00242 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 00243 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 00244 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 00245 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 00246 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 00247 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 00248 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 00249 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 00250 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 00251 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 00252 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 00253 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 00254 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 00255 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 00256 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 00257 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF 00258 }; 00259 00270 int 00271 xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { 00272 register int tmp; 00273 00274 if (str1 == str2) return(0); 00275 if (str1 == NULL) return(-1); 00276 if (str2 == NULL) return(1); 00277 do { 00278 tmp = casemap[*str1++] - casemap[*str2]; 00279 if (tmp != 0) return(tmp); 00280 } while (*str2++ != 0); 00281 return 0; 00282 } 00283 00295 int 00296 xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { 00297 register int tmp; 00298 00299 if (len <= 0) return(0); 00300 if (str1 == str2) 
return(0); 00301 if (str1 == NULL) return(-1); 00302 if (str2 == NULL) return(1); 00303 do { 00304 tmp = casemap[*str1++] - casemap[*str2]; 00305 if (tmp != 0 || --len == 0) return(tmp); 00306 } while (*str2++ != 0); 00307 return 0; 00308 } 00309 00320 const xmlChar * 00321 xmlStrchr(const xmlChar *str, xmlChar val) { 00322 if (str == NULL) return(NULL); 00323 while (*str != 0) { /* non input consuming */ 00324 if (*str == val) return((xmlChar *) str); 00325 str++; 00326 } 00327 return(NULL); 00328 } 00329 00340 const xmlChar * 00341 xmlStrstr(const xmlChar *str, const xmlChar *val) { 00342 int n; 00343 00344 if (str == NULL) return(NULL); 00345 if (val == NULL) return(NULL); 00346 n = xmlStrlen(val); 00347 00348 if (n == 0) return(str); 00349 while (*str != 0) { /* non input consuming */ 00350 if (*str == *val) { 00351 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); 00352 } 00353 str++; 00354 } 00355 return(NULL); 00356 } 00357 00368 const xmlChar * 00369 xmlStrcasestr(const xmlChar *str, const xmlChar *val) { 00370 int n; 00371 00372 if (str == NULL) return(NULL); 00373 if (val == NULL) return(NULL); 00374 n = xmlStrlen(val); 00375 00376 if (n == 0) return(str); 00377 while (*str != 0) { /* non input consuming */ 00378 if (casemap[*str] == casemap[*val]) 00379 if (!xmlStrncasecmp(str, val, n)) return(str); 00380 str++; 00381 } 00382 return(NULL); 00383 } 00384 00396 xmlChar * 00397 xmlStrsub(const xmlChar *str, int start, int len) { 00398 int i; 00399 00400 if (str == NULL) return(NULL); 00401 if (start < 0) return(NULL); 00402 if (len < 0) return(NULL); 00403 00404 for (i = 0;i < start;i++) { 00405 if (*str == 0) return(NULL); 00406 str++; 00407 } 00408 if (*str == 0) return(NULL); 00409 return(xmlStrndup(str, len)); 00410 } 00411 00421 int 00422 xmlStrlen(const xmlChar *str) { 00423 int len = 0; 00424 00425 if (str == NULL) return(0); 00426 while (*str != 0) { /* non input consuming */ 00427 str++; 00428 len++; 00429 } 00430 return(len); 00431 } 
00432 00447 xmlChar * 00448 xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { 00449 int size; 00450 xmlChar *ret; 00451 00452 if ((add == NULL) || (len == 0)) 00453 return(cur); 00454 if (len < 0) 00455 return(NULL); 00456 if (cur == NULL) 00457 return(xmlStrndup(add, len)); 00458 00459 size = xmlStrlen(cur); 00460 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); 00461 if (ret == NULL) { 00462 xmlErrMemory(NULL, NULL); 00463 return(cur); 00464 } 00465 memcpy(&ret[size], add, len * sizeof(xmlChar)); 00466 ret[size + len] = 0; 00467 return(ret); 00468 } 00469 00482 xmlChar * 00483 xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) { 00484 int size; 00485 xmlChar *ret; 00486 00487 if (len < 0) 00488 len = xmlStrlen(str2); 00489 if ((str2 == NULL) || (len == 0)) 00490 return(xmlStrdup(str1)); 00491 if (str1 == NULL) 00492 return(xmlStrndup(str2, len)); 00493 00494 size = xmlStrlen(str1); 00495 ret = (xmlChar *) xmlMalloc((size + len + 1) * sizeof(xmlChar)); 00496 if (ret == NULL) { 00497 xmlErrMemory(NULL, NULL); 00498 return(xmlStrndup(str1, size)); 00499 } 00500 memcpy(ret, str1, size * sizeof(xmlChar)); 00501 memcpy(&ret[size], str2, len * sizeof(xmlChar)); 00502 ret[size + len] = 0; 00503 return(ret); 00504 } 00505 00517 xmlChar * 00518 xmlStrcat(xmlChar *cur, const xmlChar *add) { 00519 const xmlChar *p = add; 00520 00521 if (add == NULL) return(cur); 00522 if (cur == NULL) 00523 return(xmlStrdup(add)); 00524 00525 while (*p != 0) p++; /* non input consuming */ 00526 return(xmlStrncat(cur, add, p - add)); 00527 } 00528 00540 int XMLCDECL 00541 xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) { 00542 va_list args; 00543 int ret; 00544 00545 if((buf == NULL) || (msg == NULL)) { 00546 return(-1); 00547 } 00548 00549 va_start(args, msg); 00550 ret = vsnprintf((char *) buf, len, (const char *) msg, args); 00551 va_end(args); 00552 buf[len - 1] = 0; /* be safe ! 
*/ 00553 00554 return(ret); 00555 } 00556 00568 int 00569 xmlStrVPrintf(xmlChar *buf, int len, const xmlChar *msg, va_list ap) { 00570 int ret; 00571 00572 if((buf == NULL) || (msg == NULL)) { 00573 return(-1); 00574 } 00575 00576 ret = vsnprintf((char *) buf, len, (const char *) msg, ap); 00577 buf[len - 1] = 0; /* be safe ! */ 00578 00579 return(ret); 00580 } 00581 00582 /************************************************************************ 00583 * * 00584 * Generic UTF8 handling routines * 00585 * * 00586 * From rfc2044: encoding of the Unicode values on UTF-8: * 00587 * * 00588 * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 00589 * 0000 0000-0000 007F 0xxxxxxx * 00590 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 00591 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * 00592 * * 00593 * I hope we won't use values > 0xFFFF anytime soon ! * 00594 * * 00595 ************************************************************************/ 00596 00597 00606 int 00607 xmlUTF8Size(const xmlChar *utf) { 00608 xmlChar mask; 00609 int len; 00610 00611 if (utf == NULL) 00612 return -1; 00613 if (*utf < 0x80) 00614 return 1; 00615 /* check valid UTF8 character */ 00616 if (!(*utf & 0x40)) 00617 return -1; 00618 /* determine number of bytes in char */ 00619 len = 2; 00620 for (mask=0x20; mask != 0; mask>>=1) { 00621 if (!(*utf & mask)) 00622 return len; 00623 len++; 00624 } 00625 return -1; 00626 } 00627 00637 int 00638 xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) { 00639 00640 if (utf1 == NULL ) { 00641 if (utf2 == NULL) 00642 return 0; 00643 return -1; 00644 } 00645 return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1)); 00646 } 00647 00657 int 00658 xmlUTF8Strlen(const xmlChar *utf) { 00659 int ret = 0; 00660 00661 if (utf == NULL) 00662 return(-1); 00663 00664 while (*utf != 0) { 00665 if (utf[0] & 0x80) { 00666 if ((utf[1] & 0xc0) != 0x80) 00667 return(-1); 00668 if ((utf[0] & 0xe0) == 0xe0) { 00669 if ((utf[2] & 0xc0) != 0x80) 00670 return(-1); 00671 if ((utf[0] & 
0xf0) == 0xf0) { 00672 if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80) 00673 return(-1); 00674 utf += 4; 00675 } else { 00676 utf += 3; 00677 } 00678 } else { 00679 utf += 2; 00680 } 00681 } else { 00682 utf++; 00683 } 00684 ret++; 00685 } 00686 return(ret); 00687 } 00688 00701 int 00702 xmlGetUTF8Char(const unsigned char *utf, int *len) { 00703 unsigned int c; 00704 00705 if (utf == NULL) 00706 goto error; 00707 if (len == NULL) 00708 goto error; 00709 if (*len < 1) 00710 goto error; 00711 00712 c = utf[0]; 00713 if (c & 0x80) { 00714 if (*len < 2) 00715 goto error; 00716 if ((utf[1] & 0xc0) != 0x80) 00717 goto error; 00718 if ((c & 0xe0) == 0xe0) { 00719 if (*len < 3) 00720 goto error; 00721 if ((utf[2] & 0xc0) != 0x80) 00722 goto error; 00723 if ((c & 0xf0) == 0xf0) { 00724 if (*len < 4) 00725 goto error; 00726 if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80) 00727 goto error; 00728 *len = 4; 00729 /* 4-byte code */ 00730 c = (utf[0] & 0x7) << 18; 00731 c |= (utf[1] & 0x3f) << 12; 00732 c |= (utf[2] & 0x3f) << 6; 00733 c |= utf[3] & 0x3f; 00734 } else { 00735 /* 3-byte code */ 00736 *len = 3; 00737 c = (utf[0] & 0xf) << 12; 00738 c |= (utf[1] & 0x3f) << 6; 00739 c |= utf[2] & 0x3f; 00740 } 00741 } else { 00742 /* 2-byte code */ 00743 *len = 2; 00744 c = (utf[0] & 0x1f) << 6; 00745 c |= utf[1] & 0x3f; 00746 } 00747 } else { 00748 /* 1-byte code */ 00749 *len = 1; 00750 } 00751 return(c); 00752 00753 error: 00754 if (len != NULL) 00755 *len = 0; 00756 return(-1); 00757 } 00758 00772 int 00773 xmlCheckUTF8(const unsigned char *utf) 00774 { 00775 int ix; 00776 unsigned char c; 00777 00778 if (utf == NULL) 00779 return(0); 00780 /* 00781 * utf is a string of 1, 2, 3 or 4 bytes. 
The valid strings 00782 * are as follows (in "bit format"): 00783 * 0xxxxxxx valid 1-byte 00784 * 110xxxxx 10xxxxxx valid 2-byte 00785 * 1110xxxx 10xxxxxx 10xxxxxx valid 3-byte 00786 * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx valid 4-byte 00787 */ 00788 for (ix = 0; (c = utf[ix]);) { /* string is 0-terminated */ 00789 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 00790 ix++; 00791 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 00792 if ((utf[ix+1] & 0xc0 ) != 0x80) 00793 return 0; 00794 ix += 2; 00795 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 00796 if (((utf[ix+1] & 0xc0) != 0x80) || 00797 ((utf[ix+2] & 0xc0) != 0x80)) 00798 return 0; 00799 ix += 3; 00800 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 00801 if (((utf[ix+1] & 0xc0) != 0x80) || 00802 ((utf[ix+2] & 0xc0) != 0x80) || 00803 ((utf[ix+3] & 0xc0) != 0x80)) 00804 return 0; 00805 ix += 4; 00806 } else /* unknown encoding */ 00807 return 0; 00808 } 00809 return(1); 00810 } 00811 00824 int 00825 xmlUTF8Strsize(const xmlChar *utf, int len) { 00826 const xmlChar *ptr=utf; 00827 xmlChar ch; 00828 00829 if (utf == NULL) 00830 return(0); 00831 00832 if (len <= 0) 00833 return(0); 00834 00835 while ( len-- > 0) { 00836 if ( !*ptr ) 00837 break; 00838 if ( (ch = *ptr++) & 0x80) 00839 while ((ch<<=1) & 0x80 ) { 00840 ptr++; 00841 if (*ptr == 0) break; 00842 } 00843 } 00844 return (ptr - utf); 00845 } 00846 00847 00857 xmlChar * 00858 xmlUTF8Strndup(const xmlChar *utf, int len) { 00859 xmlChar *ret; 00860 int i; 00861 00862 if ((utf == NULL) || (len < 0)) return(NULL); 00863 i = xmlUTF8Strsize(utf, len); 00864 ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar)); 00865 if (ret == NULL) { 00866 xmlGenericError(xmlGenericErrorContext, 00867 "malloc of %ld byte failed\n", 00868 (len + 1) * (long)sizeof(xmlChar)); 00869 return(NULL); 00870 } 00871 memcpy(ret, utf, i * sizeof(xmlChar)); 00872 ret[i] = 0; 00873 return(ret); 00874 } 00875 
00886 const xmlChar * 00887 xmlUTF8Strpos(const xmlChar *utf, int pos) { 00888 xmlChar ch; 00889 00890 if (utf == NULL) return(NULL); 00891 if (pos < 0) 00892 return(NULL); 00893 while (pos--) { 00894 if ((ch=*utf++) == 0) return(NULL); 00895 if ( ch & 0x80 ) { 00896 /* if not simple ascii, verify proper format */ 00897 if ( (ch & 0xc0) != 0xc0 ) 00898 return(NULL); 00899 /* then skip over remaining bytes for this char */ 00900 while ( (ch <<= 1) & 0x80 ) 00901 if ( (*utf++ & 0xc0) != 0x80 ) 00902 return(NULL); 00903 } 00904 } 00905 return((xmlChar *)utf); 00906 } 00907 00918 int 00919 xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) { 00920 int i, size; 00921 xmlChar ch; 00922 00923 if (utf==NULL || utfchar==NULL) return -1; 00924 size = xmlUTF8Strsize(utfchar, 1); 00925 for(i=0; (ch=*utf) != 0; i++) { 00926 if (xmlStrncmp(utf, utfchar, size)==0) 00927 return(i); 00928 utf++; 00929 if ( ch & 0x80 ) { 00930 /* if not simple ascii, verify proper format */ 00931 if ( (ch & 0xc0) != 0xc0 ) 00932 return(-1); 00933 /* then skip over remaining bytes for this char */ 00934 while ( (ch <<= 1) & 0x80 ) 00935 if ( (*utf++ & 0xc0) != 0x80 ) 00936 return(-1); 00937 } 00938 } 00939 00940 return(-1); 00941 } 00955 xmlChar * 00956 xmlUTF8Strsub(const xmlChar *utf, int start, int len) { 00957 int i; 00958 xmlChar ch; 00959 00960 if (utf == NULL) return(NULL); 00961 if (start < 0) return(NULL); 00962 if (len < 0) return(NULL); 00963 00964 /* 00965 * Skip over any leading chars 00966 */ 00967 for (i = 0;i < start;i++) { 00968 if ((ch=*utf++) == 0) return(NULL); 00969 if ( ch & 0x80 ) { 00970 /* if not simple ascii, verify proper format */ 00971 if ( (ch & 0xc0) != 0xc0 ) 00972 return(NULL); 00973 /* then skip over remaining bytes for this char */ 00974 while ( (ch <<= 1) & 0x80 ) 00975 if ( (*utf++ & 0xc0) != 0x80 ) 00976 return(NULL); 00977 } 00978 } 00979 00980 return(xmlUTF8Strndup(utf, len)); 00981 } 00982 00983 #define bottom_xmlstring 00984 #include "elfgcchack.h" 
Generated on Sun May 27 2012 04:35:04 for ReactOS by Doxygen 1.7.6.1
|