Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > mbtowc.c
Go to the documentation of this file.
00001 /* 00002 * MultiByteToWideChar implementation 00003 * 00004 * Copyright 2000 Alexandre Julliard 00005 * 00006 * This library is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Lesser General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2.1 of the License, or (at your option) any later version. 00010 * 00011 * This library is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Lesser General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Lesser General Public 00017 * License along with this library; if not, write to the Free Software 00018 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 00019 */ 00020 00021 #include <string.h> 00022 00023 #include "wine/unicode.h" 00024 00025 /* get the decomposition of a Unicode char */ 00026 static int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen ) 00027 { 00028 extern const WCHAR unicode_decompose_table[]; 00029 const WCHAR *ptr = unicode_decompose_table; 00030 int res; 00031 00032 *dst = src; 00033 ptr = unicode_decompose_table + ptr[src >> 8]; 00034 ptr = unicode_decompose_table + ptr[(src >> 4) & 0x0f] + 2 * (src & 0x0f); 00035 if (!*ptr) return 1; 00036 if (dstlen <= 1) return 0; 00037 /* apply the decomposition recursively to the first char */ 00038 if ((res = get_decomposition( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1]; 00039 return res; 00040 } 00041 00042 /* check the code whether it is in Unicode Private Use Area (PUA). */ 00043 /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. 
*/ 00044 static inline int is_private_use_area_char(WCHAR code) 00045 { 00046 return (code >= 0xe000 && code <= 0xf8ff); 00047 } 00048 00049 /* check src string for invalid chars; return non-zero if invalid char found */ 00050 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, int flags, 00051 const unsigned char *src, unsigned int srclen ) 00052 { 00053 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni; 00054 const WCHAR def_unicode_char = table->info.def_unicode_char; 00055 const unsigned char def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8] 00056 + (def_unicode_char & 0xff)]; 00057 while (srclen) 00058 { 00059 if ((cp2uni[*src] == def_unicode_char && *src != def_char) || 00060 is_private_use_area_char(cp2uni[*src])) break; 00061 src++; 00062 srclen--; 00063 } 00064 return srclen; 00065 } 00066 00067 /* mbstowcs for single-byte code page */ 00068 /* all lengths are in characters, not bytes */ 00069 static inline int mbstowcs_sbcs( const struct sbcs_table *table, int flags, 00070 const unsigned char *src, unsigned int srclen, 00071 WCHAR *dst, unsigned int dstlen ) 00072 { 00073 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? 
table->cp2uni_glyphs : table->cp2uni; 00074 int ret = srclen; 00075 00076 if (dstlen < srclen) 00077 { 00078 /* buffer too small: fill it up to dstlen and return error */ 00079 srclen = dstlen; 00080 ret = -1; 00081 } 00082 00083 for (;;) 00084 { 00085 switch(srclen) 00086 { 00087 default: 00088 case 16: dst[15] = cp2uni[src[15]]; 00089 case 15: dst[14] = cp2uni[src[14]]; 00090 case 14: dst[13] = cp2uni[src[13]]; 00091 case 13: dst[12] = cp2uni[src[12]]; 00092 case 12: dst[11] = cp2uni[src[11]]; 00093 case 11: dst[10] = cp2uni[src[10]]; 00094 case 10: dst[9] = cp2uni[src[9]]; 00095 case 9: dst[8] = cp2uni[src[8]]; 00096 case 8: dst[7] = cp2uni[src[7]]; 00097 case 7: dst[6] = cp2uni[src[6]]; 00098 case 6: dst[5] = cp2uni[src[5]]; 00099 case 5: dst[4] = cp2uni[src[4]]; 00100 case 4: dst[3] = cp2uni[src[3]]; 00101 case 3: dst[2] = cp2uni[src[2]]; 00102 case 2: dst[1] = cp2uni[src[1]]; 00103 case 1: dst[0] = cp2uni[src[0]]; 00104 case 0: break; 00105 } 00106 if (srclen < 16) return ret; 00107 dst += 16; 00108 src += 16; 00109 srclen -= 16; 00110 } 00111 } 00112 00113 /* mbstowcs for single-byte code page with char decomposition */ 00114 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags, 00115 const unsigned char *src, unsigned int srclen, 00116 WCHAR *dst, unsigned int dstlen ) 00117 { 00118 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? 
table->cp2uni_glyphs : table->cp2uni; 00119 unsigned int len; 00120 00121 if (!dstlen) /* compute length */ 00122 { 00123 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */ 00124 for (len = 0; srclen; srclen--, src++) 00125 len += get_decomposition( cp2uni[*src], dummy, 4 ); 00126 return len; 00127 } 00128 00129 for (len = dstlen; srclen && len; srclen--, src++) 00130 { 00131 int res = get_decomposition( cp2uni[*src], dst, len ); 00132 if (!res) break; 00133 len -= res; 00134 dst += res; 00135 } 00136 if (srclen) return -1; /* overflow */ 00137 return dstlen - len; 00138 } 00139 00140 /* query necessary dst length for src string */ 00141 static inline int get_length_dbcs( const struct dbcs_table *table, 00142 const unsigned char *src, unsigned int srclen ) 00143 { 00144 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; 00145 int len; 00146 00147 for (len = 0; srclen; srclen--, src++, len++) 00148 { 00149 if (cp2uni_lb[*src]) 00150 { 00151 if (!--srclen) break; /* partial char, ignore it */ 00152 src++; 00153 } 00154 } 00155 return len; 00156 } 00157 00158 /* check src string for invalid chars; return non-zero if invalid char found */ 00159 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table, 00160 const unsigned char *src, unsigned int srclen ) 00161 { 00162 const WCHAR * const cp2uni = table->cp2uni; 00163 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; 00164 const WCHAR def_unicode_char = table->info.def_unicode_char; 00165 const unsigned short def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8] 00166 + (def_unicode_char & 0xff)]; 00167 while (srclen) 00168 { 00169 unsigned char off = cp2uni_lb[*src]; 00170 if (off) /* multi-byte char */ 00171 { 00172 if (srclen == 1) break; /* partial char, error */ 00173 if (cp2uni[(off << 8) + src[1]] == def_unicode_char && 00174 ((src[0] << 8) | src[1]) != def_char) break; 00175 src++; 00176 srclen--; 00177 } 00178 else if ((cp2uni[*src] == 
def_unicode_char && *src != def_char) || 00179 is_private_use_area_char(cp2uni[*src])) break; 00180 src++; 00181 srclen--; 00182 } 00183 return srclen; 00184 } 00185 00186 /* mbstowcs for double-byte code page */ 00187 /* all lengths are in characters, not bytes */ 00188 static inline int mbstowcs_dbcs( const struct dbcs_table *table, 00189 const unsigned char *src, unsigned int srclen, 00190 WCHAR *dst, unsigned int dstlen ) 00191 { 00192 const WCHAR * const cp2uni = table->cp2uni; 00193 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; 00194 unsigned int len; 00195 00196 if (!dstlen) return get_length_dbcs( table, src, srclen ); 00197 00198 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++) 00199 { 00200 unsigned char off = cp2uni_lb[*src]; 00201 if (off) 00202 { 00203 if (!--srclen) break; /* partial char, ignore it */ 00204 src++; 00205 *dst = cp2uni[(off << 8) + *src]; 00206 } 00207 else *dst = cp2uni[*src]; 00208 } 00209 if (srclen) return -1; /* overflow */ 00210 return dstlen - len; 00211 } 00212 00213 00214 /* mbstowcs for double-byte code page with character decomposition */ 00215 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table, 00216 const unsigned char *src, unsigned int srclen, 00217 WCHAR *dst, unsigned int dstlen ) 00218 { 00219 const WCHAR * const cp2uni = table->cp2uni; 00220 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; 00221 unsigned int len; 00222 WCHAR ch; 00223 int res; 00224 00225 if (!dstlen) /* compute length */ 00226 { 00227 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */ 00228 for (len = 0; srclen; srclen--, src++) 00229 { 00230 unsigned char off = cp2uni_lb[*src]; 00231 if (off) 00232 { 00233 if (!--srclen) break; /* partial char, ignore it */ 00234 src++; 00235 ch = cp2uni[(off << 8) + *src]; 00236 } 00237 else ch = cp2uni[*src]; 00238 len += get_decomposition( ch, dummy, 4 ); 00239 } 00240 return len; 00241 } 00242 00243 for (len = dstlen; srclen && len; 
srclen--, src++) 00244 { 00245 unsigned char off = cp2uni_lb[*src]; 00246 if (off) 00247 { 00248 if (!--srclen) break; /* partial char, ignore it */ 00249 src++; 00250 ch = cp2uni[(off << 8) + *src]; 00251 } 00252 else ch = cp2uni[*src]; 00253 if (!(res = get_decomposition( ch, dst, len ))) break; 00254 dst += res; 00255 len -= res; 00256 } 00257 if (srclen) return -1; /* overflow */ 00258 return dstlen - len; 00259 } 00260 00261 00262 /* return -1 on dst buffer overflow, -2 on invalid input char */ 00263 int wine_cp_mbstowcs( const union cptable *table, int flags, 00264 const char *s, int srclen, 00265 WCHAR *dst, int dstlen ) 00266 { 00267 const unsigned char *src = (const unsigned char*) s; 00268 00269 if (table->info.char_size == 1) 00270 { 00271 if (flags & MB_ERR_INVALID_CHARS) 00272 { 00273 if (check_invalid_chars_sbcs( &table->sbcs, flags, src, srclen )) return -2; 00274 } 00275 if (!(flags & MB_COMPOSITE)) 00276 { 00277 if (!dstlen) return srclen; 00278 return mbstowcs_sbcs( &table->sbcs, flags, src, srclen, dst, dstlen ); 00279 } 00280 return mbstowcs_sbcs_decompose( &table->sbcs, flags, src, srclen, dst, dstlen ); 00281 } 00282 else /* mbcs */ 00283 { 00284 if (flags & MB_ERR_INVALID_CHARS) 00285 { 00286 if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2; 00287 } 00288 if (!(flags & MB_COMPOSITE)) 00289 return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen ); 00290 else 00291 return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen ); 00292 } 00293 } 00294 00295 /* CP_SYMBOL implementation */ 00296 /* return -1 on dst buffer overflow */ 00297 int wine_cpsymbol_mbstowcs( const char *src, int srclen, WCHAR *dst, int dstlen) 00298 { 00299 int len, i; 00300 if( dstlen == 0) return srclen; 00301 len = dstlen > srclen ? 
srclen : dstlen; 00302 for( i = 0; i < len; i++) 00303 { 00304 unsigned char c = src [ i ]; 00305 if( c < 0x20 ) 00306 dst[i] = c; 00307 else 00308 dst[i] = c + 0xf000; 00309 } 00310 if( srclen > len) return -1; 00311 return len; 00312 } Generated on Sat May 26 2012 04:35:35 for ReactOS by
1.7.6.1
|