Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > wctomb.c
Go to the documentation of this file.
00001 /* 00002 * WideCharToMultiByte implementation 00003 * 00004 * Copyright 2000 Alexandre Julliard 00005 * 00006 * This library is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Lesser General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2.1 of the License, or (at your option) any later version. 00010 * 00011 * This library is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Lesser General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Lesser General Public 00017 * License along with this library; if not, write to the Free Software 00018 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 00019 */ 00020 00021 #include <string.h> 00022 00023 #include "wine/unicode.h" 00024 00025 /* search for a character in the unicode_compose_table; helper for compose() */ 00026 static inline int binary_search( WCHAR ch, int low, int high ) 00027 { 00028 extern const WCHAR unicode_compose_table[]; 00029 while (low <= high) 00030 { 00031 int pos = (low + high) / 2; 00032 if (unicode_compose_table[2*pos] < ch) 00033 { 00034 low = pos + 1; 00035 continue; 00036 } 00037 if (unicode_compose_table[2*pos] > ch) 00038 { 00039 high = pos - 1; 00040 continue; 00041 } 00042 return pos; 00043 } 00044 return -1; 00045 } 00046 00047 /* return the result of the composition of two Unicode chars, or 0 if none */ 00048 WCHAR compose( const WCHAR *str ) 00049 { 00050 extern const WCHAR unicode_compose_table[]; 00051 extern const unsigned int unicode_compose_table_size; 00052 00053 int idx = 1, low = 0, high = unicode_compose_table_size - 1; 00054 for (;;) 00055 { 00056 int pos = binary_search( str[idx], low, high ); 00057 if (pos == -1) return 0; 00058 if (!idx--) return 
unicode_compose_table[2*pos+1]; 00059 low = unicode_compose_table[2*pos+1]; 00060 high = unicode_compose_table[2*pos+3] - 1; 00061 } 00062 } 00063 00064 00065 /****************************************************************/ 00066 /* sbcs support */ 00067 00068 /* check if 'ch' is an acceptable sbcs mapping for 'wch' */ 00069 static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags, 00070 WCHAR wch, unsigned char ch ) 00071 { 00072 if ((flags & WC_NO_BEST_FIT_CHARS) || ch == (unsigned char)table->info.def_char) 00073 return (table->cp2uni[ch] == wch); 00074 return 1; 00075 } 00076 00077 /* query necessary dst length for src string */ 00078 static int get_length_sbcs( const struct sbcs_table *table, int flags, 00079 const WCHAR *src, unsigned int srclen, int *used ) 00080 { 00081 const unsigned char * const uni2cp_low = table->uni2cp_low; 00082 const unsigned short * const uni2cp_high = table->uni2cp_high; 00083 int ret, tmp; 00084 WCHAR composed; 00085 00086 if (!used) used = &tmp; /* avoid checking on every char */ 00087 *used = 0; 00088 00089 for (ret = 0; srclen; ret++, src++, srclen--) 00090 { 00091 WCHAR wch = *src; 00092 unsigned char ch; 00093 00094 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src))) 00095 { 00096 /* now check if we can use the composed char */ 00097 ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)]; 00098 if (is_valid_sbcs_mapping( table, flags, composed, ch )) 00099 { 00100 /* we have a good mapping, use it */ 00101 src++; 00102 srclen--; 00103 continue; 00104 } 00105 /* no mapping for the composed char, check the other flags */ 00106 if (flags & WC_DEFAULTCHAR) /* use the default char instead */ 00107 { 00108 *used = 1; 00109 src++; /* skip the non-spacing char */ 00110 srclen--; 00111 continue; 00112 } 00113 if (flags & WC_DISCARDNS) /* skip the second char of the composition */ 00114 { 00115 src++; 00116 srclen--; 00117 } 00118 /* WC_SEPCHARS is the default */ 00119 } 
00120 if (!*used) 00121 { 00122 ch = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)]; 00123 *used = !is_valid_sbcs_mapping( table, flags, wch, ch ); 00124 } 00125 } 00126 return ret; 00127 } 00128 00129 /* wcstombs for single-byte code page */ 00130 static inline int wcstombs_sbcs( const struct sbcs_table *table, 00131 const WCHAR *src, unsigned int srclen, 00132 char *dst, unsigned int dstlen ) 00133 { 00134 const unsigned char * const uni2cp_low = table->uni2cp_low; 00135 const unsigned short * const uni2cp_high = table->uni2cp_high; 00136 int ret = srclen; 00137 00138 if (dstlen < srclen) 00139 { 00140 /* buffer too small: fill it up to dstlen and return error */ 00141 srclen = dstlen; 00142 ret = -1; 00143 } 00144 00145 while (srclen >= 16) 00146 { 00147 dst[0] = uni2cp_low[uni2cp_high[src[0] >> 8] + (src[0] & 0xff)]; 00148 dst[1] = uni2cp_low[uni2cp_high[src[1] >> 8] + (src[1] & 0xff)]; 00149 dst[2] = uni2cp_low[uni2cp_high[src[2] >> 8] + (src[2] & 0xff)]; 00150 dst[3] = uni2cp_low[uni2cp_high[src[3] >> 8] + (src[3] & 0xff)]; 00151 dst[4] = uni2cp_low[uni2cp_high[src[4] >> 8] + (src[4] & 0xff)]; 00152 dst[5] = uni2cp_low[uni2cp_high[src[5] >> 8] + (src[5] & 0xff)]; 00153 dst[6] = uni2cp_low[uni2cp_high[src[6] >> 8] + (src[6] & 0xff)]; 00154 dst[7] = uni2cp_low[uni2cp_high[src[7] >> 8] + (src[7] & 0xff)]; 00155 dst[8] = uni2cp_low[uni2cp_high[src[8] >> 8] + (src[8] & 0xff)]; 00156 dst[9] = uni2cp_low[uni2cp_high[src[9] >> 8] + (src[9] & 0xff)]; 00157 dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)]; 00158 dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)]; 00159 dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)]; 00160 dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)]; 00161 dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)]; 00162 dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)]; 00163 src += 16; 00164 dst += 16; 00165 srclen -= 16; 00166 } 00167 00168 /* now 
handle remaining characters */ 00169 src += srclen; 00170 dst += srclen; 00171 switch(srclen) 00172 { 00173 case 15: dst[-15] = uni2cp_low[uni2cp_high[src[-15] >> 8] + (src[-15] & 0xff)]; 00174 case 14: dst[-14] = uni2cp_low[uni2cp_high[src[-14] >> 8] + (src[-14] & 0xff)]; 00175 case 13: dst[-13] = uni2cp_low[uni2cp_high[src[-13] >> 8] + (src[-13] & 0xff)]; 00176 case 12: dst[-12] = uni2cp_low[uni2cp_high[src[-12] >> 8] + (src[-12] & 0xff)]; 00177 case 11: dst[-11] = uni2cp_low[uni2cp_high[src[-11] >> 8] + (src[-11] & 0xff)]; 00178 case 10: dst[-10] = uni2cp_low[uni2cp_high[src[-10] >> 8] + (src[-10] & 0xff)]; 00179 case 9: dst[-9] = uni2cp_low[uni2cp_high[src[-9] >> 8] + (src[-9] & 0xff)]; 00180 case 8: dst[-8] = uni2cp_low[uni2cp_high[src[-8] >> 8] + (src[-8] & 0xff)]; 00181 case 7: dst[-7] = uni2cp_low[uni2cp_high[src[-7] >> 8] + (src[-7] & 0xff)]; 00182 case 6: dst[-6] = uni2cp_low[uni2cp_high[src[-6] >> 8] + (src[-6] & 0xff)]; 00183 case 5: dst[-5] = uni2cp_low[uni2cp_high[src[-5] >> 8] + (src[-5] & 0xff)]; 00184 case 4: dst[-4] = uni2cp_low[uni2cp_high[src[-4] >> 8] + (src[-4] & 0xff)]; 00185 case 3: dst[-3] = uni2cp_low[uni2cp_high[src[-3] >> 8] + (src[-3] & 0xff)]; 00186 case 2: dst[-2] = uni2cp_low[uni2cp_high[src[-2] >> 8] + (src[-2] & 0xff)]; 00187 case 1: dst[-1] = uni2cp_low[uni2cp_high[src[-1] >> 8] + (src[-1] & 0xff)]; 00188 case 0: break; 00189 } 00190 return ret; 00191 } 00192 00193 /* slow version of wcstombs_sbcs that handles the various flags */ 00194 static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags, 00195 const WCHAR *src, unsigned int srclen, 00196 char *dst, unsigned int dstlen, 00197 const char *defchar, int *used ) 00198 { 00199 const unsigned char * const uni2cp_low = table->uni2cp_low; 00200 const unsigned short * const uni2cp_high = table->uni2cp_high; 00201 unsigned char def; 00202 unsigned int len; 00203 int tmp; 00204 WCHAR composed; 00205 00206 if (!defchar) 00207 def = table->info.def_char & 0xff; 00208 else 
00209 def = *defchar; 00210 00211 if (!used) used = &tmp; /* avoid checking on every char */ 00212 *used = 0; 00213 00214 for (len = dstlen; srclen && len; dst++, len--, src++, srclen--) 00215 { 00216 WCHAR wch = *src; 00217 00218 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src))) 00219 { 00220 /* now check if we can use the composed char */ 00221 *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)]; 00222 if (is_valid_sbcs_mapping( table, flags, composed, *dst )) 00223 { 00224 /* we have a good mapping, use it */ 00225 src++; 00226 srclen--; 00227 continue; 00228 } 00229 /* no mapping for the composed char, check the other flags */ 00230 if (flags & WC_DEFAULTCHAR) /* use the default char instead */ 00231 { 00232 *dst = def; 00233 *used = 1; 00234 src++; /* skip the non-spacing char */ 00235 srclen--; 00236 continue; 00237 } 00238 if (flags & WC_DISCARDNS) /* skip the second char of the composition */ 00239 { 00240 src++; 00241 srclen--; 00242 } 00243 /* WC_SEPCHARS is the default */ 00244 } 00245 00246 *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)]; 00247 if (!is_valid_sbcs_mapping( table, flags, wch, *dst )) 00248 { 00249 *dst = def; 00250 *used = 1; 00251 } 00252 } 00253 if (srclen) return -1; /* overflow */ 00254 return dstlen - len; 00255 } 00256 00257 00258 /****************************************************************/ 00259 /* dbcs support */ 00260 00261 /* check if 'ch' is an acceptable dbcs mapping for 'wch' */ 00262 static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags, 00263 WCHAR wch, unsigned short ch ) 00264 { 00265 if ((flags & WC_NO_BEST_FIT_CHARS) || ch == table->info.def_char) 00266 { 00267 /* check if char maps back to the same Unicode value */ 00268 if (ch & 0xff00) 00269 { 00270 unsigned char off = table->cp2uni_leadbytes[ch >> 8]; 00271 return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch); 00272 } 00273 return (table->cp2uni[ch & 0xff] == wch); 00274 } 00275 
return 1; 00276 } 00277 00278 /* query necessary dst length for src string */ 00279 static int get_length_dbcs( const struct dbcs_table *table, int flags, 00280 const WCHAR *src, unsigned int srclen, 00281 const char *defchar, int *used ) 00282 { 00283 const unsigned short * const uni2cp_low = table->uni2cp_low; 00284 const unsigned short * const uni2cp_high = table->uni2cp_high; 00285 WCHAR defchar_value = table->info.def_char; 00286 WCHAR composed; 00287 int len, tmp; 00288 00289 if (!defchar && !used && !(flags & WC_COMPOSITECHECK)) 00290 { 00291 for (len = 0; srclen; srclen--, src++, len++) 00292 { 00293 if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++; 00294 } 00295 return len; 00296 } 00297 00298 if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0]; 00299 if (!used) used = &tmp; /* avoid checking on every char */ 00300 *used = 0; 00301 for (len = 0; srclen; len++, srclen--, src++) 00302 { 00303 unsigned short res; 00304 WCHAR wch = *src; 00305 00306 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src))) 00307 { 00308 /* now check if we can use the composed char */ 00309 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)]; 00310 00311 if (is_valid_dbcs_mapping( table, flags, composed, res )) 00312 { 00313 /* we have a good mapping for the composed char, use it */ 00314 if (res & 0xff00) len++; 00315 src++; 00316 srclen--; 00317 continue; 00318 } 00319 /* no mapping for the composed char, check the other flags */ 00320 if (flags & WC_DEFAULTCHAR) /* use the default char instead */ 00321 { 00322 if (defchar_value & 0xff00) len++; 00323 *used = 1; 00324 src++; /* skip the non-spacing char */ 00325 srclen--; 00326 continue; 00327 } 00328 if (flags & WC_DISCARDNS) /* skip the second char of the composition */ 00329 { 00330 src++; 00331 srclen--; 00332 } 00333 /* WC_SEPCHARS is the default */ 00334 } 00335 00336 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)]; 00337 
if (!is_valid_dbcs_mapping( table, flags, wch, res )) 00338 { 00339 res = defchar_value; 00340 *used = 1; 00341 } 00342 if (res & 0xff00) len++; 00343 } 00344 return len; 00345 } 00346 00347 /* wcstombs for double-byte code page */ 00348 static inline int wcstombs_dbcs( const struct dbcs_table *table, 00349 const WCHAR *src, unsigned int srclen, 00350 char *dst, unsigned int dstlen ) 00351 { 00352 const unsigned short * const uni2cp_low = table->uni2cp_low; 00353 const unsigned short * const uni2cp_high = table->uni2cp_high; 00354 int len; 00355 00356 for (len = dstlen; srclen && len; len--, srclen--, src++) 00357 { 00358 unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)]; 00359 if (res & 0xff00) 00360 { 00361 if (len == 1) break; /* do not output a partial char */ 00362 len--; 00363 *dst++ = res >> 8; 00364 } 00365 *dst++ = (char)res; 00366 } 00367 if (srclen) return -1; /* overflow */ 00368 return dstlen - len; 00369 } 00370 00371 /* slow version of wcstombs_dbcs that handles the various flags */ 00372 static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags, 00373 const WCHAR *src, unsigned int srclen, 00374 char *dst, unsigned int dstlen, 00375 const char *defchar, int *used ) 00376 { 00377 const unsigned short * const uni2cp_low = table->uni2cp_low; 00378 const unsigned short * const uni2cp_high = table->uni2cp_high; 00379 WCHAR defchar_value = table->info.def_char; 00380 WCHAR composed; 00381 int len, tmp; 00382 00383 if (defchar) defchar_value = defchar[1] ? 
((defchar[0] << 8) | defchar[1]) : defchar[0]; 00384 if (!used) used = &tmp; /* avoid checking on every char */ 00385 *used = 0; 00386 00387 for (len = dstlen; srclen && len; len--, srclen--, src++) 00388 { 00389 unsigned short res; 00390 WCHAR wch = *src; 00391 00392 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src))) 00393 { 00394 /* now check if we can use the composed char */ 00395 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)]; 00396 00397 if (is_valid_dbcs_mapping( table, flags, composed, res )) 00398 { 00399 /* we have a good mapping for the composed char, use it */ 00400 src++; 00401 srclen--; 00402 goto output_char; 00403 } 00404 /* no mapping for the composed char, check the other flags */ 00405 if (flags & WC_DEFAULTCHAR) /* use the default char instead */ 00406 { 00407 res = defchar_value; 00408 *used = 1; 00409 src++; /* skip the non-spacing char */ 00410 srclen--; 00411 goto output_char; 00412 } 00413 if (flags & WC_DISCARDNS) /* skip the second char of the composition */ 00414 { 00415 src++; 00416 srclen--; 00417 } 00418 /* WC_SEPCHARS is the default */ 00419 } 00420 00421 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)]; 00422 if (!is_valid_dbcs_mapping( table, flags, wch, res )) 00423 { 00424 res = defchar_value; 00425 *used = 1; 00426 } 00427 00428 output_char: 00429 if (res & 0xff00) 00430 { 00431 if (len == 1) break; /* do not output a partial char */ 00432 len--; 00433 *dst++ = res >> 8; 00434 } 00435 *dst++ = (char)res; 00436 } 00437 if (srclen) return -1; /* overflow */ 00438 return dstlen - len; 00439 } 00440 00441 /* wide char to multi byte string conversion */ 00442 /* return -1 on dst buffer overflow */ 00443 int wine_cp_wcstombs( const union cptable *table, int flags, 00444 const WCHAR *src, int srclen, 00445 char *dst, int dstlen, const char *defchar, int *used ) 00446 { 00447 if (table->info.char_size == 1) 00448 { 00449 if (flags || defchar || used) 00450 { 00451 if (!dstlen) return 
get_length_sbcs( &table->sbcs, flags, src, srclen, used ); 00452 return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen, 00453 dst, dstlen, defchar, used ); 00454 } 00455 if (!dstlen) return srclen; 00456 return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen ); 00457 } 00458 else /* mbcs */ 00459 { 00460 if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar, used ); 00461 if (flags || defchar || used) 00462 return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen, 00463 dst, dstlen, defchar, used ); 00464 return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen ); 00465 } 00466 } 00467 00468 /* CP_SYMBOL implementation */ 00469 /* return -1 on dst buffer overflow, -2 on invalid character */ 00470 int wine_cpsymbol_wcstombs( const WCHAR *src, int srclen, char *dst, int dstlen) 00471 { 00472 int len, i; 00473 if( dstlen == 0) return srclen; 00474 len = dstlen > srclen ? srclen : dstlen; 00475 for( i = 0; i < len; i++) 00476 { 00477 WCHAR w = src [ i ]; 00478 if( w < 0x20 ) 00479 dst[i] = w; 00480 else if( w >= 0xf020 && w < 0xf100) 00481 dst[i] = w - 0xf000; 00482 else 00483 return -2; 00484 } 00485 if( srclen > len) return -1; 00486 return len; 00487 } Generated on Sun May 27 2012 04:37:50 for ReactOS by
1.7.6.1
|