ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

wctomb.c
Go to the documentation of this file.
00001 /*
00002  * WideCharToMultiByte implementation
00003  *
00004  * Copyright 2000 Alexandre Julliard
00005  *
00006  * This library is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * This library is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with this library; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
00019  */
00020 
00021 #include <string.h>
00022 
00023 #include "wine/unicode.h"
00024 
00025 /* search for a character in the unicode_compose_table; helper for compose() */
00026 static inline int binary_search( WCHAR ch, int low, int high )
00027 {
00028     extern const WCHAR unicode_compose_table[];
00029     while (low <= high)
00030     {
00031         int pos = (low + high) / 2;
00032         if (unicode_compose_table[2*pos] < ch)
00033         {
00034             low = pos + 1;
00035             continue;
00036         }
00037         if (unicode_compose_table[2*pos] > ch)
00038         {
00039             high = pos - 1;
00040             continue;
00041         }
00042         return pos;
00043     }
00044     return -1;
00045 }
00046 
00047 /* return the result of the composition of two Unicode chars, or 0 if none */
00048 WCHAR compose( const WCHAR *str )
00049 {
00050     extern const WCHAR unicode_compose_table[];
00051     extern const unsigned int unicode_compose_table_size;
00052 
00053     int idx = 1, low = 0, high = unicode_compose_table_size - 1;
00054     for (;;)
00055     {
00056         int pos = binary_search( str[idx], low, high );
00057         if (pos == -1) return 0;
00058         if (!idx--) return unicode_compose_table[2*pos+1];
00059         low = unicode_compose_table[2*pos+1];
00060         high = unicode_compose_table[2*pos+3] - 1;
00061     }
00062 }
00063 
00064 
00065 /****************************************************************/
00066 /* sbcs support */
00067 
00068 /* check if 'ch' is an acceptable sbcs mapping for 'wch' */
00069 static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags,
00070                                          WCHAR wch, unsigned char ch )
00071 {
00072     if ((flags & WC_NO_BEST_FIT_CHARS) || ch == (unsigned char)table->info.def_char)
00073         return (table->cp2uni[ch] == wch);
00074     return 1;
00075 }
00076 
00077 /* query necessary dst length for src string */
00078 static int get_length_sbcs( const struct sbcs_table *table, int flags,
00079                             const WCHAR *src, unsigned int srclen, int *used )
00080 {
00081     const unsigned char  * const uni2cp_low = table->uni2cp_low;
00082     const unsigned short * const uni2cp_high = table->uni2cp_high;
00083     int ret, tmp;
00084     WCHAR composed;
00085 
00086     if (!used) used = &tmp;  /* avoid checking on every char */
00087     *used = 0;
00088 
00089     for (ret = 0; srclen; ret++, src++, srclen--)
00090     {
00091         WCHAR wch = *src;
00092         unsigned char ch;
00093 
00094         if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
00095         {
00096             /* now check if we can use the composed char */
00097             ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
00098             if (is_valid_sbcs_mapping( table, flags, composed, ch ))
00099             {
00100                 /* we have a good mapping, use it */
00101                 src++;
00102                 srclen--;
00103                 continue;
00104             }
00105             /* no mapping for the composed char, check the other flags */
00106             if (flags & WC_DEFAULTCHAR) /* use the default char instead */
00107             {
00108                 *used = 1;
00109                 src++;  /* skip the non-spacing char */
00110                 srclen--;
00111                 continue;
00112             }
00113             if (flags & WC_DISCARDNS) /* skip the second char of the composition */
00114             {
00115                 src++;
00116                 srclen--;
00117             }
00118             /* WC_SEPCHARS is the default */
00119         }
00120         if (!*used)
00121         {
00122             ch = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
00123             *used = !is_valid_sbcs_mapping( table, flags, wch, ch );
00124         }
00125     }
00126     return ret;
00127 }
00128 
00129 /* wcstombs for single-byte code page */
00130 static inline int wcstombs_sbcs( const struct sbcs_table *table,
00131                                  const WCHAR *src, unsigned int srclen,
00132                                  char *dst, unsigned int dstlen )
00133 {
00134     const unsigned char  * const uni2cp_low = table->uni2cp_low;
00135     const unsigned short * const uni2cp_high = table->uni2cp_high;
00136     int ret = srclen;
00137 
00138     if (dstlen < srclen)
00139     {
00140         /* buffer too small: fill it up to dstlen and return error */
00141         srclen = dstlen;
00142         ret = -1;
00143     }
00144 
00145     while (srclen >= 16)
00146     {
00147         dst[0]  = uni2cp_low[uni2cp_high[src[0]  >> 8] + (src[0]  & 0xff)];
00148         dst[1]  = uni2cp_low[uni2cp_high[src[1]  >> 8] + (src[1]  & 0xff)];
00149         dst[2]  = uni2cp_low[uni2cp_high[src[2]  >> 8] + (src[2]  & 0xff)];
00150         dst[3]  = uni2cp_low[uni2cp_high[src[3]  >> 8] + (src[3]  & 0xff)];
00151         dst[4]  = uni2cp_low[uni2cp_high[src[4]  >> 8] + (src[4]  & 0xff)];
00152         dst[5]  = uni2cp_low[uni2cp_high[src[5]  >> 8] + (src[5]  & 0xff)];
00153         dst[6]  = uni2cp_low[uni2cp_high[src[6]  >> 8] + (src[6]  & 0xff)];
00154         dst[7]  = uni2cp_low[uni2cp_high[src[7]  >> 8] + (src[7]  & 0xff)];
00155         dst[8]  = uni2cp_low[uni2cp_high[src[8]  >> 8] + (src[8]  & 0xff)];
00156         dst[9]  = uni2cp_low[uni2cp_high[src[9]  >> 8] + (src[9]  & 0xff)];
00157         dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)];
00158         dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)];
00159         dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)];
00160         dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)];
00161         dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)];
00162         dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)];
00163         src += 16;
00164         dst += 16;
00165         srclen -= 16;
00166     }
00167 
00168     /* now handle remaining characters */
00169     src += srclen;
00170     dst += srclen;
00171     switch(srclen)
00172     {
00173     case 15: dst[-15] = uni2cp_low[uni2cp_high[src[-15] >> 8] + (src[-15] & 0xff)];
00174     case 14: dst[-14] = uni2cp_low[uni2cp_high[src[-14] >> 8] + (src[-14] & 0xff)];
00175     case 13: dst[-13] = uni2cp_low[uni2cp_high[src[-13] >> 8] + (src[-13] & 0xff)];
00176     case 12: dst[-12] = uni2cp_low[uni2cp_high[src[-12] >> 8] + (src[-12] & 0xff)];
00177     case 11: dst[-11] = uni2cp_low[uni2cp_high[src[-11] >> 8] + (src[-11] & 0xff)];
00178     case 10: dst[-10] = uni2cp_low[uni2cp_high[src[-10] >> 8] + (src[-10] & 0xff)];
00179     case 9:  dst[-9]  = uni2cp_low[uni2cp_high[src[-9]  >> 8] + (src[-9]  & 0xff)];
00180     case 8:  dst[-8]  = uni2cp_low[uni2cp_high[src[-8]  >> 8] + (src[-8]  & 0xff)];
00181     case 7:  dst[-7]  = uni2cp_low[uni2cp_high[src[-7]  >> 8] + (src[-7]  & 0xff)];
00182     case 6:  dst[-6]  = uni2cp_low[uni2cp_high[src[-6]  >> 8] + (src[-6]  & 0xff)];
00183     case 5:  dst[-5]  = uni2cp_low[uni2cp_high[src[-5]  >> 8] + (src[-5]  & 0xff)];
00184     case 4:  dst[-4]  = uni2cp_low[uni2cp_high[src[-4]  >> 8] + (src[-4]  & 0xff)];
00185     case 3:  dst[-3]  = uni2cp_low[uni2cp_high[src[-3]  >> 8] + (src[-3]  & 0xff)];
00186     case 2:  dst[-2]  = uni2cp_low[uni2cp_high[src[-2]  >> 8] + (src[-2]  & 0xff)];
00187     case 1:  dst[-1]  = uni2cp_low[uni2cp_high[src[-1]  >> 8] + (src[-1]  & 0xff)];
00188     case 0: break;
00189     }
00190     return ret;
00191 }
00192 
00193 /* slow version of wcstombs_sbcs that handles the various flags */
00194 static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags,
00195                                const WCHAR *src, unsigned int srclen,
00196                                char *dst, unsigned int dstlen,
00197                                const char *defchar, int *used )
00198 {
00199     const unsigned char  * const uni2cp_low = table->uni2cp_low;
00200     const unsigned short * const uni2cp_high = table->uni2cp_high;
00201     unsigned char def;
00202     unsigned int len;
00203     int tmp;
00204     WCHAR composed;
00205 
00206     if (!defchar)
00207         def = table->info.def_char & 0xff;
00208     else
00209         def = *defchar;
00210 
00211     if (!used) used = &tmp;  /* avoid checking on every char */
00212     *used = 0;
00213 
00214     for (len = dstlen; srclen && len; dst++, len--, src++, srclen--)
00215     {
00216         WCHAR wch = *src;
00217 
00218         if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
00219         {
00220             /* now check if we can use the composed char */
00221             *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
00222             if (is_valid_sbcs_mapping( table, flags, composed, *dst ))
00223             {
00224                 /* we have a good mapping, use it */
00225                 src++;
00226                 srclen--;
00227                 continue;
00228             }
00229             /* no mapping for the composed char, check the other flags */
00230             if (flags & WC_DEFAULTCHAR) /* use the default char instead */
00231             {
00232                 *dst = def;
00233                 *used = 1;
00234                 src++;  /* skip the non-spacing char */
00235                 srclen--;
00236                 continue;
00237             }
00238             if (flags & WC_DISCARDNS) /* skip the second char of the composition */
00239             {
00240                 src++;
00241                 srclen--;
00242             }
00243             /* WC_SEPCHARS is the default */
00244         }
00245 
00246         *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
00247         if (!is_valid_sbcs_mapping( table, flags, wch, *dst ))
00248         {
00249             *dst = def;
00250             *used = 1;
00251         }
00252     }
00253     if (srclen) return -1;  /* overflow */
00254     return dstlen - len;
00255 }
00256 
00257 
00258 /****************************************************************/
00259 /* dbcs support */
00260 
00261 /* check if 'ch' is an acceptable dbcs mapping for 'wch' */
00262 static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags,
00263                                          WCHAR wch, unsigned short ch )
00264 {
00265     if ((flags & WC_NO_BEST_FIT_CHARS) || ch == table->info.def_char)
00266     {
00267         /* check if char maps back to the same Unicode value */
00268         if (ch & 0xff00)
00269         {
00270             unsigned char off = table->cp2uni_leadbytes[ch >> 8];
00271             return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch);
00272         }
00273         return (table->cp2uni[ch & 0xff] == wch);
00274     }
00275     return 1;
00276 }
00277 
00278 /* query necessary dst length for src string */
00279 static int get_length_dbcs( const struct dbcs_table *table, int flags,
00280                             const WCHAR *src, unsigned int srclen,
00281                             const char *defchar, int *used )
00282 {
00283     const unsigned short * const uni2cp_low = table->uni2cp_low;
00284     const unsigned short * const uni2cp_high = table->uni2cp_high;
00285     WCHAR defchar_value = table->info.def_char;
00286     WCHAR composed;
00287     int len, tmp;
00288 
00289     if (!defchar && !used && !(flags & WC_COMPOSITECHECK))
00290     {
00291         for (len = 0; srclen; srclen--, src++, len++)
00292         {
00293             if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
00294         }
00295         return len;
00296     }
00297 
00298     if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
00299     if (!used) used = &tmp;  /* avoid checking on every char */
00300     *used = 0;
00301     for (len = 0; srclen; len++, srclen--, src++)
00302     {
00303         unsigned short res;
00304         WCHAR wch = *src;
00305 
00306         if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
00307         {
00308             /* now check if we can use the composed char */
00309             res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
00310 
00311             if (is_valid_dbcs_mapping( table, flags, composed, res ))
00312             {
00313                 /* we have a good mapping for the composed char, use it */
00314                 if (res & 0xff00) len++;
00315                 src++;
00316                 srclen--;
00317                 continue;
00318             }
00319             /* no mapping for the composed char, check the other flags */
00320             if (flags & WC_DEFAULTCHAR) /* use the default char instead */
00321             {
00322                 if (defchar_value & 0xff00) len++;
00323                 *used = 1;
00324                 src++;  /* skip the non-spacing char */
00325                 srclen--;
00326                 continue;
00327             }
00328             if (flags & WC_DISCARDNS) /* skip the second char of the composition */
00329             {
00330                 src++;
00331                 srclen--;
00332             }
00333             /* WC_SEPCHARS is the default */
00334         }
00335 
00336         res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
00337         if (!is_valid_dbcs_mapping( table, flags, wch, res ))
00338         {
00339             res = defchar_value;
00340             *used = 1;
00341         }
00342         if (res & 0xff00) len++;
00343     }
00344     return len;
00345 }
00346 
00347 /* wcstombs for double-byte code page */
00348 static inline int wcstombs_dbcs( const struct dbcs_table *table,
00349                                  const WCHAR *src, unsigned int srclen,
00350                                  char *dst, unsigned int dstlen )
00351 {
00352     const unsigned short * const uni2cp_low = table->uni2cp_low;
00353     const unsigned short * const uni2cp_high = table->uni2cp_high;
00354     int len;
00355 
00356     for (len = dstlen; srclen && len; len--, srclen--, src++)
00357     {
00358         unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
00359         if (res & 0xff00)
00360         {
00361             if (len == 1) break;  /* do not output a partial char */
00362             len--;
00363             *dst++ = res >> 8;
00364         }
00365         *dst++ = (char)res;
00366     }
00367     if (srclen) return -1;  /* overflow */
00368     return dstlen - len;
00369 }
00370 
00371 /* slow version of wcstombs_dbcs that handles the various flags */
00372 static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
00373                                const WCHAR *src, unsigned int srclen,
00374                                char *dst, unsigned int dstlen,
00375                                const char *defchar, int *used )
00376 {
00377     const unsigned short * const uni2cp_low = table->uni2cp_low;
00378     const unsigned short * const uni2cp_high = table->uni2cp_high;
00379     WCHAR defchar_value = table->info.def_char;
00380     WCHAR composed;
00381     int len, tmp;
00382 
00383     if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
00384     if (!used) used = &tmp;  /* avoid checking on every char */
00385     *used = 0;
00386 
00387     for (len = dstlen; srclen && len; len--, srclen--, src++)
00388     {
00389         unsigned short res;
00390         WCHAR wch = *src;
00391 
00392         if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
00393         {
00394             /* now check if we can use the composed char */
00395             res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
00396 
00397             if (is_valid_dbcs_mapping( table, flags, composed, res ))
00398             {
00399                 /* we have a good mapping for the composed char, use it */
00400                 src++;
00401                 srclen--;
00402                 goto output_char;
00403             }
00404             /* no mapping for the composed char, check the other flags */
00405             if (flags & WC_DEFAULTCHAR) /* use the default char instead */
00406             {
00407                 res = defchar_value;
00408                 *used = 1;
00409                 src++;  /* skip the non-spacing char */
00410                 srclen--;
00411                 goto output_char;
00412             }
00413             if (flags & WC_DISCARDNS) /* skip the second char of the composition */
00414             {
00415                 src++;
00416                 srclen--;
00417             }
00418             /* WC_SEPCHARS is the default */
00419         }
00420 
00421         res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
00422         if (!is_valid_dbcs_mapping( table, flags, wch, res ))
00423         {
00424             res = defchar_value;
00425             *used = 1;
00426         }
00427 
00428     output_char:
00429         if (res & 0xff00)
00430         {
00431             if (len == 1) break;  /* do not output a partial char */
00432             len--;
00433             *dst++ = res >> 8;
00434         }
00435         *dst++ = (char)res;
00436     }
00437     if (srclen) return -1;  /* overflow */
00438     return dstlen - len;
00439 }
00440 
00441 /* wide char to multi byte string conversion */
00442 /* return -1 on dst buffer overflow */
00443 int wine_cp_wcstombs( const union cptable *table, int flags,
00444                       const WCHAR *src, int srclen,
00445                       char *dst, int dstlen, const char *defchar, int *used )
00446 {
00447     if (table->info.char_size == 1)
00448     {
00449         if (flags || defchar || used)
00450         {
00451             if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen, used );
00452             return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen,
00453                                        dst, dstlen, defchar, used );
00454         }
00455         if (!dstlen) return srclen;
00456         return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
00457     }
00458     else /* mbcs */
00459     {
00460         if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar, used );
00461         if (flags || defchar || used)
00462             return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen,
00463                                        dst, dstlen, defchar, used );
00464         return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
00465     }
00466 }
00467 
00468 /* CP_SYMBOL implementation */
00469 /* return -1 on dst buffer overflow, -2 on invalid character */
00470 int wine_cpsymbol_wcstombs( const WCHAR *src, int srclen, char *dst, int dstlen)
00471 {
00472     int len, i;
00473     if( dstlen == 0) return srclen;
00474     len = dstlen > srclen ? srclen : dstlen;
00475     for( i = 0; i < len; i++)
00476     {
00477         WCHAR w = src [ i ];
00478         if( w < 0x20 )
00479             dst[i] = w;
00480         else if( w >= 0xf020 && w < 0xf100)
00481             dst[i] = w - 0xf000;
00482         else
00483             return -2;
00484     }
00485     if( srclen > len) return -1;
00486     return len;
00487 }

Generated on Sun May 27 2012 04:37:50 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.