ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

mbtowc.c
Go to the documentation of this file.
00001 /*
00002  * MultiByteToWideChar implementation
00003  *
00004  * Copyright 2000 Alexandre Julliard
00005  *
00006  * This library is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * This library is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with this library; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
00019  */
00020 
00021 #include <string.h>
00022 
00023 #include "wine/unicode.h"
00024 
00025 /* get the decomposition of a Unicode char */
00026 static int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen )
00027 {
00028     extern const WCHAR unicode_decompose_table[];
00029     const WCHAR *ptr = unicode_decompose_table;
00030     int res;
00031 
00032     *dst = src;
00033     ptr = unicode_decompose_table + ptr[src >> 8];
00034     ptr = unicode_decompose_table + ptr[(src >> 4) & 0x0f] + 2 * (src & 0x0f);
00035     if (!*ptr) return 1;
00036     if (dstlen <= 1) return 0;
00037     /* apply the decomposition recursively to the first char */
00038     if ((res = get_decomposition( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
00039     return res;
00040 }
00041 
00042 /* check the code whether it is in Unicode Private Use Area (PUA). */
00043 /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */
00044 static inline int is_private_use_area_char(WCHAR code)
00045 {
00046     return (code >= 0xe000 && code <= 0xf8ff);
00047 }
00048 
00049 /* check src string for invalid chars; return non-zero if invalid char found */
00050 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, int flags,
00051                                             const unsigned char *src, unsigned int srclen )
00052 {
00053     const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
00054     const WCHAR def_unicode_char = table->info.def_unicode_char;
00055     const unsigned char def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
00056                                                      + (def_unicode_char & 0xff)];
00057     while (srclen)
00058     {
00059         if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
00060             is_private_use_area_char(cp2uni[*src])) break;
00061         src++;
00062         srclen--;
00063     }
00064     return srclen;
00065 }
00066 
00067 /* mbstowcs for single-byte code page */
00068 /* all lengths are in characters, not bytes */
00069 static inline int mbstowcs_sbcs( const struct sbcs_table *table, int flags,
00070                                  const unsigned char *src, unsigned int srclen,
00071                                  WCHAR *dst, unsigned int dstlen )
00072 {
00073     const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
00074     int ret = srclen;
00075 
00076     if (dstlen < srclen)
00077     {
00078         /* buffer too small: fill it up to dstlen and return error */
00079         srclen = dstlen;
00080         ret = -1;
00081     }
00082 
00083     for (;;)
00084     {
00085         switch(srclen)
00086         {
00087         default:
00088         case 16: dst[15] = cp2uni[src[15]];
00089         case 15: dst[14] = cp2uni[src[14]];
00090         case 14: dst[13] = cp2uni[src[13]];
00091         case 13: dst[12] = cp2uni[src[12]];
00092         case 12: dst[11] = cp2uni[src[11]];
00093         case 11: dst[10] = cp2uni[src[10]];
00094         case 10: dst[9]  = cp2uni[src[9]];
00095         case 9:  dst[8]  = cp2uni[src[8]];
00096         case 8:  dst[7]  = cp2uni[src[7]];
00097         case 7:  dst[6]  = cp2uni[src[6]];
00098         case 6:  dst[5]  = cp2uni[src[5]];
00099         case 5:  dst[4]  = cp2uni[src[4]];
00100         case 4:  dst[3]  = cp2uni[src[3]];
00101         case 3:  dst[2]  = cp2uni[src[2]];
00102         case 2:  dst[1]  = cp2uni[src[1]];
00103         case 1:  dst[0]  = cp2uni[src[0]];
00104         case 0: break;
00105         }
00106         if (srclen < 16) return ret;
00107         dst += 16;
00108         src += 16;
00109         srclen -= 16;
00110     }
00111 }
00112 
00113 /* mbstowcs for single-byte code page with char decomposition */
00114 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags,
00115                                     const unsigned char *src, unsigned int srclen,
00116                                     WCHAR *dst, unsigned int dstlen )
00117 {
00118     const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
00119     unsigned int len;
00120 
00121     if (!dstlen)  /* compute length */
00122     {
00123         WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
00124         for (len = 0; srclen; srclen--, src++)
00125             len += get_decomposition( cp2uni[*src], dummy, 4 );
00126         return len;
00127     }
00128 
00129     for (len = dstlen; srclen && len; srclen--, src++)
00130     {
00131         int res = get_decomposition( cp2uni[*src], dst, len );
00132         if (!res) break;
00133         len -= res;
00134         dst += res;
00135     }
00136     if (srclen) return -1;  /* overflow */
00137     return dstlen - len;
00138 }
00139 
00140 /* query necessary dst length for src string */
00141 static inline int get_length_dbcs( const struct dbcs_table *table,
00142                                    const unsigned char *src, unsigned int srclen )
00143 {
00144     const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
00145     int len;
00146 
00147     for (len = 0; srclen; srclen--, src++, len++)
00148     {
00149         if (cp2uni_lb[*src])
00150         {
00151             if (!--srclen) break;  /* partial char, ignore it */
00152             src++;
00153         }
00154     }
00155     return len;
00156 }
00157 
00158 /* check src string for invalid chars; return non-zero if invalid char found */
00159 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
00160                                             const unsigned char *src, unsigned int srclen )
00161 {
00162     const WCHAR * const cp2uni = table->cp2uni;
00163     const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
00164     const WCHAR def_unicode_char = table->info.def_unicode_char;
00165     const unsigned short def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
00166                                                       + (def_unicode_char & 0xff)];
00167     while (srclen)
00168     {
00169         unsigned char off = cp2uni_lb[*src];
00170         if (off)  /* multi-byte char */
00171         {
00172             if (srclen == 1) break;  /* partial char, error */
00173             if (cp2uni[(off << 8) + src[1]] == def_unicode_char &&
00174                 ((src[0] << 8) | src[1]) != def_char) break;
00175             src++;
00176             srclen--;
00177         }
00178         else if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
00179                  is_private_use_area_char(cp2uni[*src])) break;
00180         src++;
00181         srclen--;
00182     }
00183     return srclen;
00184 }
00185 
00186 /* mbstowcs for double-byte code page */
00187 /* all lengths are in characters, not bytes */
00188 static inline int mbstowcs_dbcs( const struct dbcs_table *table,
00189                                  const unsigned char *src, unsigned int srclen,
00190                                  WCHAR *dst, unsigned int dstlen )
00191 {
00192     const WCHAR * const cp2uni = table->cp2uni;
00193     const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
00194     unsigned int len;
00195 
00196     if (!dstlen) return get_length_dbcs( table, src, srclen );
00197 
00198     for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
00199     {
00200         unsigned char off = cp2uni_lb[*src];
00201         if (off)
00202         {
00203             if (!--srclen) break;  /* partial char, ignore it */
00204             src++;
00205             *dst = cp2uni[(off << 8) + *src];
00206         }
00207         else *dst = cp2uni[*src];
00208     }
00209     if (srclen) return -1;  /* overflow */
00210     return dstlen - len;
00211 }
00212 
00213 
00214 /* mbstowcs for double-byte code page with character decomposition */
00215 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
00216                                     const unsigned char *src, unsigned int srclen,
00217                                     WCHAR *dst, unsigned int dstlen )
00218 {
00219     const WCHAR * const cp2uni = table->cp2uni;
00220     const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
00221     unsigned int len;
00222     WCHAR ch;
00223     int res;
00224 
00225     if (!dstlen)  /* compute length */
00226     {
00227         WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
00228         for (len = 0; srclen; srclen--, src++)
00229         {
00230             unsigned char off = cp2uni_lb[*src];
00231             if (off)
00232             {
00233                 if (!--srclen) break;  /* partial char, ignore it */
00234                 src++;
00235                 ch = cp2uni[(off << 8) + *src];
00236             }
00237             else ch = cp2uni[*src];
00238             len += get_decomposition( ch, dummy, 4 );
00239         }
00240         return len;
00241     }
00242 
00243     for (len = dstlen; srclen && len; srclen--, src++)
00244     {
00245         unsigned char off = cp2uni_lb[*src];
00246         if (off)
00247         {
00248             if (!--srclen) break;  /* partial char, ignore it */
00249             src++;
00250             ch = cp2uni[(off << 8) + *src];
00251         }
00252         else ch = cp2uni[*src];
00253         if (!(res = get_decomposition( ch, dst, len ))) break;
00254         dst += res;
00255         len -= res;
00256     }
00257     if (srclen) return -1;  /* overflow */
00258     return dstlen - len;
00259 }
00260 
00261 
00262 /* return -1 on dst buffer overflow, -2 on invalid input char */
00263 int wine_cp_mbstowcs( const union cptable *table, int flags,
00264                       const char *s, int srclen,
00265                       WCHAR *dst, int dstlen )
00266 {
00267     const unsigned char *src = (const unsigned char*) s;
00268 
00269     if (table->info.char_size == 1)
00270     {
00271         if (flags & MB_ERR_INVALID_CHARS)
00272         {
00273             if (check_invalid_chars_sbcs( &table->sbcs, flags, src, srclen )) return -2;
00274         }
00275         if (!(flags & MB_COMPOSITE))
00276         {
00277             if (!dstlen) return srclen;
00278             return mbstowcs_sbcs( &table->sbcs, flags, src, srclen, dst, dstlen );
00279         }
00280         return mbstowcs_sbcs_decompose( &table->sbcs, flags, src, srclen, dst, dstlen );
00281     }
00282     else /* mbcs */
00283     {
00284         if (flags & MB_ERR_INVALID_CHARS)
00285         {
00286             if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
00287         }
00288         if (!(flags & MB_COMPOSITE))
00289             return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
00290         else
00291             return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );
00292     }
00293 }
00294 
00295 /* CP_SYMBOL implementation */
00296 /* return -1 on dst buffer overflow */
00297 int wine_cpsymbol_mbstowcs( const char *src, int srclen, WCHAR *dst, int dstlen)
00298 {
00299     int len, i;
00300     if( dstlen == 0) return srclen;
00301     len = dstlen > srclen ? srclen : dstlen;
00302     for( i = 0; i < len; i++)
00303     {
00304         unsigned char c = src [ i ];
00305         if( c < 0x20 )
00306             dst[i] = c;
00307         else
00308             dst[i] = c + 0xf000;
00309     }
00310     if( srclen > len) return -1;
00311     return len;
00312 }

Generated on Sat May 26 2012 04:35:35 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.