ReactOS  0.4.15-dev-489-g75a0787
mbtowc.c
Go to the documentation of this file.
1 /*
2  * MultiByteToWideChar implementation
3  *
4  * Copyright 2000 Alexandre Julliard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  */
20 
21 #include <string.h>
22 
23 #include "wine/unicode.h"
24 
25 extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN;
26 
27 /* check the code whether it is in Unicode Private Use Area (PUA). */
28 /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */
30 {
31  return (code >= 0xe000 && code <= 0xf8ff);
32 }
33 
34 /* check src string for invalid chars; return non-zero if invalid char found */
35 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, int flags,
36  const unsigned char *src, unsigned int srclen )
37 {
38  const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
39  const WCHAR def_unicode_char = table->info.def_unicode_char;
40  const unsigned char def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
41  + (def_unicode_char & 0xff)];
42  while (srclen)
43  {
44  if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
46  src++;
47  srclen--;
48  }
49  return srclen;
50 }
51 
52 /* mbstowcs for single-byte code page */
53 /* all lengths are in characters, not bytes */
54 static inline int mbstowcs_sbcs( const struct sbcs_table *table, int flags,
55  const unsigned char *src, unsigned int srclen,
56  WCHAR *dst, unsigned int dstlen )
57 {
58  const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
59  int ret = srclen;
60 
61  if (dstlen < srclen)
62  {
63  /* buffer too small: fill it up to dstlen and return error */
64  srclen = dstlen;
65  ret = -1;
66  }
67 
68  while (srclen >= 16)
69  {
70  dst[0] = cp2uni[src[0]];
71  dst[1] = cp2uni[src[1]];
72  dst[2] = cp2uni[src[2]];
73  dst[3] = cp2uni[src[3]];
74  dst[4] = cp2uni[src[4]];
75  dst[5] = cp2uni[src[5]];
76  dst[6] = cp2uni[src[6]];
77  dst[7] = cp2uni[src[7]];
78  dst[8] = cp2uni[src[8]];
79  dst[9] = cp2uni[src[9]];
80  dst[10] = cp2uni[src[10]];
81  dst[11] = cp2uni[src[11]];
82  dst[12] = cp2uni[src[12]];
83  dst[13] = cp2uni[src[13]];
84  dst[14] = cp2uni[src[14]];
85  dst[15] = cp2uni[src[15]];
86  src += 16;
87  dst += 16;
88  srclen -= 16;
89  }
90 
91  /* now handle the remaining characters */
92  src += srclen;
93  dst += srclen;
94  switch (srclen)
95  {
96  case 15: dst[-15] = cp2uni[src[-15]];
97  case 14: dst[-14] = cp2uni[src[-14]];
98  case 13: dst[-13] = cp2uni[src[-13]];
99  case 12: dst[-12] = cp2uni[src[-12]];
100  case 11: dst[-11] = cp2uni[src[-11]];
101  case 10: dst[-10] = cp2uni[src[-10]];
102  case 9: dst[-9] = cp2uni[src[-9]];
103  case 8: dst[-8] = cp2uni[src[-8]];
104  case 7: dst[-7] = cp2uni[src[-7]];
105  case 6: dst[-6] = cp2uni[src[-6]];
106  case 5: dst[-5] = cp2uni[src[-5]];
107  case 4: dst[-4] = cp2uni[src[-4]];
108  case 3: dst[-3] = cp2uni[src[-3]];
109  case 2: dst[-2] = cp2uni[src[-2]];
110  case 1: dst[-1] = cp2uni[src[-1]];
111  case 0: break;
112  }
113  return ret;
114 }
115 
116 /* mbstowcs for single-byte code page with char decomposition */
117 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags,
118  const unsigned char *src, unsigned int srclen,
119  WCHAR *dst, unsigned int dstlen )
120 {
121  const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
122  unsigned int len;
123 
124  if (!dstlen) /* compute length */
125  {
126  WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
127  for (len = 0; srclen; srclen--, src++)
128  len += wine_decompose( 0, cp2uni[*src], dummy, 4 );
129  return len;
130  }
131 
132  for (len = dstlen; srclen && len; srclen--, src++)
133  {
134  unsigned int res = wine_decompose( 0, cp2uni[*src], dst, len );
135  if (!res) break;
136  len -= res;
137  dst += res;
138  }
139  if (srclen) return -1; /* overflow */
140  return dstlen - len;
141 }
142 
143 /* query necessary dst length for src string */
144 static inline int get_length_dbcs( const struct dbcs_table *table,
145  const unsigned char *src, unsigned int srclen )
146 {
147  const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
148  int len;
149 
150  for (len = 0; srclen; srclen--, src++, len++)
151  {
152  if (cp2uni_lb[*src] && srclen > 1 && src[1])
153  {
154  src++;
155  srclen--;
156  }
157  }
158  return len;
159 }
160 
161 /* check src string for invalid chars; return non-zero if invalid char found */
162 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
163  const unsigned char *src, unsigned int srclen )
164 {
165  const WCHAR * const cp2uni = table->cp2uni;
166  const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
167  const WCHAR def_unicode_char = table->info.def_unicode_char;
168  const unsigned short def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
169  + (def_unicode_char & 0xff)];
170  while (srclen)
171  {
172  unsigned char off = cp2uni_lb[*src];
173  if (off) /* multi-byte char */
174  {
175  if (srclen == 1) break; /* partial char, error */
176  if (cp2uni[(off << 8) + src[1]] == def_unicode_char &&
177  ((src[0] << 8) | src[1]) != def_char) break;
178  src++;
179  srclen--;
180  }
181  else if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
183  src++;
184  srclen--;
185  }
186  return srclen;
187 }
188 
189 /* mbstowcs for double-byte code page */
190 /* all lengths are in characters, not bytes */
191 static inline int mbstowcs_dbcs( const struct dbcs_table *table,
192  const unsigned char *src, unsigned int srclen,
193  WCHAR *dst, unsigned int dstlen )
194 {
195  const WCHAR * const cp2uni = table->cp2uni;
196  const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
197  unsigned int len;
198 
199  if (!dstlen) return get_length_dbcs( table, src, srclen );
200 
201  for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
202  {
203  unsigned char off = cp2uni_lb[*src];
204  if (off && srclen > 1 && src[1])
205  {
206  src++;
207  srclen--;
208  *dst = cp2uni[(off << 8) + *src];
209  }
210  else *dst = cp2uni[*src];
211  }
212  if (srclen) return -1; /* overflow */
213  return dstlen - len;
214 }
215 
216 
217 /* mbstowcs for double-byte code page with character decomposition */
218 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
219  const unsigned char *src, unsigned int srclen,
220  WCHAR *dst, unsigned int dstlen )
221 {
222  const WCHAR * const cp2uni = table->cp2uni;
223  const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
224  unsigned int len, res;
225  WCHAR ch;
226 
227  if (!dstlen) /* compute length */
228  {
229  WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
230  for (len = 0; srclen; srclen--, src++)
231  {
232  unsigned char off = cp2uni_lb[*src];
233  if (off && srclen > 1 && src[1])
234  {
235  src++;
236  srclen--;
237  ch = cp2uni[(off << 8) + *src];
238  }
239  else ch = cp2uni[*src];
240  len += wine_decompose( 0, ch, dummy, 4 );
241  }
242  return len;
243  }
244 
245  for (len = dstlen; srclen && len; srclen--, src++)
246  {
247  unsigned char off = cp2uni_lb[*src];
248  if (off && srclen > 1 && src[1])
249  {
250  src++;
251  srclen--;
252  ch = cp2uni[(off << 8) + *src];
253  }
254  else ch = cp2uni[*src];
255  if (!(res = wine_decompose( 0, ch, dst, len ))) break;
256  dst += res;
257  len -= res;
258  }
259  if (srclen) return -1; /* overflow */
260  return dstlen - len;
261 }
262 
263 
264 /* return -1 on dst buffer overflow, -2 on invalid input char */
265 int wine_cp_mbstowcs( const union cptable *table, int flags,
266  const char *s, int srclen,
267  WCHAR *dst, int dstlen )
268 {
269  const unsigned char *src = (const unsigned char*) s;
270 
271  if (table->info.char_size == 1)
272  {
274  {
275  if (check_invalid_chars_sbcs( &table->sbcs, flags, src, srclen )) return -2;
276  }
277  if (!(flags & MB_COMPOSITE))
278  {
279  if (!dstlen) return srclen;
280  return mbstowcs_sbcs( &table->sbcs, flags, src, srclen, dst, dstlen );
281  }
282  return mbstowcs_sbcs_decompose( &table->sbcs, flags, src, srclen, dst, dstlen );
283  }
284  else /* mbcs */
285  {
287  {
288  if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
289  }
290  if (!(flags & MB_COMPOSITE))
291  return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
292  else
293  return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );
294  }
295 }
static int mbstowcs_sbcs(const struct sbcs_table *table, int flags, const unsigned char *src, unsigned int srclen, WCHAR *dst, unsigned int dstlen)
Definition: mbtowc.c:54
#define MB_ERR_INVALID_CHARS
Definition: unicode.h:41
#define MB_USEGLYPHCHARS
Definition: unicode.h:42
static int check_invalid_chars_sbcs(const struct sbcs_table *table, int flags, const unsigned char *src, unsigned int srclen)
Definition: mbtowc.c:35
#define DECLSPEC_HIDDEN
Definition: precomp.h:8
#define MB_COMPOSITE
Definition: unicode.h:40
static int check_invalid_chars_dbcs(const struct dbcs_table *table, const unsigned char *src, unsigned int srclen)
Definition: mbtowc.c:162
int wine_cp_mbstowcs(const union cptable *table, int flags, const char *s, int srclen, WCHAR *dst, int dstlen)
Definition: mbtowc.c:265
static int mbstowcs_dbcs(const struct dbcs_table *table, const unsigned char *src, unsigned int srclen, WCHAR *dst, unsigned int dstlen)
Definition: mbtowc.c:191
static int get_length_dbcs(const struct dbcs_table *table, const unsigned char *src, unsigned int srclen)
Definition: mbtowc.c:144
if(!(yy_init))
Definition: macro.lex.yy.c:714
__wchar_t WCHAR
Definition: xmlstorage.h:180
static DWORD LPDWORD LPCSTR DWORD srclen
Definition: directory.c:51
GLbitfield flags
Definition: glext.h:7161
static int mbstowcs_sbcs_decompose(const struct sbcs_table *table, int flags, const unsigned char *src, unsigned int srclen, WCHAR *dst, unsigned int dstlen)
Definition: mbtowc.c:117
int ret
GLenum GLsizei len
Definition: glext.h:6722
GLdouble s
Definition: gl.h:2039
static int is_private_use_area_char(WCHAR code)
Definition: mbtowc.c:29
GLenum src
Definition: glext.h:6340
unsigned int wine_decompose(int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen) DECLSPEC_HIDDEN
static int mbstowcs_dbcs_decompose(const struct dbcs_table *table, const unsigned char *src, unsigned int srclen, WCHAR *dst, unsigned int dstlen)
Definition: mbtowc.c:218
static DWORD dstlen
Definition: directory.c:51
GLenum GLenum dst
Definition: glext.h:6340
static const WCHAR cp2uni[256]
Definition: c_037.c:7
GLuint res
Definition: glext.h:9613
off
Definition: i386-dis.c:3909