ReactOS 0.4.16-dev-959-g2ec3a19
mbrtowc.cpp
Go to the documentation of this file.
1/***
2*mbrtowc.c - Convert multibyte char to wide char.
3*
4* Copyright (c) Microsoft Corporation. All rights reserved.
5*
6*Purpose:
7* Convert a multibyte character into the equivalent wide character.
8*
9*******************************************************************************/
13#include <limits.h>
14#include <locale.h>
15#include <stdio.h>
16#include <uchar.h>
17#include <wchar.h>
18
19using namespace __crt_mbstring;
20
21/***
22*errno_t _mbrtowc_internal() - Helper function to convert multibyte char to wide character.
23*
24*Purpose:
25* Convert a multi-byte character into the equivalent wide character,
26* according to the specified LC_CTYPE category, or the current locale.
27* [ANSI].
28*
29* NOTE: Currently, the C libraries support the "C" locale only.
30* Non-C locale support now available under _INTL switch.
31*Entry:
32* wchar_t *dst = pointer to (single) destination wide character
33* const char *s = pointer to multibyte character
34* size_t n = maximum length of multibyte character to consider
35* mbstate_t *pmbst = pointer to state (must be not nullptr)
36* _locale_t plocinfo = locale info
37*
38*Exit:
39* returns, in *pRetValue:
40* If s = nullptr, 0, indicating we only use state-independent
41* character encodings.
42* If s != nullptr: 0 (if *s = null char)
43* -1 (if the next n or fewer bytes not valid mbc)
44* number of bytes comprising converted mbc
45*
46*Exceptions:
47*
48*******************************************************************************/
49
50_Success_(return != 0)
51_Post_satisfies_(*pRetValue <= _String_length_(s))
52static errno_t __cdecl _mbrtowc_internal(
53 _Inout_ _Out_range_(<=, 1) int * pRetValue,
55 _In_opt_z_ const char * s,
56 _In_ size_t n,
57 _Inout_ mbstate_t * pmbst,
58 _Inout_ __crt_cached_ptd_host& ptd
59 ) throw()
60{
61 _ASSERTE(pmbst != nullptr);
63
64 if (!s || n == 0)
65 {
66 /* indicate do not have state-dependent encodings,
67 handle zero length string */
68 _ASSIGN_IF_NOT_NULL(pRetValue, 0);
69 return 0;
70 }
71
72 if (!*s)
73 {
74 /* handle nullptr char */
75 _ASSIGN_IF_NOT_NULL(pRetValue, 0);
76 return 0;
77 }
78
79 const _locale_t locale = ptd.get_locale();
80
81 if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8)
82 {
83 const size_t retval = __mbrtowc_utf8(dst, s, n, pmbst, ptd);
84 _ASSIGN_IF_NOT_NULL(pRetValue, static_cast<int>(retval));
85 return ptd.get_errno().value_or(0);
86 }
87
88 const int locale_mb_cur_max = locale->locinfo->_public._locale_mb_cur_max;
89 _ASSERTE(locale_mb_cur_max == 1 || locale_mb_cur_max == 2);
90
91 if (locale->locinfo->locale_name[LC_CTYPE] == nullptr)
92 {
93 _ASSIGN_IF_NOT_NULL(dst, (wchar_t) (unsigned char) *s);
94 _ASSIGN_IF_NOT_NULL(pRetValue, 1);
95 return 0;
96 }
97
98 if (pmbst->_Wchar != 0)
99 {
100 /* complete two-byte multibyte character */
101 ((char *) pmbst)[1] = *s;
102 if (locale_mb_cur_max <= 1 ||
103 (__acrt_MultiByteToWideChar(
104 locale->locinfo->_public._locale_lc_codepage,
106 (char *) pmbst,
107 2,
108 dst,
109 (dst != nullptr ? 1 : 0)) == 0))
110 {
111 /* translation failed */
112 pmbst->_Wchar = 0;
114 _ASSIGN_IF_NOT_NULL(pRetValue, -1);
115 return ptd.get_errno().set(EILSEQ);
116 }
117 pmbst->_Wchar = 0;
118 _ASSIGN_IF_NOT_NULL(pRetValue, locale_mb_cur_max);
119 return 0;
120 }
121 else if (_isleadbyte_fast_internal((unsigned char) *s, locale))
122 {
123 /* multi-byte char */
124 if (n < (size_t) locale_mb_cur_max)
125 {
126 /* save partial multibyte character */
127 ((char *) pmbst)[0] = *s;
128 _ASSIGN_IF_NOT_NULL(pRetValue, -2);
129 return 0;
130 }
131 else if (locale_mb_cur_max <= 1 ||
132 (__acrt_MultiByteToWideChar(locale->locinfo->_public._locale_lc_codepage,
134 s,
135 static_cast<int>(__min(strlen(s), INT_MAX)),
136 dst,
137 (dst != nullptr ? 1 : 0)) == 0))
138 {
139 /* validate high byte of mbcs char */
140 if (!*(s + 1))
141 {
142 pmbst->_Wchar = 0;
144 _ASSIGN_IF_NOT_NULL(pRetValue, -1);
145 return ptd.get_errno().set(EILSEQ);
146 }
147 }
148 _ASSIGN_IF_NOT_NULL(pRetValue, locale_mb_cur_max);
149 return 0;
150 }
151 else {
152 /* single byte char */
153 if (__acrt_MultiByteToWideChar(
154 locale->locinfo->_public._locale_lc_codepage,
156 s,
157 1,
158 dst,
159 (dst != nullptr ? 1 : 0)) == 0)
160 {
162 _ASSIGN_IF_NOT_NULL(pRetValue, -1);
163 return ptd.get_errno().set(EILSEQ);
164 }
165
166 _ASSIGN_IF_NOT_NULL(pRetValue, sizeof(char) );
167 return 0;
168 }
169}
170
171
172/***
173*wint_t btowc(c) - translate single byte to wide char
174*
175*Purpose:
176*
177*Entry:
178*
179*Exit:
180*
181*Exceptions:
182*
183*******************************************************************************/
184
185extern "C" wint_t __cdecl btowc(
186 int c
187 )
188{
189 if (c == EOF)
190 {
191 return WEOF;
192 }
193 else
194 {
195 /* convert as one-byte string */
196 char ch = (char) c;
197 mbstate_t mbst = {};
198 wchar_t wc = 0;
199 int retValue = -1;
200
201 __crt_cached_ptd_host ptd;
202 _mbrtowc_internal(&retValue, &wc, &ch, 1, &mbst, ptd);
203 return (retValue < 0 ? WEOF : wc);
204 }
205}
206
207
208/***
209*size_t mbrlen(s, n, pst) - determine next multibyte code, restartably
210*
211*Purpose:
212*
213*Entry:
214*
215*Exit:
216*
217*Exceptions:
218*
219*******************************************************************************/
220
221extern "C" size_t __cdecl mbrlen(
222 const char *s,
223 size_t n,
224 mbstate_t *pst
225 )
226{
227 static mbstate_t mbst = {};
228 int retValue = -1;
229
230 __crt_cached_ptd_host ptd;
231 _mbrtowc_internal(&retValue, nullptr, s, n, (pst != nullptr ? pst : &mbst), ptd);
232 return retValue;
233}
234
235
236/***
237*size_t mbrtowc(pwc, s, n, pst) - translate multibyte to wchar_t, restartably
238*
239*Purpose:
240*
241*Entry:
242*
243*Exit:
244*
245*Exceptions:
246*
247*******************************************************************************/
248
249extern "C" size_t __cdecl mbrtowc(
250 wchar_t *dst,
251 const char *s,
252 size_t n,
253 mbstate_t *pst
254 )
255{
256 static mbstate_t mbst = {};
257 int retValue = -1;
258
259 __crt_cached_ptd_host ptd;
260
261 if (s != nullptr)
262 {
263 _mbrtowc_internal(&retValue, dst, s, n, (pst != nullptr ? pst : &mbst), ptd);
264 }
265 else
266 {
267 _mbrtowc_internal(&retValue, nullptr, "", 1, (pst != nullptr ? pst : &mbst), ptd);
268 }
269 return retValue;
270}
271
272
273/***
274*size_t mbsrtowcs(wcs, ps, n, pst) - translate multibyte string to wide,
275* restartably
276*
277*Purpose:
278*
279*Entry:
280*
281*Exit:
282*
283*Exceptions:
284*
285*******************************************************************************/
286
287/* Helper function shared by the secure and non-secure versions. */
288
289_Success_(return == 0)
290static size_t __cdecl _mbsrtowcs_helper(
291 _Out_writes_opt_z_(n) wchar_t * wcs,
292 _Deref_pre_opt_z_ const char ** ps,
293 _In_ size_t n,
294 _Inout_ mbstate_t * pst,
295 _Inout_ __crt_cached_ptd_host& ptd
296 ) throw()
297{
298 /* validation section */
299 _UCRT_VALIDATE_RETURN(ptd, ps != nullptr, EINVAL, (size_t) - 1);
300
301 static mbstate_t mbst = {};
302 const char *s = *ps;
303 int i = 0;
304 size_t nwc = 0;
305
306 // Use the static cached state if necessary
307 if (pst == nullptr)
308 {
309 pst = &mbst;
310 }
311
312 const _locale_t locale = ptd.get_locale();
313
314 if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8)
315 {
316 return __mbsrtowcs_utf8(wcs, ps, n, pst, ptd);
317 }
318
319 if (wcs == nullptr)
320 {
321 for (;; ++nwc, s += i)
322 {
323 /* translate but don't store */
324 wchar_t wc;
325 _mbrtowc_internal(&i, &wc, s, INT_MAX, pst, ptd);
326 if (i < 0)
327 {
328 return (size_t) - 1;
329 }
330 else if (i == 0)
331 {
332 return nwc;
333 }
334 }
335 }
336
337 for (; 0 < n; ++nwc, s += i, ++wcs, --n)
338 {
339 /* translate and store */
340 _mbrtowc_internal(&i, wcs, s, INT_MAX, pst, ptd);
341 if (i < 0)
342 {
343 /* encountered invalid sequence */
344 nwc = (size_t) - 1;
345 break;
346 }
347 else if (i == 0)
348 {
349 /* encountered terminating null */
350 s = 0;
351 break;
352 }
353 }
354
355 *ps = s;
356 return nwc;
357}
358
359/***
360*size_t mbsrtowcs() - Convert multibyte char string to wide char string.
361*
362*Purpose:
363* Convert a multi-byte char string into the equivalent wide char string,
364* according to the LC_CTYPE category of the current locale.
365* Same as mbsrtowcs_s(), but the destination may not be null terminated.
366* If there's not enough space, we return EINVAL.
367*
368*Entry:
369* wchar_t *pwcs = pointer to destination wide character string buffer
370* const char **s = pointer to source multibyte character string
371* size_t n = maximum number of wide characters to store (not including the terminating null character)
372* mbstate_t *pst = pointer to the conversion state
373*
374*Exit:
375* The nunber if wide characters written to *wcs, not including any terminating null character)
376*
377*Exceptions:
378* Input parameters are validated. Refer to the validation section of the function.
379*
380*******************************************************************************/
381extern "C" size_t __cdecl mbsrtowcs(
382 wchar_t * wcs,
383 const char ** ps,
384 size_t n,
385 mbstate_t * pst
386 )
387{
388 /* Call a non-deprecated helper to do the work. */
389 __crt_cached_ptd_host ptd;
390 return _mbsrtowcs_helper(wcs, ps, n, pst, ptd);
391}
392
393
394/***
395*errno_t mbsrtowcs_s() - Convert multibyte char string to wide char string.
396*
397*Purpose:
398* Convert a multi-byte char string into the equivalent wide char string,
399* according to the LC_CTYPE category of the current locale.
400* Same as mbsrtowcs(), but the destination is ensured to be null terminated.
401* If there's not enough space, we return EINVAL.
402*
403*Entry:
404* size_t *pRetValue = Number of bytes modified including the terminating nullptr
405* This pointer can be nullptr.
406* wchar_t *pwcs = pointer to destination wide character string buffer
407* size_t sizeInWords = size of the destination buffer
408* const char **s = pointer to source multibyte character string
409* size_t n = maximum number of wide characters to store (not including the terminating null character)
410* mbstate_t *pst = pointer to the conversion state
411*
412*Exit:
413* The error code.
414*
415*Exceptions:
416* Input parameters are validated. Refer to the validation section of the function.
417*
418*******************************************************************************/
419
420static errno_t __cdecl mbsrtowcs_s_internal(
421 size_t * pRetValue,
422 wchar_t * dst,
423 size_t sizeInWords,
424 const char ** ps,
425 size_t n,
426 mbstate_t * pmbst,
427 __crt_cached_ptd_host& ptd
428 )
429{
430 size_t retsize;
431
432 /* validation section */
433 _ASSIGN_IF_NOT_NULL(pRetValue, (size_t) - 1);
434 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, (dst == nullptr && sizeInWords == 0) || (dst != nullptr && sizeInWords > 0), EINVAL);
435 if (dst != nullptr)
436 {
437 _RESET_STRING(dst, sizeInWords);
438 }
440
441 /* Call a non-deprecated helper to do the work. */
442
443 retsize = _mbsrtowcs_helper(dst, ps, (n > sizeInWords ? sizeInWords : n), pmbst, ptd);
444
445 if (retsize == (size_t) - 1)
446 {
447 if (dst != nullptr)
448 {
449 _RESET_STRING(dst, sizeInWords);
450 }
451 return ptd.get_errno().value_or(0);
452 }
453
454 /* count the null terminator */
455 retsize++;
456
457 if (dst != nullptr)
458 {
459 /* return error if the string does not fit */
460 if (retsize > sizeInWords)
461 {
462 _RESET_STRING(dst, sizeInWords);
463 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, sizeInWords <= retsize, ERANGE);
464 }
465 else
466 {
467 /* ensure the string is null terminated */
468 dst[retsize - 1] = '\0';
469 }
470 }
471
472 _ASSIGN_IF_NOT_NULL(pRetValue, retsize);
473
474 return 0;
475}
476
477extern "C" errno_t __cdecl mbsrtowcs_s(
478 size_t * pRetValue,
479 wchar_t * dst,
480 size_t sizeInWords,
481 const char ** ps,
482 size_t n,
483 mbstate_t * pmbst
484 )
485{
486 __crt_cached_ptd_host ptd;
487 return mbsrtowcs_s_internal(pRetValue, dst, sizeInWords, ps, n, pmbst, ptd);
488}
489
490size_t __cdecl __crt_mbstring::__mbrtowc_utf8(wchar_t* pwc, const char* s, size_t n, mbstate_t* ps, __crt_cached_ptd_host& ptd)
491{
492 static_assert(sizeof(wchar_t) == 2, "wchar_t is assumed to be 16 bits");
493 char32_t c32;
494 const size_t retval = __mbrtoc32_utf8(&c32, s, n, ps, ptd);
495 // If we succesfully consumed a character, write the result after a quick range check
496 if (retval <= 4)
497 {
498 if (c32 > 0xffff)
499 {
500 // A 4-byte UTF-8 character won't fit into a single UTF-16 wchar
501 // So return the "replacement char"
502 c32 = 0xfffd;
503 }
504 _ASSIGN_IF_NOT_NULL(pwc, static_cast<wchar_t>(c32));
505 }
506 return retval;
507}
508
509size_t __cdecl __crt_mbstring::__mbsrtowcs_utf8(wchar_t* dst, const char** src, size_t len, mbstate_t* ps, __crt_cached_ptd_host& ptd)
510{
511 const char* current_src = *src;
512
513 auto compute_available = [](const char* s) -> size_t
514 {
515 // We shouldn't just blindly request to read 4 bytes, because there might not be 4 bytes left to read.
516 if (s[0] == '\0')
517 {
518 return 1;
519 }
520 else if (s[1] == '\0')
521 {
522 return 2;
523 }
524 else if (s[2] == '\0')
525 {
526 return 3;
527 }
528 return 4;
529 };
530
531 if (dst != nullptr)
532 {
533 wchar_t* current_dest = dst;
534 for (; len > 0; --len)
535 {
536 const size_t avail = compute_available(current_src);
537 char32_t c32;
538 const size_t retval = __mbrtoc32_utf8(&c32, current_src, avail, ps, ptd);
540 {
541 // Set src to the beginning of the invalid char
542 *src = current_src;
543 ptd.get_errno().set(EILSEQ);
544 return retval;
545 }
546 else if (retval == 0)
547 {
548 current_src = nullptr;
549 *current_dest = L'\0';
550 break;
551 }
552 else if (c32 > 0xffff)
553 {
554 // This is going to take two output wchars. Make sure we have enough room for this output.
555 if (len > 1)
556 {
557 --len;
558 c32 -= 0x10000;
559 const char16_t high_surrogate = static_cast<char16_t>((c32 >> 10) | 0xd800);
560 const char16_t low_surrogate = static_cast<char16_t>((c32 & 0x03ff) | 0xdc00);
561 *current_dest++ = high_surrogate;
562 *current_dest++ = low_surrogate;
563 }
564 else
565 {
566 break;
567 }
568 }
569 else
570 {
571 *current_dest++ = static_cast<wchar_t>(c32);
572 }
573 current_src += retval;
574 }
575 *src = current_src;
576 return current_dest - dst;
577 }
578 else
579 {
580 size_t total_count = 0;
581 for (;; ++total_count)
582 {
583 const size_t avail = compute_available(current_src);
584
585 const size_t retval = __mbrtoc32_utf8(nullptr, current_src, avail, ps, ptd);
587 {
588 ptd.get_errno().set(EILSEQ);
589 return retval;
590 }
591 else if (retval == 0)
592 {
593 break;
594 }
595 else if (retval == 4)
596 {
597 // SMP characters take two UTF-16 wide chars
598 ++total_count;
599 }
600 else
601 {
602 // This should be impossible. Means we encountered a multibyte char
603 // that extended past the null terminator, or is more than 4 bytes long
605 }
606 current_src += retval;
607 }
608 return total_count;
609 }
610}
int wint_t
Definition: _apple.h:38
#define EINVAL
Definition: acclib.h:90
ACPI_SIZE strlen(const char *String)
Definition: utclib.c:269
#define ERANGE
Definition: acclib.h:92
#define __cdecl
Definition: accygwin.h:79
static int avail
Definition: adh-main.c:39
Definition: _locale.h:75
_Check_return_ __forceinline unsigned short __cdecl _isleadbyte_fast_internal(_In_ unsigned char const c, _In_ _locale_t const locale)
#define _UCRT_VALIDATE_RETURN(ptd, expr, errorcode, retexpr)
#define _UCRT_VALIDATE_RETURN_ERRCODE(ptd, expr, errorcode)
#define _ASSIGN_IF_NOT_NULL(_Pointer, _Value)
#define _RESET_STRING(_String, _Size)
#define _ASSERTE(expr)
Definition: crtdbg.h:114
result_buffer_count char *const _In_ int const _In_ bool const _In_ unsigned const _In_ STRFLT const _In_ bool const _Inout_ __crt_cached_ptd_host &ptd throw()
Definition: cvt.cpp:119
_In_ size_t const _In_ int _In_ bool const _In_ unsigned const _In_ __acrt_rounding_mode const _Inout_ __crt_cached_ptd_host & ptd
Definition: cvt.cpp:355
unsigned char
Definition: typeof.h:29
__kernel_size_t size_t
Definition: linux.h:237
GLdouble s
Definition: gl.h:2039
GLdouble n
Definition: glext.h:7729
GLenum src
Definition: glext.h:6340
const GLubyte * c
Definition: glext.h:8905
GLenum GLenum dst
Definition: glext.h:6340
GLenum GLsizei len
Definition: glext.h:6722
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define MB_ERR_INVALID_CHARS
Definition: unicode.h:41
#define LC_CTYPE
Definition: locale.h:19
#define EOF
Definition: stdio.h:24
#define __min(a, b)
Definition: stdlib.h:102
wint_t __cdecl btowc(int)
size_t __cdecl mbsrtowcs(_Pre_notnull_ _Post_z_ wchar_t *_Dest, _Inout_ _Deref_prepost_opt_valid_ const char **_PSrc, _In_ size_t _Count, _Inout_opt_ mbstate_t *_State)
size_t __cdecl mbrlen(_In_reads_bytes_opt_(_SizeInBytes) _Pre_opt_z_ const char *_Ch, _In_ size_t _SizeInBytes, _Out_opt_ mbstate_t *_State)
#define INT_MAX
Definition: intsafe.h:150
#define _Deref_pre_opt_z_
Definition: ms_sal.h:1030
constexpr size_t INVALID
size_t __cdecl __mbrtowc_utf8(wchar_t *pwc, const char *s, size_t n, mbstate_t *ps, __crt_cached_ptd_host &ptd)
size_t __cdecl __mbrtoc32_utf8(char32_t *pc32, const char *s, size_t n, mbstate_t *ps, __crt_cached_ptd_host &ptd)
Definition: mbrtoc32.cpp:22
size_t __cdecl __mbsrtowcs_utf8(wchar_t *dst, const char **src, size_t len, mbstate_t *ps, __crt_cached_ptd_host &ptd)
constexpr size_t INCOMPLETE
#define _Inout_
Definition: no_sal2.h:162
#define _Success_(c)
Definition: no_sal2.h:84
#define _Pre_maybenull_
Definition: no_sal2.h:514
#define _In_opt_z_
Definition: no_sal2.h:218
#define _Out_range_(l, h)
Definition: no_sal2.h:370
#define _Out_writes_opt_z_(s)
Definition: no_sal2.h:230
#define _Post_satisfies_(e)
Definition: no_sal2.h:66
#define _In_
Definition: no_sal2.h:158
#define L(x)
Definition: ntvdm.h:50
#define WEOF
Definition: conio.h:185
#define EILSEQ
Definition: errno.h:109
#define CP_UTF8
Definition: nls.h:20
#define mbrtowc(wp, cp, len, sp)
Definition: wchar.h:158
#define wchar_t
Definition: wchar.h:102
int errno_t
Definition: corecrt.h:615
wchar_t wcs[5]
int retval
Definition: wcstombs.cpp:91
#define MB_PRECOMPOSED
Definition: winnls.h:283
#define const
Definition: zconf.h:233