ReactOS 0.4.16-dev-814-g656a5dc
__crt_mbstring Namespace Reference

Functions

size_t return_illegal_sequence (mbstate_t *ps, __crt_cached_ptd_host &ptd)
 
size_t reset_and_return (size_t retval, mbstate_t *ps)
 
size_t __cdecl __c16rtomb_utf8 (char *s, char16_t c16, mbstate_t *ps, __crt_cached_ptd_host &ptd)
 
size_t __cdecl __c32rtomb_utf8 (char *s, char32_t c32, mbstate_t *ps, __crt_cached_ptd_host &ptd)
 
size_t __cdecl __mbrtoc16_utf8 (char16_t *pc32, const char *s, size_t n, mbstate_t *ps, __crt_cached_ptd_host &ptd)
 
size_t __cdecl __mbrtoc32_utf8 (char32_t *pc32, const char *s, size_t n, mbstate_t *ps, __crt_cached_ptd_host &ptd)
 
size_t __cdecl __mbrtowc_utf8 (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps, __crt_cached_ptd_host &ptd)
 
size_t __cdecl __mbsrtowcs_utf8 (wchar_t *dst, const char **src, size_t len, mbstate_t *ps, __crt_cached_ptd_host &ptd)
 
size_t __cdecl __wcsrtombs_utf8 (char *dst, const wchar_t **src, size_t len, mbstate_t *ps, __crt_cached_ptd_host &ptd)
 

Variables

constexpr size_t INVALID = static_cast<size_t>(-1)
 
constexpr size_t INCOMPLETE = static_cast<size_t>(-2)
 

Function Documentation

◆ __c16rtomb_utf8()

size_t __cdecl __crt_mbstring::__c16rtomb_utf8 ( char *  s,
char16_t  c16,
mbstate_t *  ps,
__crt_cached_ptd_host &  ptd 
)

Definition at line 54 of file c16rtomb.cpp.

55{
56 static mbstate_t internal_pst{};
57 if (ps == nullptr)
58 {
59 ps = &internal_pst;
60 }
61
62 if (!has_surrogate(ps))
63 {
64 if (is_second_surrogate(c16))
65 {
66 return return_illegal_sequence(ps, ptd);
67 }
68 else if (is_first_surrogate(c16))
69 {
70 store_first_surrogate(c16, ps);
71 return 0;
72 }
73 else
74 {
75 return __c32rtomb_utf8(s, static_cast<char32_t>(c16), ps, ptd);
76 }
77 }
78 else
79 {
80 // We've already seen the first (high) surrogate, so we're
81 // expecting to complete this code point with its other half
82 // in the second (low) surrogate
83 if (!is_second_surrogate(c16))
84 {
85 return return_illegal_sequence(ps, ptd);
86 }
87 const char32_t c32 = combine_second_surrogate(c16, ps);
88
89 mbstate_t temp{};
90 const size_t retval = __c32rtomb_utf8(s, c32, &temp, ptd);
91 return reset_and_return(retval, ps);
92 }
93}
_In_ size_t const _In_ int _In_ bool const _In_ unsigned const _In_ __acrt_rounding_mode const _Inout_ __crt_cached_ptd_host & ptd
Definition: cvt.cpp:355
GLdouble s
Definition: gl.h:2039
size_t return_illegal_sequence(mbstate_t *ps, __crt_cached_ptd_host &ptd)
Definition: common_utf8.cpp:15
size_t reset_and_return(size_t retval, mbstate_t *ps)
Definition: common_utf8.cpp:22
size_t __cdecl __c32rtomb_utf8(char *s, char32_t c32, mbstate_t *ps, __crt_cached_ptd_host &ptd)
Definition: c32rtomb.cpp:21
constexpr bool has_surrogate(mbstate_t *ps)
Definition: c16rtomb.cpp:29
constexpr bool is_second_surrogate(char16_t c16)
Definition: c16rtomb.cpp:40
char32_t combine_second_surrogate(char16_t c16, mbstate_t *ps)
Definition: c16rtomb.cpp:24
constexpr bool is_first_surrogate(char16_t c16)
Definition: c16rtomb.cpp:34
void store_first_surrogate(char16_t c16, mbstate_t *ps)
Definition: c16rtomb.cpp:15
static calc_node_t temp
Definition: rpn_ieee.c:38
int retval
Definition: wcstombs.cpp:91

Referenced by __wcsrtombs_utf8(), and c16rtomb().

◆ __c32rtomb_utf8()

size_t __cdecl __crt_mbstring::__c32rtomb_utf8 ( char *  s,
char32_t  c32,
mbstate_t *  ps,
__crt_cached_ptd_host &  ptd 
)

Definition at line 21 of file c32rtomb.cpp.

22{
23 if (!s)
24 {
25 // Equivalent to c32rtomb(buf, U'\0', ps) for some internal buffer buf
26 *ps = {};
27 return 1;
28 }
29
30 if (c32 == U'\0')
31 {
32 *s = '\0';
33 *ps = {};
34 return 1;
35 }
36
37 // Fast path for ASCII
38 if ((c32 & ~0x7f) == 0)
39 {
40 *s = static_cast<char>(c32);
41 return 1;
42 }
43
44 // Figure out how many trail bytes we need
45 size_t trail_bytes;
46 uint8_t lead_byte;
47 if ((c32 & ~0x7ff) == 0)
48 {
49 trail_bytes = 1;
50 lead_byte = 0xc0;
51 }
52 else if ((c32 & ~0xffff) == 0)
53 {
54 // high/low surrogates are only valid in UTF-16 encoded data
55 if (0xd800 <= c32 && c32 <= 0xdfff)
56 {
57 return return_illegal_sequence(ps, ptd);
58 }
59 trail_bytes = 2;
60 lead_byte = 0xe0;
61 }
62 else if ((c32 & ~0x001fffff) == 0)
63 {
64 // Unicode's max code point is 0x10ffff
65 if (0x10ffff < c32)
66 {
67 return return_illegal_sequence(ps, ptd);
68 }
69 trail_bytes = 3;
70 lead_byte = 0xf0;
71 }
72 else
73 {
74 return return_illegal_sequence(ps, ptd);
75 }
76 _ASSERTE(1 <= trail_bytes && trail_bytes <= 3);
77
78 // Put six bits into each of the trail bytes
79 // Lowest bits are in the last UTF-8 byte.
80 // Filling back to front.
81 for (size_t i = trail_bytes; i > 0; --i)
82 {
83 s[i] = (c32 & 0x3f) | 0x80;
84 c32 >>= 6;
85 }
86
87 // The first byte needs the upper (trail_bytes + 1) bits to store the length
88 // And the lower (7 - trail_bytes) to store the upper bits of the code point
89 _ASSERTE(c32 < (1u << (7 - trail_bytes)));
90 s[0] = static_cast<uint8_t>(c32) | lead_byte;
91
92 return reset_and_return(trail_bytes + 1, ps);
93}
#define U(x)
Definition: wordpad.c:45
#define _ASSERTE(expr)
Definition: crtdbg.h:114
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
BYTE uint8_t
Definition: msvideo1.c:66

Referenced by __c16rtomb_utf8(), _wctomb_internal(), c32rtomb(), and if().

◆ __mbrtoc16_utf8()

size_t __cdecl __crt_mbstring::__mbrtoc16_utf8 ( char16_t *  pc32,
const char *  s,
size_t  n,
mbstate_t *  ps,
__crt_cached_ptd_host &  ptd 
)

Definition at line 50 of file mbrtoc16.cpp.

51{
52 static mbstate_t internal_pst{};
53 if (ps == nullptr)
54 {
55 ps = &internal_pst;
56 }
57
58 if (is_surrogate_state(ps))
59 {
60 return end_surrogate_state(pc16, ps);
61 }
62
63 char32_t c32;
64 const size_t retval = __mbrtoc32_utf8(&c32, s, n, ps, ptd);
65 if (!s || retval == INVALID || retval == INCOMPLETE)
66 {
67 return retval;
68 }
69 else if (c32 > 0x10ffff)
70 {
71 // Input is out of range for UTF-16
72 return return_illegal_sequence(ps, ptd);
73 }
74
75 // Got a valid character
76 if (c32 <= 0xffff)
77 {
78 if (pc16)
79 {
80 *pc16 = static_cast<char16_t>(c32);
81 }
82 return reset_and_return(retval, ps);
83 }
84 else
85 {
86 return begin_surrogate_state(pc16, c32, retval, ps);
87 }
88}
GLdouble n
Definition: glext.h:7729
constexpr size_t INVALID
size_t __cdecl __mbrtoc32_utf8(char32_t *pc32, const char *s, size_t n, mbstate_t *ps, __crt_cached_ptd_host &ptd)
Definition: mbrtoc32.cpp:22
constexpr size_t INCOMPLETE
bool is_surrogate_state(const mbstate_t *ps)
Definition: mbrtoc16.cpp:37
size_t end_surrogate_state(char16_t *pc16, mbstate_t *ps)
Definition: mbrtoc16.cpp:28
size_t begin_surrogate_state(char16_t *pc16, char32_t c32, size_t retval, mbstate_t *ps)
Definition: mbrtoc16.cpp:17

Referenced by mbrtoc16().

◆ __mbrtoc32_utf8()

size_t __cdecl __crt_mbstring::__mbrtoc32_utf8 ( char32_t *  pc32,
const char *  s,
size_t  n,
mbstate_t *  ps,
__crt_cached_ptd_host &  ptd 
)

Definition at line 22 of file mbrtoc32.cpp.

23{
24 const char* begin = s;
25 static mbstate_t internal_pst{};
26 if (ps == nullptr)
27 {
28 ps = &internal_pst;
29 }
30
31 if (!s)
32 {
33 s = "";
34 n = 1;
35 pc32 = nullptr;
36 }
37
38 if (n == 0)
39 {
40 return INCOMPLETE;
41 }
42
43 // Retrieve the first byte from the string, or from the previous state
44 uint8_t length;
45 uint8_t bytes_needed;
46 char32_t c32;
47 const bool init_state = (ps->_State == 0);
48 if (init_state)
49 {
50 const uint8_t first_byte = static_cast<uint8_t>(*s++);
51
52 // Optimize for ASCII if in initial state
53 if ((first_byte & 0x80) == 0)
54 {
55 if (pc32 != nullptr)
56 {
57 *pc32 = first_byte;
58 }
59 return first_byte != '\0' ? 1 : 0;
60 }
61
62 if ((first_byte & 0xe0) == 0xc0)
63 {
64 length = 2;
65 }
66 else if ((first_byte & 0xf0) == 0xe0)
67 {
68 length = 3;
69 }
70 else if ((first_byte & 0xf8) == 0xf0)
71 {
72 length = 4;
73 }
74 else
75 {
76 return return_illegal_sequence(ps, ptd);
77 }
78 bytes_needed = length;
79 // Mask out the length bits
80 c32 = first_byte & ((1 << (7 - length)) - 1);
81 }
82 else
83 {
84 c32 = ps->_Wchar;
85 length = static_cast<uint8_t>(ps->_Byte);
86 bytes_needed = static_cast<uint8_t>(ps->_State);
87
88 // Make sure we don't have some sort of invalid/corrupted state.
89 // Any input that left behind state would have been more than one byte long
90 // and the first byte should have been processed already.
91 if (length < 2 || length > 4 || bytes_needed < 1 || bytes_needed >= length)
92 {
93 return return_illegal_sequence(ps, ptd);
94 }
95 }
96
97 // Don't read more bytes than we're allowed
98 if (bytes_needed < n)
99 {
100 n = bytes_needed;
101 }
102
103 // We've already read the first byte.
104 // All remaining bytes should be continuation bytes
105 while (static_cast<size_t>(s - begin) < n)
106 {
107 uint8_t current_byte = static_cast<uint8_t>(*s++);
108 if ((current_byte & 0xc0) != 0x80)
109 {
110 // Not a continuation character
111 return return_illegal_sequence(ps, ptd);
112 }
113 c32 = (c32 << 6) | (current_byte & 0x3f);
114 }
115
116 if (n < bytes_needed)
117 {
118 // Store state and return incomplete
119 auto bytes_remaining = static_cast<uint8_t>(bytes_needed - n);
120 static_assert(sizeof(mbstate_t::_Wchar) >= sizeof(char32_t), "mbstate_t has broken mbrtoc32");
121 ps->_Wchar = c32;
122 ps->_Byte = length;
123 ps->_State = bytes_remaining;
124 return INCOMPLETE;
125 }
126
127 if ((0xd800 <= c32 && c32 <= 0xdfff) || (0x10ffff < c32))
128 {
129 // Invalid code point (surrogate or out of range)
130 return return_illegal_sequence(ps, ptd);
131 }
132
133 constexpr char32_t min_legal[3]{ 0x80, 0x800, 0x10000 };
134 if (c32 < min_legal[length - 2])
135 {
136 // Overlong encoding
137 return return_illegal_sequence(ps, ptd);
138 }
139
140 // Success! Store results
141 if (pc32 != nullptr)
142 {
143 *pc32 = c32;
144 }
145
146 return reset_and_return(c32 == U'\0' ? 0 : bytes_needed, ps);
147}
GLuint GLsizei GLsizei * length
Definition: glext.h:6040
static clock_t begin
Definition: xmllint.c:458

Referenced by __mbrtoc16_utf8(), and mbrtoc32().

◆ __mbrtowc_utf8()

size_t __cdecl __crt_mbstring::__mbrtowc_utf8 ( wchar_t *  pwc,
const char *  s,
size_t  n,
mbstate_t *  ps,
__crt_cached_ptd_host &  ptd 
)

◆ __mbsrtowcs_utf8()

size_t __cdecl __crt_mbstring::__mbsrtowcs_utf8 ( wchar_t *  dst,
const char **  src,
size_t  len,
mbstate_t *  ps,
__crt_cached_ptd_host &  ptd 
)

◆ __wcsrtombs_utf8()

size_t __cdecl __crt_mbstring::__wcsrtombs_utf8 ( char *  dst,
const wchar_t **  src,
size_t  len,
mbstate_t *  ps,
__crt_cached_ptd_host &  ptd 
)

Definition at line 439 of file wcrtomb.cpp.

440{
441 const wchar_t* current_src = *src;
442 char buf[MB_LEN_MAX];
443
444 if (dst != nullptr)
445 {
446 char* current_dest = dst;
447
448 // Wide chars are actually UTF-16, so a code point might take 2 input units (a surrogate pair)
449 // In case of a failure, keep track of where the current code point began, which might be the previous
450 // wchar for a surrogate pair
451 const wchar_t* start_of_code_point = current_src;
452 for (;;)
453 {
454 // If we don't have at least 4 MB_CUR_LEN bytes available in the buffer
455 // the next char isn't guaranteed to fit, so put it into a temp buffer
456 char* temp;
457 if (len < 4)
458 {
459 temp = buf;
460 }
461 else
462 {
463 temp = current_dest;
464 }
465 const size_t retval = __c16rtomb_utf8(temp, *current_src, ps, ptd);
466
467 if (retval == __crt_mbstring::INVALID)
468 {
469 // Set src to the beginning of the invalid char
470 // If this was the second half of a surrogate pair, return the beginning of the surrogate pair
471 *src = start_of_code_point;
472 return retval;
473 }
474
475 if (temp == current_dest)
476 {
477 // We wrote in-place. Nothing to do.
478 }
479 else if (len < retval)
480 {
481 // Won't fit, so bail out
482 // If this was the second half of a surrogate pair, make sure we return that location
483 current_src = start_of_code_point;
484 break;
485 }
486 else
487 {
488 // Will fit in remaining buffer, so let's copy it over
489 memcpy(current_dest, temp, retval);
490 }
491
492 if (retval > 0 && current_dest[retval - 1] == '\0')
493 {
494 // Reached null terminator, so break out, but don't count that last terminating byte
495 current_src = nullptr;
496 current_dest += retval - 1;
497 break;
498 }
499
500 ++current_src;
501 if (retval > 0)
502 {
503 start_of_code_point = current_src;
504 }
505
506 len -= retval;
507 current_dest += retval;
508 }
509 *src = current_src;
510 return current_dest - dst;
511 }
512 else
513 {
514 size_t total_count = 0;
515 for (;;)
516 {
517 const size_t retval = __c16rtomb_utf8(buf, *current_src, ps, ptd);
518 if (retval == __crt_mbstring::INVALID)
519 {
520 return retval;
521 }
522 else if (retval > 0 && buf[retval - 1] == '\0')
523 {
524 // Hit null terminator. Don't count it in the return value.
525 total_count += retval - 1;
526 break;
527 }
528 total_count += retval;
529 ++current_src;
530 }
531 return total_count;
532 }
533}
GLenum src
Definition: glext.h:6340
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glext.h:7751
GLenum GLenum dst
Definition: glext.h:6340
GLenum GLsizei len
Definition: glext.h:6722
#define MB_LEN_MAX
Definition: stdlib.h:19
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
size_t __cdecl __c16rtomb_utf8(char *s, char16_t c16, mbstate_t *ps, __crt_cached_ptd_host &ptd)
Definition: c16rtomb.cpp:54
size_t const wchar_t const mbstate_t *const _Inout_ __crt_cached_ptd_host & ptd
Definition: wcrtomb.cpp:57

Referenced by __mbsrtowcs_utf8(), _wcsrtombs_internal(), and if().

◆ reset_and_return()

size_t __crt_mbstring::reset_and_return ( size_t  retval,
mbstate_t *  ps 
)

◆ return_illegal_sequence()

size_t __crt_mbstring::return_illegal_sequence ( mbstate_t *  ps,
__crt_cached_ptd_host &  ptd 
)

Definition at line 15 of file common_utf8.cpp.

16 {
17 *ps = {};
18 ptd.get_errno().set(EILSEQ);
19 return INVALID;
20 }
#define EILSEQ
Definition: errno.h:109

Referenced by __c16rtomb_utf8(), __c32rtomb_utf8(), __mbrtoc16_utf8(), and __mbrtoc32_utf8().

Variable Documentation

◆ INCOMPLETE

constexpr size_t __crt_mbstring::INCOMPLETE = static_cast<size_t>(-2)
constexpr

Definition at line 148 of file corecrt_internal_mbstring.h.

Referenced by __mbrtoc16_utf8(), and __mbrtoc32_utf8().

◆ INVALID

constexpr size_t __crt_mbstring::INVALID = static_cast<size_t>(-1)
constexpr