ReactOS 0.4.15-dev-8332-ga943bb4
utf8.c File Reference
#include <rtl_vista.h>
#include <debug.h>
Include dependency graph for utf8.c:

Go to the source code of this file.

Macros

#define NDEBUG
 

Functions

NTSTATUS NTAPI RtlUnicodeToUTF8N (CHAR *utf8_dest, ULONG utf8_bytes_max, ULONG *utf8_bytes_written, const WCHAR *uni_src, ULONG uni_bytes)
 
NTSTATUS NTAPI RtlUTF8ToUnicodeN (WCHAR *uni_dest, ULONG uni_bytes_max, ULONG *uni_bytes_written, const CHAR *utf8_src, ULONG utf8_bytes)
 

Macro Definition Documentation

◆ NDEBUG

#define NDEBUG

Definition at line 12 of file utf8.c.

Function Documentation

◆ RtlUnicodeToUTF8N()

NTSTATUS NTAPI RtlUnicodeToUTF8N ( CHAR * utf8_dest,
ULONG  utf8_bytes_max,
ULONG * utf8_bytes_written,
const WCHAR * uni_src,
ULONG  uni_bytes 
)

Definition at line 20 of file utf8.c.

/*
 * Body of RtlUnicodeToUTF8N as shown in the Doxygen listing; the number at
 * the start of each line is the line number in utf8.c, not part of the code.
 *
 * Walks the uni_bytes / sizeof(WCHAR) UTF-16 code units of uni_src, decodes
 * each unit (or surrogate pair) to a code point and encodes it as 1-4 UTF-8
 * bytes. With a NULL utf8_dest only the byte count is accumulated; otherwise
 * the bytes are copied while they still fit in utf8_bytes_max. The byte total
 * is stored through utf8_bytes_written.
 *
 * NOTE(review): listing lines 24, 32, 34, 36, 39, 48, 65, 71 and 120 are
 * missing from this extraction. `status` is returned at the end but is never
 * declared or assigned in the visible text, and the three argument checks
 * have no visible bodies, so those lines presumably hold the declaration,
 * the early returns and the status assignments (compare the STATUS_* symbols
 * listed after the function) -- confirm against utf8.c.
 */
23{
25 ULONG i;
26 ULONG written;
27 ULONG ch;
28 BYTE utf8_ch[4];
29 ULONG utf8_ch_len;
30
/* NOTE(review): the statement guarded by each check below (listing lines
 * 32, 34, 36) is not visible here; as printed the checks appear to nest,
 * which is an artefact of the extraction -- confirm against utf8.c. */
31 if (!uni_src)
33 if (!utf8_bytes_written)
/* The odd-length test only applies when a destination buffer is supplied;
 * in every case the loop bound below truncates to whole code units. */
35 if (utf8_dest && uni_bytes % sizeof(WCHAR))
37
38 written = 0;
40
41 for (i = 0; i < uni_bytes / sizeof(WCHAR); i++)
42 {
43 /* decode UTF-16 into ch */
44 ch = uni_src[i];
/* A low surrogate that was not consumed as the second half of a pair is
 * unpaired: substitute U+FFFD. */
45 if (ch >= 0xdc00 && ch <= 0xdfff)
46 {
47 ch = 0xfffd;
49 }
50 else if (ch >= 0xd800 && ch <= 0xdbff)
51 {
/* High surrogate: a following unit must exist inside the input. */
52 if (i + 1 < uni_bytes / sizeof(WCHAR))
53 {
54 ch -= 0xd800;
55 ch <<= 10;
56 if (uni_src[i + 1] >= 0xdc00 && uni_src[i + 1] <= 0xdfff)
57 {
/* Valid pair: combine both halves and skip the low surrogate. */
58 ch |= uni_src[i + 1] - 0xdc00;
59 ch += 0x010000;
60 i++;
61 }
62 else
63 {
/* Not followed by a low surrogate: substitute U+FFFD. i is not advanced,
 * so the following unit is decoded on its own in the next iteration. */
64 ch = 0xfffd;
66 }
67 }
68 else
69 {
/* High surrogate is the last unit of the input: substitute U+FFFD. */
70 ch = 0xfffd;
72 }
73 }
74
75 /* encode ch as UTF-8 */
/* The decode step above cannot yield more than 0x10ffff; the chain below has
 * no final else, so utf8_ch_len would stay unset for larger values. */
76 ASSERT(ch <= 0x10ffff);
77 if (ch < 0x80)
78 {
79 utf8_ch[0] = ch & 0x7f;
80 utf8_ch_len = 1;
81 }
82 else if (ch < 0x800)
83 {
84 utf8_ch[0] = 0xc0 | (ch >> 6 & 0x1f);
85 utf8_ch[1] = 0x80 | (ch >> 0 & 0x3f);
86 utf8_ch_len = 2;
87 }
88 else if (ch < 0x10000)
89 {
90 utf8_ch[0] = 0xe0 | (ch >> 12 & 0x0f);
91 utf8_ch[1] = 0x80 | (ch >> 6 & 0x3f);
92 utf8_ch[2] = 0x80 | (ch >> 0 & 0x3f);
93 utf8_ch_len = 3;
94 }
95 else if (ch < 0x200000)
96 {
97 utf8_ch[0] = 0xf0 | (ch >> 18 & 0x07);
98 utf8_ch[1] = 0x80 | (ch >> 12 & 0x3f);
99 utf8_ch[2] = 0x80 | (ch >> 6 & 0x3f);
100 utf8_ch[3] = 0x80 | (ch >> 0 & 0x3f);
101 utf8_ch_len = 4;
102 }
103
/* No destination: only accumulate the number of bytes that would be needed. */
104 if (!utf8_dest)
105 {
106 written += utf8_ch_len;
107 continue;
108 }
109
/* Copy the sequence only if it fits completely; never emit a partial one. */
110 if (utf8_bytes_max >= utf8_ch_len)
111 {
112 memcpy(utf8_dest, utf8_ch, utf8_ch_len);
113 utf8_dest += utf8_ch_len;
114 utf8_bytes_max -= utf8_ch_len;
115 written += utf8_ch_len;
116 }
117 else
118 {
/* Zeroing the remaining capacity makes every later sequence fail the fit
 * test as well, so nothing is written after the first one that did not fit. */
119 utf8_bytes_max = 0;
121 }
122 }
123
124 *utf8_bytes_written = written;
125 return status;
126}
LONG NTSTATUS
Definition: precomp.h:26
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
#define ASSERT(a)
Definition: mode.c:44
#define STATUS_INVALID_PARAMETER_4
Definition: ntstatus.h:478
#define STATUS_SOME_NOT_MAPPED
Definition: ntstatus.h:86
#define STATUS_INVALID_PARAMETER_5
Definition: ntstatus.h:479
#define STATUS_SUCCESS
Definition: shellext.h:65
#define STATUS_BUFFER_TOO_SMALL
Definition: shellext.h:69
Definition: ps.c:97
uint32_t ULONG
Definition: typedefs.h:59
#define STATUS_INVALID_PARAMETER
Definition: udferr_usr.h:135
__wchar_t WCHAR
Definition: xmlstorage.h:180
unsigned char BYTE
Definition: xxhash.c:193

Referenced by length_as_utf8(), and marshall_unicode_as_utf8().

◆ RtlUTF8ToUnicodeN()

NTSTATUS NTAPI RtlUTF8ToUnicodeN ( WCHAR * uni_dest,
ULONG  uni_bytes_max,
ULONG * uni_bytes_written,
const CHAR * utf8_src,
ULONG  utf8_bytes 
)

Definition at line 132 of file utf8.c.

/*
 * Body of RtlUTF8ToUnicodeN as shown in the Doxygen listing; the number at
 * the start of each line is the line number in utf8.c, not part of the code.
 *
 * Walks the utf8_bytes bytes of utf8_src, decodes each UTF-8 sequence to a
 * code point and encodes it as one or two UTF-16 code units (U+FFFD for
 * malformed input). With a NULL uni_dest only the unit count is accumulated;
 * otherwise units are stored while uni_bytes_max has room. The result, in
 * bytes, is stored through uni_bytes_written.
 *
 * NOTE(review): listing lines 136, 145, 147, 150, 160, 181, 199, 208, 223
 * and 257 are missing from this extraction. `status` is returned at the end
 * but is never declared or assigned in the visible text, and the two argument
 * checks have no visible bodies, so those lines presumably hold the
 * declaration, the early returns and the status assignments -- confirm
 * against utf8.c.
 */
135{
137 ULONG i, j;
138 ULONG written;
139 ULONG ch;
140 ULONG utf8_trail_bytes;
141 WCHAR utf16_ch[3];
142 ULONG utf16_ch_len;
143
/* NOTE(review): the statement guarded by each check below (listing lines
 * 145, 147) is not visible here -- confirm against utf8.c. */
144 if (!utf8_src)
146 if (!uni_bytes_written)
148
/* written counts UTF-16 code units; it is scaled to bytes at the end. */
149 written = 0;
151
152 for (i = 0; i < utf8_bytes; i++)
153 {
154 /* read UTF-8 lead byte */
155 ch = (BYTE)utf8_src[i];
156 utf8_trail_bytes = 0;
/* Lead bytes 0xf5 and above are treated as invalid. */
157 if (ch >= 0xf5)
158 {
159 ch = 0xfffd;
161 }
162 else if (ch >= 0xf0)
163 {
/* 4-byte sequence: keep the 3 payload bits of the lead byte. */
164 ch &= 0x07;
165 utf8_trail_bytes = 3;
166 }
167 else if (ch >= 0xe0)
168 {
/* 3-byte sequence: keep the 4 payload bits of the lead byte. */
169 ch &= 0x0f;
170 utf8_trail_bytes = 2;
171 }
/* 2-byte leads start at 0xc2; 0xc0 and 0xc1 fall through to the branch below. */
172 else if (ch >= 0xc2)
173 {
174 ch &= 0x1f;
175 utf8_trail_bytes = 1;
176 }
177 else if (ch >= 0x80)
178 {
179 /* overlong or trail byte */
180 ch = 0xfffd;
182 }
183
184 /* read UTF-8 trail bytes */
/* All expected trail bytes must lie inside the input. */
185 if (i + utf8_trail_bytes < utf8_bytes)
186 {
/* j only counts iterations; i advances with every accepted trail byte, so
 * utf8_src[i + 1] is always the next unread byte. */
187 for (j = 0; j < utf8_trail_bytes; j++)
188 {
189 if ((utf8_src[i + 1] & 0xc0) == 0x80)
190 {
191 ch <<= 6;
192 ch |= utf8_src[i + 1] & 0x3f;
193 i++;
194 }
195 else
196 {
/* Not a continuation byte: emit a single U+FFFD for what was read so far.
 * i is left on the last consumed byte, so the offending byte is re-read as
 * a lead byte by the next outer iteration. */
197 ch = 0xfffd;
198 utf8_trail_bytes = 0;
200 break;
201 }
202 }
203 }
204 else
205 {
/* Sequence is cut off by the end of the input: emit a single U+FFFD and
 * set i to utf8_bytes so the outer loop ends after this iteration. */
206 ch = 0xfffd;
207 utf8_trail_bytes = 0;
209 i = utf8_bytes;
210 }
211
212 /* encode ch as UTF-16 */
213 if ((ch > 0x10ffff) ||
214 (ch >= 0xd800 && ch <= 0xdfff) ||
215 (utf8_trail_bytes == 2 && ch < 0x00800) ||
216 (utf8_trail_bytes == 3 && ch < 0x10000))
217 {
218 /* invalid codepoint or overlong encoding */
/* The replacement count equals the number of trail bytes consumed
 * (utf16_ch_len = utf8_trail_bytes), hence the three pre-filled slots. */
219 utf16_ch[0] = 0xfffd;
220 utf16_ch[1] = 0xfffd;
221 utf16_ch[2] = 0xfffd;
222 utf16_ch_len = utf8_trail_bytes;
224 }
225 else if (ch >= 0x10000)
226 {
227 /* surrogate pair */
228 ch -= 0x010000;
229 utf16_ch[0] = 0xd800 + (ch >> 10 & 0x3ff);
230 utf16_ch[1] = 0xdc00 + (ch >> 0 & 0x3ff);
231 utf16_ch_len = 2;
232 }
233 else
234 {
235 /* single unit */
236 utf16_ch[0] = ch;
237 utf16_ch_len = 1;
238 }
239
/* No destination: only accumulate the number of units that would be needed. */
240 if (!uni_dest)
241 {
242 written += utf16_ch_len;
243 continue;
244 }
245
/* Store unit by unit while a whole WCHAR still fits in the remaining space. */
246 for (j = 0; j < utf16_ch_len; j++)
247 {
248 if (uni_bytes_max >= sizeof(WCHAR))
249 {
250 *uni_dest++ = utf16_ch[j];
251 uni_bytes_max -= sizeof(WCHAR);
252 written++;
253 }
254 else
255 {
/* Out of room: zero the remaining capacity so no later unit is stored. */
256 uni_bytes_max = 0;
258 }
259 }
260 }
261
/* Convert the unit count to a byte count for the caller. */
262 *uni_bytes_written = written * sizeof(WCHAR);
263 return status;
264}
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
Definition: glfuncs.h:250