ReactOS 0.4.16-dev-329-g9223134
utf8.c
Go to the documentation of this file.
/*
 * PROJECT:     ReactOS Kernel - Vista+ APIs
 * LICENSE:     GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
 * PURPOSE:     Rtl functions of Vista+
 * COPYRIGHT:   2016 Thomas Faber <thomas.faber@reactos.org>
 */
7
8/* INCLUDES ******************************************************************/
9
10#include <rtl_vista.h>
11
12#define NDEBUG
13#include <debug.h>
14
15/* FUNCTIONS *****************************************************************/
16
17/******************************************************************************
18 * RtlUnicodeToUTF8N [NTDLL.@]
19 */
20NTSTATUS NTAPI RtlUnicodeToUTF8N(CHAR *utf8_dest, ULONG utf8_bytes_max,
21 ULONG *utf8_bytes_written,
22 const WCHAR *uni_src, ULONG uni_bytes)
23{
25 ULONG i;
26 ULONG written;
27 ULONG ch;
28 BYTE utf8_ch[4];
29 ULONG utf8_ch_len;
30
31 if (!uni_src)
33 if (!utf8_bytes_written)
35 if (utf8_dest && uni_bytes % sizeof(WCHAR))
37
38 written = 0;
40
41 for (i = 0; i < uni_bytes / sizeof(WCHAR); i++)
42 {
43 /* decode UTF-16 into ch */
44 ch = uni_src[i];
45 if (ch >= 0xdc00 && ch <= 0xdfff)
46 {
47 ch = 0xfffd;
49 }
50 else if (ch >= 0xd800 && ch <= 0xdbff)
51 {
52 if (i + 1 < uni_bytes / sizeof(WCHAR))
53 {
54 ch -= 0xd800;
55 ch <<= 10;
56 if (uni_src[i + 1] >= 0xdc00 && uni_src[i + 1] <= 0xdfff)
57 {
58 ch |= uni_src[i + 1] - 0xdc00;
59 ch += 0x010000;
60 i++;
61 }
62 else
63 {
64 ch = 0xfffd;
66 }
67 }
68 else
69 {
70 ch = 0xfffd;
72 }
73 }
74
75 /* encode ch as UTF-8 */
76 ASSERT(ch <= 0x10ffff);
77 if (ch < 0x80)
78 {
79 utf8_ch[0] = ch & 0x7f;
80 utf8_ch_len = 1;
81 }
82 else if (ch < 0x800)
83 {
84 utf8_ch[0] = 0xc0 | (ch >> 6 & 0x1f);
85 utf8_ch[1] = 0x80 | (ch >> 0 & 0x3f);
86 utf8_ch_len = 2;
87 }
88 else if (ch < 0x10000)
89 {
90 utf8_ch[0] = 0xe0 | (ch >> 12 & 0x0f);
91 utf8_ch[1] = 0x80 | (ch >> 6 & 0x3f);
92 utf8_ch[2] = 0x80 | (ch >> 0 & 0x3f);
93 utf8_ch_len = 3;
94 }
95 else if (ch < 0x200000)
96 {
97 utf8_ch[0] = 0xf0 | (ch >> 18 & 0x07);
98 utf8_ch[1] = 0x80 | (ch >> 12 & 0x3f);
99 utf8_ch[2] = 0x80 | (ch >> 6 & 0x3f);
100 utf8_ch[3] = 0x80 | (ch >> 0 & 0x3f);
101 utf8_ch_len = 4;
102 }
103
104 if (!utf8_dest)
105 {
106 written += utf8_ch_len;
107 continue;
108 }
109
110 if (utf8_bytes_max >= utf8_ch_len)
111 {
112 memcpy(utf8_dest, utf8_ch, utf8_ch_len);
113 utf8_dest += utf8_ch_len;
114 utf8_bytes_max -= utf8_ch_len;
115 written += utf8_ch_len;
116 }
117 else
118 {
119 utf8_bytes_max = 0;
121 }
122 }
123
124 *utf8_bytes_written = written;
125 return status;
126}
127
128
129/******************************************************************************
130 * RtlUTF8ToUnicodeN [NTDLL.@]
131 */
132NTSTATUS NTAPI RtlUTF8ToUnicodeN(WCHAR *uni_dest, ULONG uni_bytes_max,
133 ULONG *uni_bytes_written,
134 const CHAR *utf8_src, ULONG utf8_bytes)
135{
137 ULONG i, j;
138 ULONG written;
139 ULONG ch;
140 ULONG utf8_trail_bytes;
141 WCHAR utf16_ch[3];
142 ULONG utf16_ch_len;
143
144 if (!utf8_src)
146 if (!uni_bytes_written)
148
149 written = 0;
151
152 for (i = 0; i < utf8_bytes; i++)
153 {
154 /* read UTF-8 lead byte */
155 ch = (BYTE)utf8_src[i];
156 utf8_trail_bytes = 0;
157 if (ch >= 0xf5)
158 {
159 ch = 0xfffd;
161 }
162 else if (ch >= 0xf0)
163 {
164 ch &= 0x07;
165 utf8_trail_bytes = 3;
166 }
167 else if (ch >= 0xe0)
168 {
169 ch &= 0x0f;
170 utf8_trail_bytes = 2;
171 }
172 else if (ch >= 0xc2)
173 {
174 ch &= 0x1f;
175 utf8_trail_bytes = 1;
176 }
177 else if (ch >= 0x80)
178 {
179 /* overlong or trail byte */
180 ch = 0xfffd;
182 }
183
184 /* read UTF-8 trail bytes */
185 if (i + utf8_trail_bytes < utf8_bytes)
186 {
187 for (j = 0; j < utf8_trail_bytes; j++)
188 {
189 if ((utf8_src[i + 1] & 0xc0) == 0x80)
190 {
191 ch <<= 6;
192 ch |= utf8_src[i + 1] & 0x3f;
193 i++;
194 }
195 else
196 {
197 ch = 0xfffd;
198 utf8_trail_bytes = 0;
200 break;
201 }
202 }
203 }
204 else
205 {
206 ch = 0xfffd;
207 utf8_trail_bytes = 0;
209 i = utf8_bytes;
210 }
211
212 /* encode ch as UTF-16 */
213 if ((ch > 0x10ffff) ||
214 (ch >= 0xd800 && ch <= 0xdfff) ||
215 (utf8_trail_bytes == 2 && ch < 0x00800) ||
216 (utf8_trail_bytes == 3 && ch < 0x10000))
217 {
218 /* invalid codepoint or overlong encoding */
219 utf16_ch[0] = 0xfffd;
220 utf16_ch[1] = 0xfffd;
221 utf16_ch[2] = 0xfffd;
222 utf16_ch_len = utf8_trail_bytes;
224 }
225 else if (ch >= 0x10000)
226 {
227 /* surrogate pair */
228 ch -= 0x010000;
229 utf16_ch[0] = 0xd800 + (ch >> 10 & 0x3ff);
230 utf16_ch[1] = 0xdc00 + (ch >> 0 & 0x3ff);
231 utf16_ch_len = 2;
232 }
233 else
234 {
235 /* single unit */
236 utf16_ch[0] = ch;
237 utf16_ch_len = 1;
238 }
239
240 if (!uni_dest)
241 {
242 written += utf16_ch_len;
243 continue;
244 }
245
246 for (j = 0; j < utf16_ch_len; j++)
247 {
248 if (uni_bytes_max >= sizeof(WCHAR))
249 {
250 *uni_dest++ = utf16_ch[j];
251 uni_bytes_max -= sizeof(WCHAR);
252 written++;
253 }
254 else
255 {
256 uni_bytes_max = 0;
258 }
259 }
260 }
261
262 *uni_bytes_written = written * sizeof(WCHAR);
263 return status;
264}
LONG NTSTATUS
Definition: precomp.h:26
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
Definition: glfuncs.h:250
NTSTATUS NTAPI RtlUnicodeToUTF8N(CHAR *utf8_dest, ULONG utf8_bytes_max, ULONG *utf8_bytes_written, const WCHAR *uni_src, ULONG uni_bytes)
Definition: utf8.c:20
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
#define ASSERT(a)
Definition: mode.c:44
#define STATUS_INVALID_PARAMETER_4
Definition: ntstatus.h:478
#define STATUS_SOME_NOT_MAPPED
Definition: ntstatus.h:86
#define STATUS_INVALID_PARAMETER_5
Definition: ntstatus.h:479
#define RtlUTF8ToUnicodeN
Definition: reactos.cpp:12
#define STATUS_SUCCESS
Definition: shellext.h:65
#define STATUS_BUFFER_TOO_SMALL
Definition: shellext.h:69
Definition: ps.c:97
#define NTAPI
Definition: typedefs.h:36
uint32_t ULONG
Definition: typedefs.h:59
#define STATUS_INVALID_PARAMETER
Definition: udferr_usr.h:135
__wchar_t WCHAR
Definition: xmlstorage.h:180
char CHAR
Definition: xmlstorage.h:175
unsigned char BYTE
Definition: xxhash.c:193