ReactOS 0.4.15-dev-7674-gc0b4db1
sortkey.c
Go to the documentation of this file.
1/*
2 * Unicode sort key generation
3 *
4 * Copyright 2003 Dmitry Timoshkov
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20#include "wine/unicode.h"
21
22#ifdef __REACTOS__
23#define get_char_typeW(x) iswctype((x) >> 8, (x) & 0xFF)
24#endif
25extern unsigned int wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen );
26extern const unsigned int collation_table[];
27
28/*
29 * flags - normalization NORM_* flags
30 *
31 * FIXME: 'variable' flag not handled
32 */
33int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dstlen)
34{
35 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
36 int key_len[4];
37 char *key_ptr[4];
38 const WCHAR *src_save = src;
39 int srclen_save = srclen;
40
41 key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0;
42 for (; srclen; srclen--, src++)
43 {
44 unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
45 dummy[0] = *src;
46 if (decomposed_len)
47 {
48 for (i = 0; i < decomposed_len; i++)
49 {
50 WCHAR wch = dummy[i];
51 unsigned int ce;
52
53 /* tests show that win2k just ignores NORM_IGNORENONSPACE,
54 * and skips white space and punctuation characters for
55 * NORM_IGNORESYMBOLS.
56 */
58 continue;
59
60 if (flags & NORM_IGNORECASE) wch = tolowerW(wch);
61
62 ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];
63 if (ce != (unsigned int)-1)
64 {
65 if (ce >> 16) key_len[0] += 2;
66 if ((ce >> 8) & 0xff) key_len[1]++;
67 if ((ce >> 4) & 0x0f) key_len[2]++;
68 if (ce & 1)
69 {
70 if (wch >> 8) key_len[3]++;
71 key_len[3]++;
72 }
73 }
74 else
75 {
76 key_len[0] += 2;
77 if (wch >> 8) key_len[0]++;
78 if (wch & 0xff) key_len[0]++;
79 }
80 }
81 }
82 }
83
84 if (!dstlen) /* compute length */
85 /* 4 * '\1' + 1 * '\0' + key length */
86 return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1;
87
88 if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)
89 return 0; /* overflow */
90
91 src = src_save;
92 srclen = srclen_save;
93
94 key_ptr[0] = dst;
95 key_ptr[1] = key_ptr[0] + key_len[0] + 1;
96 key_ptr[2] = key_ptr[1] + key_len[1] + 1;
97 key_ptr[3] = key_ptr[2] + key_len[2] + 1;
98
99 for (; srclen; srclen--, src++)
100 {
101 unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
102 dummy[0] = *src;
103 if (decomposed_len)
104 {
105 for (i = 0; i < decomposed_len; i++)
106 {
107 WCHAR wch = dummy[i];
108 unsigned int ce;
109
110 /* tests show that win2k just ignores NORM_IGNORENONSPACE,
111 * and skips white space and punctuation characters for
112 * NORM_IGNORESYMBOLS.
113 */
115 continue;
116
117 if (flags & NORM_IGNORECASE) wch = tolowerW(wch);
118
119 ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];
120 if (ce != (unsigned int)-1)
121 {
122 WCHAR key;
123 if ((key = ce >> 16))
124 {
125 *key_ptr[0]++ = key >> 8;
126 *key_ptr[0]++ = key & 0xff;
127 }
128 /* make key 1 start from 2 */
129 if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1;
130 /* make key 2 start from 2 */
131 if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1;
132 /* key 3 is always a character code */
133 if (ce & 1)
134 {
135 if (wch >> 8) *key_ptr[3]++ = wch >> 8;
136 if (wch & 0xff) *key_ptr[3]++ = wch & 0xff;
137 }
138 }
139 else
140 {
141 *key_ptr[0]++ = 0xff;
142 *key_ptr[0]++ = 0xfe;
143 if (wch >> 8) *key_ptr[0]++ = wch >> 8;
144 if (wch & 0xff) *key_ptr[0]++ = wch & 0xff;
145 }
146 }
147 }
148 }
149
150 *key_ptr[0] = '\1';
151 *key_ptr[1] = '\1';
152 *key_ptr[2] = '\1';
153 *key_ptr[3]++ = '\1';
154 *key_ptr[3] = 0;
155
156 return key_ptr[3] - dst;
157}
158
159static inline int compare_unicode_weights(int flags, const WCHAR *str1, int len1,
160 const WCHAR *str2, int len2)
161{
162 unsigned int ce1, ce2;
163 int ret;
164
165 /* 32-bit collation element table format:
166 * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
167 * case weight - high 4 bit of low 8 bit.
168 */
169 while (len1 > 0 && len2 > 0)
170 {
172 {
173 int skip = 0;
174 /* FIXME: not tested */
175 if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))
176 {
177 str1++;
178 len1--;
179 skip = 1;
180 }
181 if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))
182 {
183 str2++;
184 len2--;
185 skip = 1;
186 }
187 if (skip) continue;
188 }
189
190 /* hyphen and apostrophe are treated differently depending on
191 * whether SORT_STRINGSORT specified or not
192 */
193 if (!(flags & SORT_STRINGSORT))
194 {
195 if (*str1 == '-' || *str1 == '\'')
196 {
197 if (*str2 != '-' && *str2 != '\'')
198 {
199 str1++;
200 len1--;
201 continue;
202 }
203 }
204 else if (*str2 == '-' || *str2 == '\'')
205 {
206 str2++;
207 len2--;
208 continue;
209 }
210 }
211
212 ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
213 ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
214
215 if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
216 ret = (ce1 >> 16) - (ce2 >> 16);
217 else
218 ret = *str1 - *str2;
219
220 if (ret) return ret;
221
222 str1++;
223 str2++;
224 len1--;
225 len2--;
226 }
227 while (len1 && !*str1)
228 {
229 str1++;
230 len1--;
231 }
232 while (len2 && !*str2)
233 {
234 str2++;
235 len2--;
236 }
237 return len1 - len2;
238}
239
240static inline int compare_diacritic_weights(int flags, const WCHAR *str1, int len1,
241 const WCHAR *str2, int len2)
242{
243 unsigned int ce1, ce2;
244 int ret;
245
246 /* 32-bit collation element table format:
247 * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
248 * case weight - high 4 bit of low 8 bit.
249 */
250 while (len1 > 0 && len2 > 0)
251 {
253 {
254 int skip = 0;
255 /* FIXME: not tested */
256 if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))
257 {
258 str1++;
259 len1--;
260 skip = 1;
261 }
262 if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))
263 {
264 str2++;
265 len2--;
266 skip = 1;
267 }
268 if (skip) continue;
269 }
270
271 ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
272 ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
273
274 if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
275 ret = ((ce1 >> 8) & 0xff) - ((ce2 >> 8) & 0xff);
276 else
277 ret = *str1 - *str2;
278
279 if (ret) return ret;
280
281 str1++;
282 str2++;
283 len1--;
284 len2--;
285 }
286 while (len1 && !*str1)
287 {
288 str1++;
289 len1--;
290 }
291 while (len2 && !*str2)
292 {
293 str2++;
294 len2--;
295 }
296 return len1 - len2;
297}
298
299static inline int compare_case_weights(int flags, const WCHAR *str1, int len1,
300 const WCHAR *str2, int len2)
301{
302 unsigned int ce1, ce2;
303 int ret;
304
305 /* 32-bit collation element table format:
306 * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
307 * case weight - high 4 bit of low 8 bit.
308 */
309 while (len1 > 0 && len2 > 0)
310 {
312 {
313 int skip = 0;
314 /* FIXME: not tested */
315 if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))
316 {
317 str1++;
318 len1--;
319 skip = 1;
320 }
321 if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))
322 {
323 str2++;
324 len2--;
325 skip = 1;
326 }
327 if (skip) continue;
328 }
329
330 ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
331 ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
332
333 if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
334 ret = ((ce1 >> 4) & 0x0f) - ((ce2 >> 4) & 0x0f);
335 else
336 ret = *str1 - *str2;
337
338 if (ret) return ret;
339
340 str1++;
341 str2++;
342 len1--;
343 len2--;
344 }
345 while (len1 && !*str1)
346 {
347 str1++;
348 len1--;
349 }
350 while (len2 && !*str2)
351 {
352 str2++;
353 len2--;
354 }
355 return len1 - len2;
356}
357
358int wine_compare_string(int flags, const WCHAR *str1, int len1,
359 const WCHAR *str2, int len2)
360{
361 int ret;
362
363 ret = compare_unicode_weights(flags, str1, len1, str2, len2);
364 if (!ret)
365 {
366 if (!(flags & NORM_IGNORENONSPACE))
367 ret = compare_diacritic_weights(flags, str1, len1, str2, len2);
368 if (!ret && !(flags & NORM_IGNORECASE))
369 ret = compare_case_weights(flags, str1, len1, str2, len2);
370 }
371 return ret;
372}
#define skip(...)
Definition: atltest.h:64
int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dstlen)
Definition: sortkey.c:33
unsigned int wine_decompose(WCHAR ch, WCHAR *dst, unsigned int dstlen)
static int compare_case_weights(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:299
int wine_compare_string(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:358
static int compare_diacritic_weights(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:240
const unsigned int collation_table[]
Definition: collation.c:5
static int compare_unicode_weights(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:159
unsigned int(__cdecl typeof(jpeg_read_scanlines))(struct jpeg_decompress_struct *
Definition: typeof.h:31
GLenum src
Definition: glext.h:6340
GLenum GLenum dst
Definition: glext.h:6340
GLbitfield flags
Definition: glext.h:7161
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define C1_PUNCT
Definition: unicode.h:35
#define C1_SPACE
Definition: unicode.h:34
WINE_UNICODE_INLINE unsigned short get_char_typeW(WCHAR ch)
Definition: unicode.h:149
static DWORD LPDWORD LPCSTR DWORD srclen
Definition: directory.c:52
static DWORD dstlen
Definition: directory.c:51
#define tolowerW(n)
Definition: unicode.h:44
Definition: copy.c:22
int ret
#define NORM_IGNORECASE
Definition: winnls.h:176
#define SORT_STRINGSORT
Definition: winnls.h:183
#define NORM_IGNORENONSPACE
Definition: winnls.h:178
#define NORM_IGNORESYMBOLS
Definition: winnls.h:179
__wchar_t WCHAR
Definition: xmlstorage.h:180