ReactOS 0.4.15-dev-7958-gcd0bb1a
sortkey.c
Go to the documentation of this file.
1/*
2 * Unicode sort key generation
3 *
4 * Copyright 2003 Dmitry Timoshkov
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20#include "wine/unicode.h"
21
22extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen );
23extern const unsigned int collation_table[];
24
25/*
26 * flags - normalization NORM_* flags
27 *
28 * FIXME: 'variable' flag not handled
29 */
30int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dstlen)
31{
32 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
33 int key_len[4];
34 char *key_ptr[4];
35 const WCHAR *src_save = src;
36 int srclen_save = srclen;
37
38 key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0;
39 for (; srclen; srclen--, src++)
40 {
41 unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
42 dummy[0] = *src;
43 if (decomposed_len)
44 {
45 for (i = 0; i < decomposed_len; i++)
46 {
47 WCHAR wch = dummy[i];
48 unsigned int ce;
49
50 /* tests show that win2k just ignores NORM_IGNORENONSPACE,
51 * and skips white space and punctuation characters for
52 * NORM_IGNORESYMBOLS.
53 */
55 continue;
56
57 if (flags & NORM_IGNORECASE) wch = tolowerW(wch);
58
59 ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];
60 if (ce != (unsigned int)-1)
61 {
62 if (ce >> 16) key_len[0] += 2;
63 if ((ce >> 8) & 0xff) key_len[1]++;
64 if ((ce >> 4) & 0x0f) key_len[2]++;
65 if (ce & 1)
66 {
67 if (wch >> 8) key_len[3]++;
68 key_len[3]++;
69 }
70 }
71 else
72 {
73 key_len[0] += 2;
74 if (wch >> 8) key_len[0]++;
75 if (wch & 0xff) key_len[0]++;
76 }
77 }
78 }
79 }
80
81 if (!dstlen) /* compute length */
82 /* 4 * '\1' + key length */
83 return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4;
84
85 if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)
86 return 0; /* overflow */
87
88 src = src_save;
89 srclen = srclen_save;
90
91 key_ptr[0] = dst;
92 key_ptr[1] = key_ptr[0] + key_len[0] + 1;
93 key_ptr[2] = key_ptr[1] + key_len[1] + 1;
94 key_ptr[3] = key_ptr[2] + key_len[2] + 1;
95
96 for (; srclen; srclen--, src++)
97 {
98 unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
99 dummy[0] = *src;
100 if (decomposed_len)
101 {
102 for (i = 0; i < decomposed_len; i++)
103 {
104 WCHAR wch = dummy[i];
105 unsigned int ce;
106
107 /* tests show that win2k just ignores NORM_IGNORENONSPACE,
108 * and skips white space and punctuation characters for
109 * NORM_IGNORESYMBOLS.
110 */
112 continue;
113
114 if (flags & NORM_IGNORECASE) wch = tolowerW(wch);
115
116 ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];
117 if (ce != (unsigned int)-1)
118 {
119 WCHAR key;
120 if ((key = ce >> 16))
121 {
122 *key_ptr[0]++ = key >> 8;
123 *key_ptr[0]++ = key & 0xff;
124 }
125 /* make key 1 start from 2 */
126 if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1;
127 /* make key 2 start from 2 */
128 if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1;
129 /* key 3 is always a character code */
130 if (ce & 1)
131 {
132 if (wch >> 8) *key_ptr[3]++ = wch >> 8;
133 if (wch & 0xff) *key_ptr[3]++ = wch & 0xff;
134 }
135 }
136 else
137 {
138 *key_ptr[0]++ = 0xff;
139 *key_ptr[0]++ = 0xfe;
140 if (wch >> 8) *key_ptr[0]++ = wch >> 8;
141 if (wch & 0xff) *key_ptr[0]++ = wch & 0xff;
142 }
143 }
144 }
145 }
146
147 *key_ptr[0] = '\1';
148 *key_ptr[1] = '\1';
149 *key_ptr[2] = '\1';
150 *key_ptr[3]++ = '\1';
151 *key_ptr[3] = 0;
152
153 return key_ptr[3] - dst;
154}
155
157{
162
163static unsigned int get_weight(WCHAR ch, enum weight type)
164{
165 unsigned int ret;
166
167 ret = collation_table[collation_table[ch >> 8] + (ch & 0xff)];
168 if (ret == (unsigned int)-1)
169 return ch;
170
171 switch(type)
172 {
173 case UNICODE_WEIGHT:
174 return ret >> 16;
175 case DIACRITIC_WEIGHT:
176 return (ret >> 8) & 0xff;
177 case CASE_WEIGHT:
178 default:
179 return (ret >> 4) & 0x0f;
180 }
181}
182
183static void inc_str_pos(const WCHAR **str, int *len, int *dpos, int *dlen)
184{
185 (*dpos)++;
186 if (*dpos == *dlen)
187 {
188 *dpos = *dlen = 0;
189 (*str)++;
190 (*len)--;
191 }
192}
193
194static inline int compare_weights(int flags, const WCHAR *str1, int len1,
195 const WCHAR *str2, int len2, enum weight type)
196{
197 int dpos1 = 0, dpos2 = 0, dlen1 = 0, dlen2 = 0;
198 WCHAR dstr1[4], dstr2[4];
199 unsigned int ce1, ce2;
200
201 /* 32-bit collation element table format:
202 * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
203 * case weight - high 4 bit of low 8 bit.
204 */
205 while (len1 > 0 && len2 > 0)
206 {
207 if (!dlen1) dlen1 = wine_decompose(0, *str1, dstr1, 4);
208 if (!dlen2) dlen2 = wine_decompose(0, *str2, dstr2, 4);
209
211 {
212 int skip = 0;
213 /* FIXME: not tested */
214 if (get_char_typeW(dstr1[dpos1]) & (C1_PUNCT | C1_SPACE))
215 {
216 inc_str_pos(&str1, &len1, &dpos1, &dlen1);
217 skip = 1;
218 }
219 if (get_char_typeW(dstr2[dpos2]) & (C1_PUNCT | C1_SPACE))
220 {
221 inc_str_pos(&str2, &len2, &dpos2, &dlen2);
222 skip = 1;
223 }
224 if (skip) continue;
225 }
226
227 /* hyphen and apostrophe are treated differently depending on
228 * whether SORT_STRINGSORT specified or not
229 */
231 {
232 if (dstr1[dpos1] == '-' || dstr1[dpos1] == '\'')
233 {
234 if (dstr2[dpos2] != '-' && dstr2[dpos2] != '\'')
235 {
236 inc_str_pos(&str1, &len1, &dpos1, &dlen1);
237 continue;
238 }
239 }
240 else if (dstr2[dpos2] == '-' || dstr2[dpos2] == '\'')
241 {
242 inc_str_pos(&str2, &len2, &dpos2, &dlen2);
243 continue;
244 }
245 }
246
247 ce1 = get_weight(dstr1[dpos1], type);
248 if (!ce1)
249 {
250 inc_str_pos(&str1, &len1, &dpos1, &dlen1);
251 continue;
252 }
253 ce2 = get_weight(dstr2[dpos2], type);
254 if (!ce2)
255 {
256 inc_str_pos(&str2, &len2, &dpos2, &dlen2);
257 continue;
258 }
259
260 if (ce1 - ce2) return ce1 - ce2;
261
262 inc_str_pos(&str1, &len1, &dpos1, &dlen1);
263 inc_str_pos(&str2, &len2, &dpos2, &dlen2);
264 }
265 while (len1)
266 {
267 if (!dlen1) dlen1 = wine_decompose(0, *str1, dstr1, 4);
268
269 ce1 = get_weight(dstr1[dpos1], type);
270 if (ce1) break;
271 inc_str_pos(&str1, &len1, &dpos1, &dlen1);
272 }
273 while (len2)
274 {
275 if (!dlen2) dlen2 = wine_decompose(0, *str2, dstr2, 4);
276
277 ce2 = get_weight(dstr2[dpos2], type);
278 if (ce2) break;
279 inc_str_pos(&str2, &len2, &dpos2, &dlen2);
280 }
281 return len1 - len2;
282}
283
284int wine_compare_string(int flags, const WCHAR *str1, int len1,
285 const WCHAR *str2, int len2)
286{
287 int ret;
288
289 ret = compare_weights(flags, str1, len1, str2, len2, UNICODE_WEIGHT);
290 if (!ret)
291 {
292 if (!(flags & NORM_IGNORENONSPACE))
293 ret = compare_weights(flags, str1, len1, str2, len2, DIACRITIC_WEIGHT);
294 if (!ret && !(flags & NORM_IGNORECASE))
295 ret = compare_weights(flags, str1, len1, str2, len2, CASE_WEIGHT);
296 }
297 return ret;
298}
#define skip(...)
Definition: atltest.h:64
int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dstlen)
Definition: sortkey.c:33
unsigned int wine_decompose(WCHAR ch, WCHAR *dst, unsigned int dstlen)
int wine_compare_string(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:358
const unsigned int collation_table[]
Definition: collation.c:5
GLuint GLuint GLsizei GLenum type
Definition: gl.h:1545
GLenum src
Definition: glext.h:6340
GLenum GLenum dst
Definition: glext.h:6340
GLbitfield flags
Definition: glext.h:7161
GLenum GLsizei len
Definition: glext.h:6722
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define C1_PUNCT
Definition: unicode.h:35
#define C1_SPACE
Definition: unicode.h:34
WINE_UNICODE_INLINE unsigned short get_char_typeW(WCHAR ch)
Definition: unicode.h:149
static DWORD LPDWORD LPCSTR DWORD srclen
Definition: directory.c:52
static DWORD dstlen
Definition: directory.c:51
#define tolowerW(n)
Definition: unicode.h:44
const WCHAR * str
weight
Definition: sortkey.c:157
@ UNICODE_WEIGHT
Definition: sortkey.c:158
@ DIACRITIC_WEIGHT
Definition: sortkey.c:159
@ CASE_WEIGHT
Definition: sortkey.c:160
static int compare_weights(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2, enum weight type)
Definition: sortkey.c:194
static void inc_str_pos(const WCHAR **str, int *len, int *dpos, int *dlen)
Definition: sortkey.c:183
static unsigned int get_weight(WCHAR ch, enum weight type)
Definition: sortkey.c:163
Definition: copy.c:22
int ret
#define NORM_IGNORECASE
Definition: winnls.h:176
#define SORT_STRINGSORT
Definition: winnls.h:183
#define NORM_IGNORENONSPACE
Definition: winnls.h:178
#define NORM_IGNORESYMBOLS
Definition: winnls.h:179
__wchar_t WCHAR
Definition: xmlstorage.h:180