ReactOS  0.4.12-dev-934-g9a4676f
sortkey.c
Go to the documentation of this file.
1 /*
2  * Unicode sort key generation
3  *
4  * Copyright 2003 Dmitry Timoshkov
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  */
20 #include "wine/unicode.h"
21 
/* On ReactOS builds get_char_typeW(x) expands to an iswctype() call whose
 * first argument is the high byte of x and whose second argument is the
 * low byte of x.
 * NOTE(review): callers in this file pass a single WCHAR, so the character
 * is split into (value, mask) halves - confirm this is the intended mapping.
 */
22 #ifdef __REACTOS__
23 #define get_char_typeW(x) iswctype((x) >> 8, (x) & 0xFF)
24 #endif
/* Defined outside this file; in this file it is referenced only from the
 * commented-out calls inside wine_get_sortkey(). */
25 extern unsigned int wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen );
/* Defined outside this file. Looked up in two steps throughout this file:
 * collation_table[collation_table[ch >> 8] + (ch & 0xff)]. */
26 extern const unsigned int collation_table[];
27 
28 /*
29  * flags - normalization NORM_* flags
30  *
31  * FIXME: 'variable' flag not handled
32  */
33 int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dstlen)
34 {
35  WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
36  int key_len[4];
37  char *key_ptr[4];
38  const WCHAR *src_save = src;
39  int srclen_save = srclen;
40 
41  key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0;
42  for (; srclen; srclen--, src++)
43  {
44  unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
45  dummy[0] = *src;
46  if (decomposed_len)
47  {
48  for (i = 0; i < decomposed_len; i++)
49  {
50  WCHAR wch = dummy[i];
51  unsigned int ce;
52 
53  /* tests show that win2k just ignores NORM_IGNORENONSPACE,
54  * and skips white space and punctuation characters for
55  * NORM_IGNORESYMBOLS.
56  */
58  continue;
59 
60  if (flags & NORM_IGNORECASE) wch = tolowerW(wch);
61 
62  ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];
63  if (ce != (unsigned int)-1)
64  {
65  if (ce >> 16) key_len[0] += 2;
66  if ((ce >> 8) & 0xff) key_len[1]++;
67  if ((ce >> 4) & 0x0f) key_len[2]++;
68  if (ce & 1)
69  {
70  if (wch >> 8) key_len[3]++;
71  key_len[3]++;
72  }
73  }
74  else
75  {
76  key_len[0] += 2;
77  if (wch >> 8) key_len[0]++;
78  if (wch & 0xff) key_len[0]++;
79  }
80  }
81  }
82  }
83 
84  if (!dstlen) /* compute length */
85  /* 4 * '\1' + 1 * '\0' + key length */
86  return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1;
87 
88  if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)
89  return 0; /* overflow */
90 
91  src = src_save;
92  srclen = srclen_save;
93 
94  key_ptr[0] = dst;
95  key_ptr[1] = key_ptr[0] + key_len[0] + 1;
96  key_ptr[2] = key_ptr[1] + key_len[1] + 1;
97  key_ptr[3] = key_ptr[2] + key_len[2] + 1;
98 
99  for (; srclen; srclen--, src++)
100  {
101  unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
102  dummy[0] = *src;
103  if (decomposed_len)
104  {
105  for (i = 0; i < decomposed_len; i++)
106  {
107  WCHAR wch = dummy[i];
108  unsigned int ce;
109 
110  /* tests show that win2k just ignores NORM_IGNORENONSPACE,
111  * and skips white space and punctuation characters for
112  * NORM_IGNORESYMBOLS.
113  */
114  if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE)))
115  continue;
116 
117  if (flags & NORM_IGNORECASE) wch = tolowerW(wch);
118 
119  ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];
120  if (ce != (unsigned int)-1)
121  {
122  WCHAR key;
123  if ((key = ce >> 16))
124  {
125  *key_ptr[0]++ = key >> 8;
126  *key_ptr[0]++ = key & 0xff;
127  }
128  /* make key 1 start from 2 */
129  if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1;
130  /* make key 2 start from 2 */
131  if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1;
132  /* key 3 is always a character code */
133  if (ce & 1)
134  {
135  if (wch >> 8) *key_ptr[3]++ = wch >> 8;
136  if (wch & 0xff) *key_ptr[3]++ = wch & 0xff;
137  }
138  }
139  else
140  {
141  *key_ptr[0]++ = 0xff;
142  *key_ptr[0]++ = 0xfe;
143  if (wch >> 8) *key_ptr[0]++ = wch >> 8;
144  if (wch & 0xff) *key_ptr[0]++ = wch & 0xff;
145  }
146  }
147  }
148  }
149 
150  *key_ptr[0] = '\1';
151  *key_ptr[1] = '\1';
152  *key_ptr[2] = '\1';
153  *key_ptr[3]++ = '\1';
154  *key_ptr[3] = 0;
155 
156  return key_ptr[3] - dst;
157 }
158 
159 static inline int compare_unicode_weights(int flags, const WCHAR *str1, int len1,
160  const WCHAR *str2, int len2)
161 {
162  unsigned int ce1, ce2;
163  int ret;
164 
165  /* 32-bit collation element table format:
166  * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
167  * case weight - high 4 bit of low 8 bit.
168  */
169  while (len1 > 0 && len2 > 0)
170  {
172  {
173  int skip = 0;
174  /* FIXME: not tested */
175  if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))
176  {
177  str1++;
178  len1--;
179  skip = 1;
180  }
181  if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))
182  {
183  str2++;
184  len2--;
185  skip = 1;
186  }
187  if (skip) continue;
188  }
189 
190  /* hyphen and apostrophe are treated differently depending on
191  * whether SORT_STRINGSORT specified or not
192  */
193  if (!(flags & SORT_STRINGSORT))
194  {
195  if (*str1 == '-' || *str1 == '\'')
196  {
197  if (*str2 != '-' && *str2 != '\'')
198  {
199  str1++;
200  len1--;
201  continue;
202  }
203  }
204  else if (*str2 == '-' || *str2 == '\'')
205  {
206  str2++;
207  len2--;
208  continue;
209  }
210  }
211 
212  ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
213  ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
214 
215  if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
216  ret = (ce1 >> 16) - (ce2 >> 16);
217  else
218  ret = *str1 - *str2;
219 
220  if (ret) return ret;
221 
222  str1++;
223  str2++;
224  len1--;
225  len2--;
226  }
227  while (len1 && !*str1)
228  {
229  str1++;
230  len1--;
231  }
232  while (len2 && !*str2)
233  {
234  str2++;
235  len2--;
236  }
237  return len1 - len2;
238 }
239 
240 static inline int compare_diacritic_weights(int flags, const WCHAR *str1, int len1,
241  const WCHAR *str2, int len2)
242 {
243  unsigned int ce1, ce2;
244  int ret;
245 
246  /* 32-bit collation element table format:
247  * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
248  * case weight - high 4 bit of low 8 bit.
249  */
250  while (len1 > 0 && len2 > 0)
251  {
253  {
254  int skip = 0;
255  /* FIXME: not tested */
256  if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))
257  {
258  str1++;
259  len1--;
260  skip = 1;
261  }
262  if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))
263  {
264  str2++;
265  len2--;
266  skip = 1;
267  }
268  if (skip) continue;
269  }
270 
271  ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
272  ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
273 
274  if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
275  ret = ((ce1 >> 8) & 0xff) - ((ce2 >> 8) & 0xff);
276  else
277  ret = *str1 - *str2;
278 
279  if (ret) return ret;
280 
281  str1++;
282  str2++;
283  len1--;
284  len2--;
285  }
286  while (len1 && !*str1)
287  {
288  str1++;
289  len1--;
290  }
291  while (len2 && !*str2)
292  {
293  str2++;
294  len2--;
295  }
296  return len1 - len2;
297 }
298 
299 static inline int compare_case_weights(int flags, const WCHAR *str1, int len1,
300  const WCHAR *str2, int len2)
301 {
302  unsigned int ce1, ce2;
303  int ret;
304 
305  /* 32-bit collation element table format:
306  * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
307  * case weight - high 4 bit of low 8 bit.
308  */
309  while (len1 > 0 && len2 > 0)
310  {
312  {
313  int skip = 0;
314  /* FIXME: not tested */
315  if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))
316  {
317  str1++;
318  len1--;
319  skip = 1;
320  }
321  if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))
322  {
323  str2++;
324  len2--;
325  skip = 1;
326  }
327  if (skip) continue;
328  }
329 
330  ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
331  ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
332 
333  if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
334  ret = ((ce1 >> 4) & 0x0f) - ((ce2 >> 4) & 0x0f);
335  else
336  ret = *str1 - *str2;
337 
338  if (ret) return ret;
339 
340  str1++;
341  str2++;
342  len1--;
343  len2--;
344  }
345  while (len1 && !*str1)
346  {
347  str1++;
348  len1--;
349  }
350  while (len2 && !*str2)
351  {
352  str2++;
353  len2--;
354  }
355  return len1 - len2;
356 }
357 
358 int wine_compare_string(int flags, const WCHAR *str1, int len1,
359  const WCHAR *str2, int len2)
360 {
361  int ret;
362 
363  ret = compare_unicode_weights(flags, str1, len1, str2, len2);
364  if (!ret)
365  {
366  if (!(flags & NORM_IGNORENONSPACE))
367  ret = compare_diacritic_weights(flags, str1, len1, str2, len2);
368  if (!ret && !(flags & NORM_IGNORECASE))
369  ret = compare_case_weights(flags, str1, len1, str2, len2);
370  }
371  return ret;
372 }
static int compare_diacritic_weights(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:240
#define SORT_STRINGSORT
Definition: winnls.h:180
static int compare_unicode_weights(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:159
#define NORM_IGNORESYMBOLS
Definition: winnls.h:176
#define NORM_IGNORECASE
Definition: winnls.h:173
const unsigned int collation_table[]
Definition: collation.c:5
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
static int compare_case_weights(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:299
int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dstlen)
Definition: sortkey.c:33
__wchar_t WCHAR
Definition: xmlstorage.h:180
static DWORD LPDWORD LPCSTR DWORD srclen
Definition: directory.c:51
unsigned int wine_decompose(WCHAR ch, WCHAR *dst, unsigned int dstlen)
WINE_UNICODE_INLINE WCHAR tolowerW(WCHAR ch)
Definition: unicode.h:135
WINE_UNICODE_INLINE unsigned short get_char_typeW(WCHAR ch)
Definition: unicode.h:149
GLbitfield flags
Definition: glext.h:7161
int ret
#define C1_PUNCT
Definition: unicode.h:35
HKEY key
Definition: reg.c:42
GLenum src
Definition: glext.h:6340
static DWORD dstlen
Definition: directory.c:51
#define skip(...)
Definition: CString.cpp:57
GLenum GLenum dst
Definition: glext.h:6340
#define NORM_IGNORENONSPACE
Definition: winnls.h:175
#define C1_SPACE
Definition: unicode.h:34
int wine_compare_string(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:358
Definition: path.c:42
unsigned int(__cdecl typeof(jpeg_read_scanlines))(struct jpeg_decompress_struct *
Definition: typeof.h:31