ReactOS  0.4.15-dev-439-g292f67a
sortkey.c
Go to the documentation of this file.
1 /*
2  * Unicode sort key generation
3  *
4  * Copyright 2003 Dmitry Timoshkov
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  */
20 #include "wine/unicode.h"
21 
22 extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen );
23 extern const unsigned int collation_table[];
24 
25 /*
26  * flags - normalization NORM_* flags
27  *
28  * FIXME: 'variable' flag not handled
29  */
30 int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dstlen)
31 {
32  WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
33  int key_len[4];
34  char *key_ptr[4];
35  const WCHAR *src_save = src;
36  int srclen_save = srclen;
37 
38  key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0;
39  for (; srclen; srclen--, src++)
40  {
41  unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
42  dummy[0] = *src;
43  if (decomposed_len)
44  {
45  for (i = 0; i < decomposed_len; i++)
46  {
47  WCHAR wch = dummy[i];
48  unsigned int ce;
49 
50  /* tests show that win2k just ignores NORM_IGNORENONSPACE,
51  * and skips white space and punctuation characters for
52  * NORM_IGNORESYMBOLS.
53  */
55  continue;
56 
57  if (flags & NORM_IGNORECASE) wch = tolowerW(wch);
58 
59  ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];
60  if (ce != (unsigned int)-1)
61  {
62  if (ce >> 16) key_len[0] += 2;
63  if ((ce >> 8) & 0xff) key_len[1]++;
64  if ((ce >> 4) & 0x0f) key_len[2]++;
65  if (ce & 1)
66  {
67  if (wch >> 8) key_len[3]++;
68  key_len[3]++;
69  }
70  }
71  else
72  {
73  key_len[0] += 2;
74  if (wch >> 8) key_len[0]++;
75  if (wch & 0xff) key_len[0]++;
76  }
77  }
78  }
79  }
80 
81  if (!dstlen) /* compute length */
82  /* 4 * '\1' + key length */
83  return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4;
84 
85  if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)
86  return 0; /* overflow */
87 
88  src = src_save;
89  srclen = srclen_save;
90 
91  key_ptr[0] = dst;
92  key_ptr[1] = key_ptr[0] + key_len[0] + 1;
93  key_ptr[2] = key_ptr[1] + key_len[1] + 1;
94  key_ptr[3] = key_ptr[2] + key_len[2] + 1;
95 
96  for (; srclen; srclen--, src++)
97  {
98  unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
99  dummy[0] = *src;
100  if (decomposed_len)
101  {
102  for (i = 0; i < decomposed_len; i++)
103  {
104  WCHAR wch = dummy[i];
105  unsigned int ce;
106 
107  /* tests show that win2k just ignores NORM_IGNORENONSPACE,
108  * and skips white space and punctuation characters for
109  * NORM_IGNORESYMBOLS.
110  */
111  if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE)))
112  continue;
113 
114  if (flags & NORM_IGNORECASE) wch = tolowerW(wch);
115 
116  ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];
117  if (ce != (unsigned int)-1)
118  {
119  WCHAR key;
120  if ((key = ce >> 16))
121  {
122  *key_ptr[0]++ = key >> 8;
123  *key_ptr[0]++ = key & 0xff;
124  }
125  /* make key 1 start from 2 */
126  if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1;
127  /* make key 2 start from 2 */
128  if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1;
129  /* key 3 is always a character code */
130  if (ce & 1)
131  {
132  if (wch >> 8) *key_ptr[3]++ = wch >> 8;
133  if (wch & 0xff) *key_ptr[3]++ = wch & 0xff;
134  }
135  }
136  else
137  {
138  *key_ptr[0]++ = 0xff;
139  *key_ptr[0]++ = 0xfe;
140  if (wch >> 8) *key_ptr[0]++ = wch >> 8;
141  if (wch & 0xff) *key_ptr[0]++ = wch & 0xff;
142  }
143  }
144  }
145  }
146 
147  *key_ptr[0] = '\1';
148  *key_ptr[1] = '\1';
149  *key_ptr[2] = '\1';
150  *key_ptr[3]++ = '\1';
151  *key_ptr[3] = 0;
152 
153  return key_ptr[3] - dst;
154 }
155 
156 enum weight
157 {
161 };
162 
163 static unsigned int get_weight(WCHAR ch, enum weight type)
164 {
165  unsigned int ret;
166 
167  ret = collation_table[collation_table[ch >> 8] + (ch & 0xff)];
168  if (ret == (unsigned int)-1)
169  return ch;
170 
171  switch(type)
172  {
173  case UNICODE_WEIGHT:
174  return ret >> 16;
175  case DIACRITIC_WEIGHT:
176  return (ret >> 8) & 0xff;
177  case CASE_WEIGHT:
178  default:
179  return (ret >> 4) & 0x0f;
180  }
181 }
182 
183 static void inc_str_pos(const WCHAR **str, int *len, int *dpos, int *dlen)
184 {
185  (*dpos)++;
186  if (*dpos == *dlen)
187  {
188  *dpos = *dlen = 0;
189  (*str)++;
190  (*len)--;
191  }
192 }
193 
194 static inline int compare_weights(int flags, const WCHAR *str1, int len1,
195  const WCHAR *str2, int len2, enum weight type)
196 {
197  int dpos1 = 0, dpos2 = 0, dlen1 = 0, dlen2 = 0;
198  WCHAR dstr1[4], dstr2[4];
199  unsigned int ce1, ce2;
200 
201  /* 32-bit collation element table format:
202  * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
203  * case weight - high 4 bit of low 8 bit.
204  */
205  while (len1 > 0 && len2 > 0)
206  {
207  if (!dlen1) dlen1 = wine_decompose(0, *str1, dstr1, 4);
208  if (!dlen2) dlen2 = wine_decompose(0, *str2, dstr2, 4);
209 
211  {
212  int skip = 0;
213  /* FIXME: not tested */
214  if (get_char_typeW(dstr1[dpos1]) & (C1_PUNCT | C1_SPACE))
215  {
216  inc_str_pos(&str1, &len1, &dpos1, &dlen1);
217  skip = 1;
218  }
219  if (get_char_typeW(dstr2[dpos2]) & (C1_PUNCT | C1_SPACE))
220  {
221  inc_str_pos(&str2, &len2, &dpos2, &dlen2);
222  skip = 1;
223  }
224  if (skip) continue;
225  }
226 
227  /* hyphen and apostrophe are treated differently depending on
228  * whether SORT_STRINGSORT specified or not
229  */
230  if (type == UNICODE_WEIGHT && !(flags & SORT_STRINGSORT))
231  {
232  if (dstr1[dpos1] == '-' || dstr1[dpos1] == '\'')
233  {
234  if (dstr2[dpos2] != '-' && dstr2[dpos2] != '\'')
235  {
236  inc_str_pos(&str1, &len1, &dpos1, &dlen1);
237  continue;
238  }
239  }
240  else if (dstr2[dpos2] == '-' || dstr2[dpos2] == '\'')
241  {
242  inc_str_pos(&str2, &len2, &dpos2, &dlen2);
243  continue;
244  }
245  }
246 
247  ce1 = get_weight(dstr1[dpos1], type);
248  if (!ce1)
249  {
250  inc_str_pos(&str1, &len1, &dpos1, &dlen1);
251  continue;
252  }
253  ce2 = get_weight(dstr2[dpos2], type);
254  if (!ce2)
255  {
256  inc_str_pos(&str2, &len2, &dpos2, &dlen2);
257  continue;
258  }
259 
260  if (ce1 - ce2) return ce1 - ce2;
261 
262  inc_str_pos(&str1, &len1, &dpos1, &dlen1);
263  inc_str_pos(&str2, &len2, &dpos2, &dlen2);
264  }
265  while (len1)
266  {
267  if (!dlen1) dlen1 = wine_decompose(0, *str1, dstr1, 4);
268 
269  ce1 = get_weight(dstr1[dpos1], type);
270  if (ce1) break;
271  inc_str_pos(&str1, &len1, &dpos1, &dlen1);
272  }
273  while (len2)
274  {
275  if (!dlen2) dlen2 = wine_decompose(0, *str2, dstr2, 4);
276 
277  ce2 = get_weight(dstr2[dpos2], type);
278  if (ce2) break;
279  inc_str_pos(&str2, &len2, &dpos2, &dlen2);
280  }
281  return len1 - len2;
282 }
283 
284 int wine_compare_string(int flags, const WCHAR *str1, int len1,
285  const WCHAR *str2, int len2)
286 {
287  int ret;
288 
289  ret = compare_weights(flags, str1, len1, str2, len2, UNICODE_WEIGHT);
290  if (!ret)
291  {
292  if (!(flags & NORM_IGNORENONSPACE))
293  ret = compare_weights(flags, str1, len1, str2, len2, DIACRITIC_WEIGHT);
294  if (!ret && !(flags & NORM_IGNORECASE))
295  ret = compare_weights(flags, str1, len1, str2, len2, CASE_WEIGHT);
296  }
297  return ret;
298 }
#define SORT_STRINGSORT
Definition: winnls.h:180
#define NORM_IGNORESYMBOLS
Definition: winnls.h:176
GLuint GLuint GLsizei GLenum type
Definition: gl.h:1545
#define NORM_IGNORECASE
Definition: winnls.h:173
const unsigned int collation_table[]
Definition: collation.c:5
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
const WCHAR * str
static void inc_str_pos(const WCHAR **str, int *len, int *dpos, int *dlen)
Definition: sortkey.c:183
static unsigned int get_weight(WCHAR ch, enum weight type)
Definition: sortkey.c:163
int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dstlen)
Definition: sortkey.c:33
__wchar_t WCHAR
Definition: xmlstorage.h:180
static DWORD LPDWORD LPCSTR DWORD srclen
Definition: directory.c:51
unsigned int wine_decompose(WCHAR ch, WCHAR *dst, unsigned int dstlen)
WINE_UNICODE_INLINE WCHAR tolowerW(WCHAR ch)
Definition: unicode.h:135
WINE_UNICODE_INLINE unsigned short get_char_typeW(WCHAR ch)
Definition: unicode.h:149
GLbitfield flags
Definition: glext.h:7161
int ret
#define C1_PUNCT
Definition: unicode.h:35
static int compare_weights(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2, enum weight type)
Definition: sortkey.c:194
HKEY key
Definition: reg.c:42
GLenum GLsizei len
Definition: glext.h:6722
GLenum src
Definition: glext.h:6340
weight
Definition: sortkey.c:156
static DWORD dstlen
Definition: directory.c:51
GLenum GLenum dst
Definition: glext.h:6340
#define skip(...)
Definition: atltest.h:64
#define NORM_IGNORENONSPACE
Definition: winnls.h:175
#define C1_SPACE
Definition: unicode.h:34
int wine_compare_string(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:358
Definition: path.c:41