Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > sortkey.c
Go to the documentation of this file.
00001 /* 00002 * Unicode sort key generation 00003 * 00004 * Copyright 2003 Dmitry Timoshkov 00005 * 00006 * This library is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Lesser General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2.1 of the License, or (at your option) any later version. 00010 * 00011 * This library is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Lesser General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Lesser General Public 00017 * License along with this library; if not, write to the Free Software 00018 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 00019 */ 00020 #include "wine/unicode.h" 00021 00022 extern int get_decomposition(WCHAR src, WCHAR *dst, unsigned int dstlen); 00023 extern const unsigned int collation_table[]; 00024 00025 /* 00026 * flags - normalization NORM_* flags 00027 * 00028 * FIXME: 'variable' flag not handled 00029 */ 00030 int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dstlen) 00031 { 00032 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */ 00033 int key_len[4]; 00034 char *key_ptr[4]; 00035 const WCHAR *src_save = src; 00036 int srclen_save = srclen; 00037 00038 key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0; 00039 for (; srclen; srclen--, src++) 00040 { 00041 int decomposed_len = 1;/*get_decomposition(*src, dummy, 4);*/ 00042 dummy[0] = *src; 00043 if (decomposed_len) 00044 { 00045 int i; 00046 for (i = 0; i < decomposed_len; i++) 00047 { 00048 WCHAR wch = dummy[i]; 00049 unsigned int ce; 00050 00051 /* tests show that win2k just ignores NORM_IGNORENONSPACE, 00052 * and skips white space and punctuation characters for 00053 * NORM_IGNORESYMBOLS. 
00054 */ 00055 if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE))) 00056 continue; 00057 00058 if (flags & NORM_IGNORECASE) wch = tolowerW(wch); 00059 00060 ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)]; 00061 if (ce != (unsigned int)-1) 00062 { 00063 if (ce >> 16) key_len[0] += 2; 00064 if ((ce >> 8) & 0xff) key_len[1]++; 00065 if ((ce >> 4) & 0x0f) key_len[2]++; 00066 if (ce & 1) 00067 { 00068 if (wch >> 8) key_len[3]++; 00069 key_len[3]++; 00070 } 00071 } 00072 else 00073 { 00074 key_len[0] += 2; 00075 if (wch >> 8) key_len[0]++; 00076 if (wch & 0xff) key_len[0]++; 00077 } 00078 } 00079 } 00080 } 00081 00082 if (!dstlen) /* compute length */ 00083 /* 4 * '\1' + 1 * '\0' + key length */ 00084 return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1; 00085 00086 if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1) 00087 return 0; /* overflow */ 00088 00089 src = src_save; 00090 srclen = srclen_save; 00091 00092 key_ptr[0] = dst; 00093 key_ptr[1] = key_ptr[0] + key_len[0] + 1; 00094 key_ptr[2] = key_ptr[1] + key_len[1] + 1; 00095 key_ptr[3] = key_ptr[2] + key_len[2] + 1; 00096 00097 for (; srclen; srclen--, src++) 00098 { 00099 int decomposed_len = 1;/*get_decomposition(*src, dummy, 4);*/ 00100 dummy[0] = *src; 00101 if (decomposed_len) 00102 { 00103 int i; 00104 for (i = 0; i < decomposed_len; i++) 00105 { 00106 WCHAR wch = dummy[i]; 00107 unsigned int ce; 00108 00109 /* tests show that win2k just ignores NORM_IGNORENONSPACE, 00110 * and skips white space and punctuation characters for 00111 * NORM_IGNORESYMBOLS. 
00112 */ 00113 if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE))) 00114 continue; 00115 00116 if (flags & NORM_IGNORECASE) wch = tolowerW(wch); 00117 00118 ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)]; 00119 if (ce != (unsigned int)-1) 00120 { 00121 WCHAR key; 00122 if ((key = ce >> 16)) 00123 { 00124 *key_ptr[0]++ = key >> 8; 00125 *key_ptr[0]++ = key & 0xff; 00126 } 00127 /* make key 1 start from 2 */ 00128 if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1; 00129 /* make key 2 start from 2 */ 00130 if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1; 00131 /* key 3 is always a character code */ 00132 if (ce & 1) 00133 { 00134 if (wch >> 8) *key_ptr[3]++ = wch >> 8; 00135 if (wch & 0xff) *key_ptr[3]++ = wch & 0xff; 00136 } 00137 } 00138 else 00139 { 00140 *key_ptr[0]++ = 0xff; 00141 *key_ptr[0]++ = 0xfe; 00142 if (wch >> 8) *key_ptr[0]++ = wch >> 8; 00143 if (wch & 0xff) *key_ptr[0]++ = wch & 0xff; 00144 } 00145 } 00146 } 00147 } 00148 00149 *key_ptr[0] = '\1'; 00150 *key_ptr[1] = '\1'; 00151 *key_ptr[2] = '\1'; 00152 *key_ptr[3]++ = '\1'; 00153 *key_ptr[3] = 0; 00154 00155 return key_ptr[3] - dst; 00156 } 00157 00158 static inline int compare_unicode_weights(int flags, const WCHAR *str1, int len1, 00159 const WCHAR *str2, int len2) 00160 { 00161 unsigned int ce1, ce2; 00162 int ret; 00163 00164 /* 32-bit collation element table format: 00165 * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit, 00166 * case weight - high 4 bit of low 8 bit. 
00167 */ 00168 while (len1 > 0 && len2 > 0) 00169 { 00170 if (flags & NORM_IGNORESYMBOLS) 00171 { 00172 int skip = 0; 00173 /* FIXME: not tested */ 00174 if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE)) 00175 { 00176 str1++; 00177 len1--; 00178 skip = 1; 00179 } 00180 if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE)) 00181 { 00182 str2++; 00183 len2--; 00184 skip = 1; 00185 } 00186 if (skip) continue; 00187 } 00188 00189 /* hyphen and apostrophe are treated differently depending on 00190 * whether SORT_STRINGSORT specified or not 00191 */ 00192 if (!(flags & SORT_STRINGSORT)) 00193 { 00194 if (*str1 == '-' || *str1 == '\'') 00195 { 00196 if (*str2 != '-' && *str2 != '\'') 00197 { 00198 str1++; 00199 len1--; 00200 continue; 00201 } 00202 } 00203 else if (*str2 == '-' || *str2 == '\'') 00204 { 00205 str2++; 00206 len2--; 00207 continue; 00208 } 00209 } 00210 00211 ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)]; 00212 ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)]; 00213 00214 if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1) 00215 ret = (ce1 >> 16) - (ce2 >> 16); 00216 else 00217 ret = *str1 - *str2; 00218 00219 if (ret) return ret; 00220 00221 str1++; 00222 str2++; 00223 len1--; 00224 len2--; 00225 } 00226 return len1 - len2; 00227 } 00228 00229 static inline int compare_diacritic_weights(int flags, const WCHAR *str1, int len1, 00230 const WCHAR *str2, int len2) 00231 { 00232 unsigned int ce1, ce2; 00233 int ret; 00234 00235 /* 32-bit collation element table format: 00236 * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit, 00237 * case weight - high 4 bit of low 8 bit. 
00238 */ 00239 while (len1 > 0 && len2 > 0) 00240 { 00241 if (flags & NORM_IGNORESYMBOLS) 00242 { 00243 int skip = 0; 00244 /* FIXME: not tested */ 00245 if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE)) 00246 { 00247 str1++; 00248 len1--; 00249 skip = 1; 00250 } 00251 if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE)) 00252 { 00253 str2++; 00254 len2--; 00255 skip = 1; 00256 } 00257 if (skip) continue; 00258 } 00259 00260 ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)]; 00261 ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)]; 00262 00263 if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1) 00264 ret = ((ce1 >> 8) & 0xff) - ((ce2 >> 8) & 0xff); 00265 else 00266 ret = *str1 - *str2; 00267 00268 if (ret) return ret; 00269 00270 str1++; 00271 str2++; 00272 len1--; 00273 len2--; 00274 } 00275 return len1 - len2; 00276 } 00277 00278 static inline int compare_case_weights(int flags, const WCHAR *str1, int len1, 00279 const WCHAR *str2, int len2) 00280 { 00281 unsigned int ce1, ce2; 00282 int ret; 00283 00284 /* 32-bit collation element table format: 00285 * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit, 00286 * case weight - high 4 bit of low 8 bit. 
00287 */ 00288 while (len1 > 0 && len2 > 0) 00289 { 00290 if (flags & NORM_IGNORESYMBOLS) 00291 { 00292 int skip = 0; 00293 /* FIXME: not tested */ 00294 if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE)) 00295 { 00296 str1++; 00297 len1--; 00298 skip = 1; 00299 } 00300 if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE)) 00301 { 00302 str2++; 00303 len2--; 00304 skip = 1; 00305 } 00306 if (skip) continue; 00307 } 00308 00309 ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)]; 00310 ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)]; 00311 00312 if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1) 00313 ret = ((ce1 >> 4) & 0x0f) - ((ce2 >> 4) & 0x0f); 00314 else 00315 ret = *str1 - *str2; 00316 00317 if (ret) return ret; 00318 00319 str1++; 00320 str2++; 00321 len1--; 00322 len2--; 00323 } 00324 return len1 - len2; 00325 } 00326 00327 static inline int real_length(const WCHAR *str, int len) 00328 { 00329 while (len && !str[len - 1]) len--; 00330 return len; 00331 } 00332 00333 int wine_compare_string(int flags, const WCHAR *str1, int len1, 00334 const WCHAR *str2, int len2) 00335 { 00336 int ret; 00337 00338 len1 = real_length(str1, len1); 00339 len2 = real_length(str2, len2); 00340 00341 ret = compare_unicode_weights(flags, str1, len1, str2, len2); 00342 if (!ret) 00343 { 00344 if (!(flags & NORM_IGNORENONSPACE)) 00345 ret = compare_diacritic_weights(flags, str1, len1, str2, len2); 00346 if (!ret && !(flags & NORM_IGNORECASE)) 00347 ret = compare_case_weights(flags, str1, len1, str2, len2); 00348 } 00349 return ret; 00350 } Generated on Sun May 27 2012 04:24:36 for ReactOS by
1.7.6.1
|