d7/d99/dll_2win32_2kernel32_2winnls_2string_2sortkey_8c_source.html

/*

 * Unicode sort key generation

 *

 * Copyright 2003 Dmitry Timoshkov

 *

 * This library is free software; you can redistribute it and/or

 * modify it under the terms of the GNU Lesser General Public

 * License as published by the Free Software Foundation; either

 * version 2.1 of the License, or (at your option) any later version.

 *

 * This library is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

 * Lesser General Public License for more details.

 *

 * You should have received a copy of the GNU Lesser General Public

 * License along with this library; if not, write to the Free Software

 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA

 */

#include "wine/unicode.h"


#ifdef __REACTOS__

#define get_char_typeW(x) iswctype((x) >> 8, (x) & 0xFF)

#endif

extern unsigned int wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen );

extern const unsigned int collation_table[];


/*

 * flags - normalization NORM_* flags

 *

 * FIXME: 'variable' flag not handled

 */

int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dstlen)

{

    WCHAR dummy[4]; /* no decomposition is larger than 4 chars */

    int key_len[4];

    char *key_ptr[4];

    const WCHAR *src_save = src;

    int srclen_save = srclen;


    key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0;

    for (; srclen; srclen--, src++)

    {

        unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/

        dummy[0] = *src;

        if (decomposed_len)

        {

            for (i = 0; i < decomposed_len; i++)

            {

                WCHAR wch = dummy[i];

                unsigned int ce;


                /* tests show that win2k just ignores NORM_IGNORENONSPACE,

                 * and skips white space and punctuation characters for

                 * NORM_IGNORESYMBOLS.

                 */

                if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE)))

                    continue;


                if (flags & NORM_IGNORECASE) wch = tolowerW(wch);


                ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];

                if (ce != (unsigned int)-1)

                {

                    if (ce >> 16) key_len[0] += 2;

                    if ((ce >> 8) & 0xff) key_len[1]++;

                    if ((ce >> 4) & 0x0f) key_len[2]++;

                    if (ce & 1)

                    {

                        if (wch >> 8) key_len[3]++;

                        key_len[3]++;

                    }

                }

                else

                {

                    key_len[0] += 2;

                    if (wch >> 8) key_len[0]++;

                    if (wch & 0xff) key_len[0]++;

        }

            }

        }

    }


    if (!dstlen) /* compute length */

        /* 4 * '\1' + 1 * '\0' + key length */

        return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1;


    if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)

        return 0; /* overflow */


    src = src_save;

    srclen = srclen_save;


    key_ptr[0] = dst;

    key_ptr[1] = key_ptr[0] + key_len[0] + 1;

    key_ptr[2] = key_ptr[1] + key_len[1] + 1;

    key_ptr[3] = key_ptr[2] + key_len[2] + 1;


    for (; srclen; srclen--, src++)

    {

        unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/

        dummy[0] = *src;

        if (decomposed_len)

        {

            for (i = 0; i < decomposed_len; i++)

            {

                WCHAR wch = dummy[i];

                unsigned int ce;


                /* tests show that win2k just ignores NORM_IGNORENONSPACE,

                 * and skips white space and punctuation characters for

                 * NORM_IGNORESYMBOLS.

                 */

                if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE)))

                    continue;


                if (flags & NORM_IGNORECASE) wch = tolowerW(wch);


                ce = collation_table[collation_table[wch >> 8] + (wch & 0xff)];

                if (ce != (unsigned int)-1)

                {

                    WCHAR key;

                    if ((key = ce >> 16))

                    {

                        *key_ptr[0]++ = key >> 8;

                        *key_ptr[0]++ = key & 0xff;

                    }

                    /* make key 1 start from 2 */

                    if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1;

                    /* make key 2 start from 2 */

                    if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1;

                    /* key 3 is always a character code */

                    if (ce & 1)

                    {

                        if (wch >> 8) *key_ptr[3]++ = wch >> 8;

                        if (wch & 0xff) *key_ptr[3]++ = wch & 0xff;

                    }

                }

                else

                {

                    *key_ptr[0]++ = 0xff;

                    *key_ptr[0]++ = 0xfe;

                    if (wch >> 8) *key_ptr[0]++ = wch >> 8;

                    if (wch & 0xff) *key_ptr[0]++ = wch & 0xff;

                }

            }

        }

    }


    *key_ptr[0] = '\1';

    *key_ptr[1] = '\1';

    *key_ptr[2] = '\1';

    *key_ptr[3]++ = '\1';

    *key_ptr[3] = 0;


    return key_ptr[3] - dst;

}


static inline int compare_unicode_weights(int flags, const WCHAR *str1, int len1,

                                          const WCHAR *str2, int len2)

{

    unsigned int ce1, ce2;

    int ret;


    /* 32-bit collation element table format:

     * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,

     * case weight - high 4 bit of low 8 bit.

     */

    while (len1 > 0 && len2 > 0)

    {

        if (flags & NORM_IGNORESYMBOLS)

        {

            int skip = 0;

            /* FIXME: not tested */

            if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))

            {

                str1++;

                len1--;

                skip = 1;

            }

            if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))

            {

                str2++;

                len2--;

                skip = 1;

            }

            if (skip) continue;

        }


       /* hyphen and apostrophe are treated differently depending on

        * whether SORT_STRINGSORT specified or not

        */

        if (!(flags & SORT_STRINGSORT))

        {

            if (*str1 == '-' || *str1 == '\'')

            {

                if (*str2 != '-' && *str2 != '\'')

                {

                    str1++;

                    len1--;

                    continue;

                }

            }

            else if (*str2 == '-' || *str2 == '\'')

            {

                str2++;

                len2--;

                continue;

            }

        }


        ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];

        ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];


        if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)

            ret = (ce1 >> 16) - (ce2 >> 16);

        else

            ret = *str1 - *str2;


        if (ret) return ret;


        str1++;

        str2++;

        len1--;

        len2--;

    }

    while (len1 && !*str1)

    {

        str1++;

        len1--;

    }

    while (len2 && !*str2)

    {

        str2++;

        len2--;

    }

    return len1 - len2;

}


static inline int compare_diacritic_weights(int flags, const WCHAR *str1, int len1,

                                            const WCHAR *str2, int len2)

{

    unsigned int ce1, ce2;

    int ret;


    /* 32-bit collation element table format:

     * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,

     * case weight - high 4 bit of low 8 bit.

     */

    while (len1 > 0 && len2 > 0)

    {

        if (flags & NORM_IGNORESYMBOLS)

        {

            int skip = 0;

            /* FIXME: not tested */

            if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))

            {

                str1++;

                len1--;

                skip = 1;

            }

            if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))

            {

                str2++;

                len2--;

                skip = 1;

            }

            if (skip) continue;

        }


        ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];

        ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];


        if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)

            ret = ((ce1 >> 8) & 0xff) - ((ce2 >> 8) & 0xff);

        else

            ret = *str1 - *str2;


        if (ret) return ret;


        str1++;

        str2++;

        len1--;

        len2--;

    }

    while (len1 && !*str1)

    {

        str1++;

        len1--;

    }

    while (len2 && !*str2)

    {

        str2++;

        len2--;

    }

    return len1 - len2;

}


static inline int compare_case_weights(int flags, const WCHAR *str1, int len1,

                                       const WCHAR *str2, int len2)

{

    unsigned int ce1, ce2;

    int ret;


    /* 32-bit collation element table format:

     * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,

     * case weight - high 4 bit of low 8 bit.

     */

    while (len1 > 0 && len2 > 0)

    {

        if (flags & NORM_IGNORESYMBOLS)

        {

            int skip = 0;

            /* FIXME: not tested */

            if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))

            {

                str1++;

                len1--;

                skip = 1;

            }

            if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))

            {

                str2++;

                len2--;

                skip = 1;

            }

            if (skip) continue;

        }


        ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];

        ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];


        if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)

            ret = ((ce1 >> 4) & 0x0f) - ((ce2 >> 4) & 0x0f);

        else

            ret = *str1 - *str2;


        if (ret) return ret;


        str1++;

        str2++;

        len1--;

        len2--;

    }

    while (len1 && !*str1)

    {

        str1++;

        len1--;

    }

    while (len2 && !*str2)

    {

        str2++;

        len2--;

    }

    return len1 - len2;

}


int wine_compare_string(int flags, const WCHAR *str1, int len1,

                        const WCHAR *str2, int len2)

{

    int ret;


    ret = compare_unicode_weights(flags, str1, len1, str2, len2);

    if (!ret)

    {

        if (!(flags & NORM_IGNORENONSPACE))

            ret = compare_diacritic_weights(flags, str1, len1, str2, len2);

        if (!ret && !(flags & NORM_IGNORECASE))

            ret = compare_case_weights(flags, str1, len1, str2, len2);

    }

    return ret;

}

skip
#define skip(...)
Definition: atltest.h:64

wine_get_sortkey
int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dstlen)
Definition: sortkey.c:33

wine_decompose
unsigned int wine_decompose(WCHAR ch, WCHAR *dst, unsigned int dstlen)

compare_case_weights
static int compare_case_weights(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:299

wine_compare_string
int wine_compare_string(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:358

compare_diacritic_weights
static int compare_diacritic_weights(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:240

collation_table
const unsigned int collation_table[]
Definition: collation.c:5

compare_unicode_weights
static int compare_unicode_weights(int flags, const WCHAR *str1, int len1, const WCHAR *str2, int len2)
Definition: sortkey.c:159

int
unsigned int(__cdecl typeof(jpeg_read_scanlines))(struct jpeg_decompress_struct *
Definition: typeof.h:31

src
GLenum src
Definition: glext.h:6340

dst
GLenum GLenum dst
Definition: glext.h:6340

flags
GLbitfield flags
Definition: glext.h:7161

i
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248

C1_PUNCT
#define C1_PUNCT
Definition: unicode.h:35

C1_SPACE
#define C1_SPACE
Definition: unicode.h:34

get_char_typeW
WINE_UNICODE_INLINE unsigned short get_char_typeW(WCHAR ch)
Definition: unicode.h:149

dummy
Definition: ndr_types.idl:28

srclen
static DWORD LPDWORD LPCSTR DWORD srclen
Definition: directory.c:52

dstlen
static DWORD dstlen
Definition: directory.c:51

tolowerW
#define tolowerW(n)
Definition: unicode.h:50

key
Definition: copy.c:22

ret
int ret
Definition: wcstombs-tests.c:31

NORM_IGNORECASE
#define NORM_IGNORECASE
Definition: winnls.h:176

SORT_STRINGSORT
#define SORT_STRINGSORT
Definition: winnls.h:183

NORM_IGNORENONSPACE
#define NORM_IGNORENONSPACE
Definition: winnls.h:178

NORM_IGNORESYMBOLS
#define NORM_IGNORESYMBOLS
Definition: winnls.h:179

WCHAR
__wchar_t WCHAR
Definition: xmlstorage.h:180