ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

bidi.c
Go to the documentation of this file.
00001 /*
00002  * Uniscribe BiDirectional handling
00003  *
00004  * Copyright 2003 Shachar Shemesh
00005  * Copyright 2007 Maarten Lankhorst
00006  * Copyright 2010 CodeWeavers, Aric Stewart
00007  *
00008  * This library is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU Lesser General Public
00010  * License as published by the Free Software Foundation; either
00011  * version 2.1 of the License, or (at your option) any later version.
00012  *
00013  * This library is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016  * Lesser General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU Lesser General Public
00019  * License along with this library; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
00021  *
00022  * Code derived from the modified reference implementation
00023  * that was found in revision 17 of http://unicode.org/reports/tr9/
00024  * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
00025  *
00026  * -- Copyright (C) 1999-2005, ASMUS, Inc.
00027  *
00028  * Permission is hereby granted, free of charge, to any person obtaining a
00029  * copy of the Unicode data files and any associated documentation (the
00030  * "Data Files") or Unicode software and any associated documentation (the
00031  * "Software") to deal in the Data Files or Software without restriction,
00032  * including without limitation the rights to use, copy, modify, merge,
00033  * publish, distribute, and/or sell copies of the Data Files or Software,
00034  * and to permit persons to whom the Data Files or Software are furnished
00035  * to do so, provided that (a) the above copyright notice(s) and this
00036  * permission notice appear with all copies of the Data Files or Software,
00037  * (b) both the above copyright notice(s) and this permission notice appear
00038  * in associated documentation, and (c) there is clear notice in each
00039  * modified Data File or in the Software as well as in the documentation
00040  * associated with the Data File(s) or Software that the data or software
00041  * has been modified.
00042  */
00043 
00044 #include "config.h"
00045 
00046 #include <stdarg.h>
00047 #include "windef.h"
00048 #include "winbase.h"
00049 #include "wingdi.h"
00050 #include "winnls.h"
00051 #include "usp10.h"
00052 #include "wine/unicode.h"
00053 #include "wine/debug.h"
00054 
00055 #include "usp10_internal.h"
00056 
00057 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
00058 
/* Soft assertion: when the condition is false the stringified expression
 * is reported through FIXME() and execution simply continues. */
#define ASSERT(x) \
    do { \
        if (!(x)) \
            FIXME("assert failed: %s\n", #x); \
    } while (0)

/* Upper bound that resolveExplicit() checks candidate embedding levels
 * against. */
#define MAX_LEVEL 61
00061 
00062 /* HELPER FUNCTIONS AND DECLARATIONS */
00063 
00064 /*------------------------------------------------------------------------
00065     Bidirectional Character Types
00066 
00067     as defined by the Unicode Bidirectional Algorithm Table 3-7.
00068 
00069     Note:
00070 
00071       The list of bidirectional character types here is not grouped the
00072       same way as the table 3-7, since the numeric values for the types
00073       are chosen to keep the state and action tables compact.
00074 ------------------------------------------------------------------------*/
/* Bidirectional character classes. The numeric values are spelled out
 * because the state/action tables in this file are indexed by them. */
enum directions
{
    /* input types */
    ON  = 0,   /* Other Neutral; must be zero, code relies on ON == N == 0 */
    L   = 1,   /* Left Letter */
    R   = 2,   /* Right Letter */
    AN  = 3,   /* Arabic Number */
    EN  = 4,   /* European Number */
    AL  = 5,   /* Arabic Letter (Right-to-left) */
    NSM = 6,   /* Non-spacing Mark */
    CS  = 7,   /* Common Separator */
    ES  = 8,   /* European Separator */
    ET  = 9,   /* European Terminator (post/prefix e.g. $ and %) */

    /* resolved types */
    BN  = 10,  /* Boundary neutral (type of RLE etc after explicit levels) */

    /* input types */
    S   = 11,  /* Segment Separator (TAB); used only in L1 */
    WS  = 12,  /* White space; used only in L1 */
    B   = 13,  /* Paragraph Separator (also known as PS) */

    /* types for explicit controls; these are used only in X1-X9 */
    RLO = 14,
    RLE = 15,
    LRO = 16,
    LRE = 17,
    PDF = 18,

    /* resolved types, also resolved directions */
    N   = ON   /* alias, where ON, WS and S are treated the same */
};
00108 
00109 /* HELPER FUNCTIONS */
00110 
00111 /* Convert the libwine information to the direction enum */
00112 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
00113 {
00114     static const enum directions dir_map[16] =
00115     {
00116         L,  /* unassigned defaults to L */
00117         L,
00118         R,
00119         EN,
00120         ES,
00121         ET,
00122         AN,
00123         CS,
00124         B,
00125         S,
00126         WS,
00127         ON,
00128         AL,
00129         NSM,
00130         BN,
00131         PDF  /* also LRE, LRO, RLE, RLO */
00132     };
00133 
00134     unsigned i;
00135 
00136     for (i = 0; i < uCount; ++i)
00137     {
00138         chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
00139         switch (chartype[i])
00140         {
00141         case ES:
00142             if (!c->fLegacyBidiClass) break;
00143             switch (lpString[i])
00144             {
00145             case '-':
00146             case '+': chartype[i] = N; break;
00147             case '/': chartype[i] = CS; break;
00148             }
00149             break;
00150         case PDF:
00151             switch (lpString[i])
00152             {
00153             case 0x202A: chartype[i] = LRE; break;
00154             case 0x202B: chartype[i] = RLE; break;
00155             case 0x202C: chartype[i] = PDF; break;
00156             case 0x202D: chartype[i] = LRO; break;
00157             case 0x202E: chartype[i] = RLO; break;
00158             }
00159             break;
00160         }
00161     }
00162 }
00163 
00164 /* Set a run of cval values at locations all prior to, but not including */
00165 /* iStart, to the new value nval. */
00166 static void SetDeferredRun(WORD *pval, int cval, int iStart, int nval)
00167 {
00168     int i = iStart - 1;
00169     for (; i >= iStart - cval; i--)
00170     {
00171         pval[i] = nval;
00172     }
00173 }
00174 
00175 /* RESOLVE EXPLICIT */
00176 
00177 static WORD GreaterEven(int i)
00178 {
00179     return odd(i) ? i + 1 : i + 2;
00180 }
00181 
00182 static WORD GreaterOdd(int i)
00183 {
00184     return odd(i) ? i + 2 : i + 1;
00185 }
00186 
00187 static WORD EmbeddingDirection(int level)
00188 {
00189     return odd(level) ? R : L;
00190 }
00191 
00192 /*------------------------------------------------------------------------
00193     Function: resolveExplicit
00194 
00195     Recursively resolves explicit embedding levels and overrides.
00196     Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
00197 
00198     Input: Base embedding level and direction
00199            Character count
00200 
00201     Output: Array of embedding levels
00202 
00203     In/Out: Array of direction classes
00204 
00205 
00206     Note: The function uses two simple counters to keep track of
00207           matching explicit codes and PDF. Use the default argument for
00208           the outermost call. The nesting counter counts the recursion
00209           depth and not the embedding level.
00210 ------------------------------------------------------------------------*/
00211 
00212 static int resolveExplicit(int level, int dir, WORD *pcls, WORD *plevel, int cch, int nNest)
00213 {
00214     /* always called with a valid nesting level
00215        nesting levels are != embedding levels */
00216     int nLastValid = nNest;
00217     int ich = 0;
00218 
00219     /* check input values */
00220     ASSERT(nNest >= 0 && level >= 0 && level <= MAX_LEVEL);
00221 
00222     /* process the text */
00223     for (; ich < cch; ich++)
00224     {
00225         WORD cls = pcls[ich];
00226         switch (cls)
00227         {
00228         case LRO:
00229         case LRE:
00230             nNest++;
00231             if (GreaterEven(level) <= MAX_LEVEL - (cls == LRO ? 2 : 0))
00232             {
00233                 plevel[ich] = GreaterEven(level);
00234                 pcls[ich] = BN;
00235                 ich += resolveExplicit(plevel[ich], (cls == LRE ? N : L),
00236                             &pcls[ich+1], &plevel[ich+1],
00237                              cch - (ich+1), nNest);
00238                 nNest--;
00239                 continue;
00240             }
00241             cls = pcls[ich] = BN;
00242             break;
00243 
00244         case RLO:
00245         case RLE:
00246             nNest++;
00247             if (GreaterOdd(level) <= MAX_LEVEL - (cls == RLO ? 2 : 0))
00248             {
00249                 plevel[ich] = GreaterOdd(level);
00250                 pcls[ich] = BN;
00251                 ich += resolveExplicit(plevel[ich], (cls == RLE ? N : R),
00252                                 &pcls[ich+1], &plevel[ich+1],
00253                                  cch - (ich+1), nNest);
00254                 nNest--;
00255                 continue;
00256             }
00257             cls = pcls[ich] = BN;
00258             break;
00259 
00260         case PDF:
00261             cls = pcls[ich] = BN;
00262             if (nNest)
00263             {
00264                 if (nLastValid < nNest)
00265                 {
00266                     nNest--;
00267                 }
00268                 else
00269                 {
00270                     cch = ich; /* break the loop, but complete body */
00271                 }
00272             }
00273         }
00274 
00275         /* Apply the override */
00276         if (dir != N)
00277         {
00278             cls = dir;
00279         }
00280         plevel[ich] = level;
00281         if (pcls[ich] != BN)
00282             pcls[ich] = cls;
00283     }
00284 
00285     return ich;
00286 }
00287 
00288 /* RESOLVE WEAK TYPES */
00289 
/* States of the weak-type resolver; the values are the row indices of
 * stateWeak[] and actionWeak[]. */
enum states
{
    xa  = 0,   /* Arabic letter */
    xr  = 1,   /* right letter */
    xl  = 2,   /* left letter */

    ao  = 3,   /* Arabic letter followed by ON */
    ro  = 4,   /* right letter followed by ON */
    lo  = 5,   /* left letter followed by ON */

    rt  = 6,   /* ET following R */
    lt  = 7,   /* ET following L */

    cn  = 8,   /* EN, AN following AL */
    ra  = 9,   /* Arabic number following R */
    re  = 10,  /* European number following R */
    la  = 11,  /* Arabic number following L */
    le  = 12,  /* European number following L */

    ac  = 13,  /* CS following cn */
    rc  = 14,  /* CS following ra */
    rs  = 15,  /* CS,ES following re */
    lc  = 16,  /* CS following la */
    ls  = 17,  /* CS,ES following le */

    ret = 18,  /* ET following re */
    let = 19   /* ET following le */
};
00318 
00319 static const int stateWeak[][10] =
00320 {
00321     /*    N,  L,  R, AN, EN, AL,NSM, CS, ES, ET */
00322 /*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* Arabic letter          */
00323 /*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter           */
00324 /*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter            */
00325 
00326 /*ao*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* Arabic lett. foll by ON*/
00327 /*ro*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* right lett. foll by ON */
00328 /*lo*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* left lett. foll by ON  */
00329 
00330 /*rt*/ { ro, xl, xr, ra, re, xa, rt, ro, ro, rt }, /* ET following R         */
00331 /*lt*/ { lo, xl, xr, la, le, xa, lt, lo, lo, lt }, /* ET following L         */
00332 
00333 /*cn*/ { ao, xl, xr, cn, cn, xa, cn, ac, ao, ao }, /* EN, AN following AL    */
00334 /*ra*/ { ro, xl, xr, ra, re, xa, ra, rc, ro, rt }, /* Arabic number foll R   */
00335 /*re*/ { ro, xl, xr, ra, re, xa, re, rs, rs,ret }, /* European number foll R */
00336 /*la*/ { lo, xl, xr, la, le, xa, la, lc, lo, lt }, /* Arabic number foll L   */
00337 /*le*/ { lo, xl, xr, la, le, xa, le, ls, ls,let }, /* European number foll L */
00338 
00339 /*ac*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* CS following cn        */
00340 /*rc*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS following ra        */
00341 /*rs*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS,ES following re     */
00342 /*lc*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS following la        */
00343 /*ls*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS,ES following le     */
00344 
00345 /*ret*/{ ro, xl, xr, ra, re, xa,ret, ro, ro,ret }, /* ET following re        */
00346 /*let*/{ lo, xl, xr, la, le, xa,let, lo, lo,let }, /* ET following le        */
00347 };
00348 
00349 enum actions /* possible actions */
00350 {
00351     /* primitives */
00352     IX = 0x100,                    /* increment */
00353     XX = 0xF,                    /* no-op */
00354 
00355     /* actions */
00356     xxx = (XX << 4) + XX,        /* no-op */
00357     xIx = IX + xxx,                /* increment run */
00358     xxN = (XX << 4) + ON,        /* set current to N */
00359     xxE = (XX << 4) + EN,        /* set current to EN */
00360     xxA = (XX << 4) + AN,        /* set current to AN */
00361     xxR = (XX << 4) + R,        /* set current to R */
00362     xxL = (XX << 4) + L,        /* set current to L */
00363     Nxx = (ON << 4) + 0xF,        /* set run to neutral */
00364     Axx = (AN << 4) + 0xF,        /* set run to AN */
00365     ExE = (EN << 4) + EN,        /* set run to EN, set current to EN */
00366     NIx = (ON << 4) + 0xF + IX, /* set run to N, increment */
00367     NxN = (ON << 4) + ON,        /* set run to N, set current to N */
00368     NxR = (ON << 4) + R,        /* set run to N, set current to R */
00369     NxE = (ON << 4) + EN,        /* set run to N, set current to EN */
00370 
00371     AxA = (AN << 4) + AN,        /* set run to AN, set current to AN */
00372     NxL = (ON << 4) + L,        /* set run to N, set current to L */
00373     LxL = (L << 4) + L,            /* set run to L, set current to L */
00374 }  ;
00375 
00376 static const int actionWeak[][10] =
00377 {
00378        /*  N,   L,   R,  AN,  EN,  AL, NSM,  CS,  ES,  ET */
00379 /*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* Arabic letter           */
00380 /*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right letter            */
00381 /*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter             */
00382 
00383 /*ao*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN }, /* Arabic lett. foll by ON */
00384 /*ro*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx }, /* right lett. foll by ON  */
00385 /*lo*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx }, /* left lett. foll by ON   */
00386 
00387 /*rt*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx }, /* ET following R         */
00388 /*lt*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx }, /* ET following L         */
00389 
00390 /*cn*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN }, /* EN, AN following  AL    */
00391 /*ra*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx }, /* Arabic number foll R   */
00392 /*re*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE }, /* European number foll R */
00393 /*la*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx }, /* Arabic number foll L   */
00394 /*le*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL }, /* European number foll L */
00395 
00396 /*ac*/ { Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN }, /* CS following cn         */
00397 /*rc*/ { Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx }, /* CS following ra         */
00398 /*rs*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following re      */
00399 /*lc*/ { Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx }, /* CS following la         */
00400 /*ls*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following le      */
00401 
00402 /*ret*/{ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE }, /* ET following re            */
00403 /*let*/{ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL }, /* ET following le            */
00404 };
00405 
/* Extracts bits 4-7 of an action code: the class to give the deferred
 * run. */
static int GetDeferredType(int action)
{
    const int runField = action & 0xF0;

    return runField >> 4;
}
00410 
/* Extracts bits 0-3 of an action code: the class to give the current
 * character. */
static int GetResolvedType(int action)
{
    const int lowNibble = 0xF;

    return action & lowNibble;
}
00415 
00416 /* Note on action table:
00417 
00418   States can be of two kinds:
00419      - Immediate Resolution State, where each input token
00420        is resolved as soon as it is seen. These states have
00421        only single action codes (xxN) or the no-op (xxx)
00422        for static input tokens.
00423      - Deferred Resolution State, where input tokens
00424        either extend the run (xIx) or resolve its Type (e.g. Nxx).
00425 
00426    Input classes are of three kinds
00427      - Static Input Token, where the class of the token remains
00428        unchanged on output (AN, L, N, R)
00429      - Replaced Input Token, where the class of the token is
00430        always replaced on output (AL, BN, NSM, CS, ES, ET)
00431      - Conditional Input Token, where the class of the token is
00432        changed on output in some, but not all, cases (EN)
00433 
00434      Where tokens are subject to change, a double action
00435      (e.g. NxA, or NxN) is _required_ after deferred states,
00436      resolving both the deferred state and changing the current token.
00437 */
00438 
00439 /*------------------------------------------------------------------------
00440     Function: resolveWeak
00441 
00442     Resolves the directionality of numeric and other weak character types
00443 
00444     Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
00445 
00446     Input: Array of embedding levels
00447            Character count
00448 
00449     In/Out: Array of directional classes
00450 
00451     Note: On input only these directional classes are expected
00452           AL, HL, R, L,  ON, BN, NSM, AN, EN, ES, ET, CS,
00453 ------------------------------------------------------------------------*/
00454 static void resolveWeak(int baselevel, WORD *pcls, WORD *plevel, int cch)
00455 {
00456     int state = odd(baselevel) ? xr : xl;
00457     int cls;
00458 
00459     int level = baselevel;
00460     int action, clsRun, clsNew;
00461     int cchRun = 0;
00462     int ich = 0;
00463 
00464     for (; ich < cch; ich++)
00465     {
00466         /* ignore boundary neutrals */
00467         if (pcls[ich] == BN)
00468         {
00469             /* must flatten levels unless at a level change; */
00470             plevel[ich] = level;
00471 
00472             /* lookahead for level changes */
00473             if (ich + 1 == cch && level != baselevel)
00474             {
00475                 /* have to fixup last BN before end of the loop, since
00476                  * its fix-upped value will be needed below the assert */
00477                 pcls[ich] = EmbeddingDirection(level);
00478             }
00479             else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BN)
00480             {
00481                 /* fixup LAST BN in front / after a level run to make
00482                  * it act like the SOR/EOR in rule X10 */
00483                 int newlevel = plevel[ich+1];
00484                 if (level > newlevel) {
00485                     newlevel = level;
00486                 }
00487                 plevel[ich] = newlevel;
00488 
00489                 /* must match assigned level */
00490                 pcls[ich] = EmbeddingDirection(newlevel);
00491                 level = plevel[ich+1];
00492             }
00493             else
00494             {
00495                 /* don't interrupt runs */
00496                 if (cchRun)
00497                 {
00498                     cchRun++;
00499                 }
00500                 continue;
00501             }
00502         }
00503 
00504         ASSERT(pcls[ich] <= BN);
00505         cls = pcls[ich];
00506 
00507         action = actionWeak[state][cls];
00508 
00509         /* resolve the directionality for deferred runs */
00510         clsRun = GetDeferredType(action);
00511         if (clsRun != XX)
00512         {
00513             SetDeferredRun(pcls, cchRun, ich, clsRun);
00514             cchRun = 0;
00515         }
00516 
00517         /* resolve the directionality class at the current location */
00518         clsNew = GetResolvedType(action);
00519         if (clsNew != XX)
00520             pcls[ich] = clsNew;
00521 
00522         /* increment a deferred run */
00523         if (IX & action)
00524             cchRun++;
00525 
00526         state = stateWeak[state][cls];
00527     }
00528 
00529     /* resolve any deferred runs
00530      * use the direction of the current level to emulate PDF */
00531     cls = EmbeddingDirection(level);
00532 
00533     /* resolve the directionality for deferred runs */
00534     clsRun = GetDeferredType(actionWeak[state][cls]);
00535     if (clsRun != XX)
00536         SetDeferredRun(pcls, cchRun, ich, clsRun);
00537 }
00538 
00539 /* RESOLVE NEUTRAL TYPES */
00540 
00541 /* action values */
00542 enum neutralactions
00543 {
00544     /* action to resolve previous input */
00545     nL = L,         /* resolve EN to L */
00546     En = 3 << 4,    /* resolve neutrals run to embedding level direction */
00547     Rn = R << 4,    /* resolve neutrals run to strong right */
00548     Ln = L << 4,    /* resolved neutrals run to strong left */
00549     In = (1<<8),    /* increment count of deferred neutrals */
00550     LnL = (1<<4)+L, /* set run and EN to L */
00551 };
00552 
00553 static int GetDeferredNeutrals(int action, int level)
00554 {
00555     action = (action >> 4) & 0xF;
00556     if (action == (En >> 4))
00557         return EmbeddingDirection(level);
00558     else
00559         return action;
00560 }
00561 
/* Returns the class for the current character encoded in bits 0-3 of an
 * action code; 0 (N) means the character is left unchanged.
 *
 * The previous version compared the masked value against In, but In is
 * 1 << 8 and can never equal a 4-bit value, so that branch was dead. A
 * pure In action already yields 0 from the mask alone. */
static int GetResolvedNeutrals(int action)
{
    return action & 0xF;
}
00570 
00571 /* state values */
/* States of the neutral resolver; the values are the row indices of
 * actionNeutrals[] and stateNeutrals[]. */
enum resolvestates
{
    /* new temporary class */
    r  = 0,  /* R and characters resolved to R */
    l  = 1,  /* L and characters resolved to L */
    rn = 2,  /* N preceded by right */
    ln = 3,  /* N preceded by left */
    a  = 4,  /* AN preceded by left (the abbreviation 'la' is used up above) */
    na = 5   /* N preceded by a */
};
00582 
00583 
00584 /*------------------------------------------------------------------------
00585   Notes:
00586 
00587   By rule W7, whenever an EN is 'dominated' by an L (including start of
00588   run with embedding direction = L) it is resolved to, and further treated
00589   as L.
00590 
00591   This leads to the need for 'a' and 'na' states.
00592 ------------------------------------------------------------------------*/
00593 
00594 static const int actionNeutrals[][5] =
00595 {
00596 /*   N,  L,  R,  AN, EN = cls */
00597   { In,  0,  0,  0,  0 }, /* r    right */
00598   { In,  0,  0,  0,  L }, /* l    left */
00599 
00600   { In, En, Rn, Rn, Rn }, /* rn   N preceded by right */
00601   { In, Ln, En, En, LnL}, /* ln   N preceded by left */
00602 
00603   { In,  0,  0,  0,  L }, /* a   AN preceded by left */
00604   { In, En, Rn, Rn, En }, /* na   N  preceded by a */
00605 } ;
00606 
00607 static const int stateNeutrals[][5] =
00608 {
00609 /*   N, L,  R, AN, EN */
00610   { rn, l,  r,  r,  r }, /* r   right */
00611   { ln, l,  r,  a,  l }, /* l   left */
00612 
00613   { rn, l,  r,  r,  r }, /* rn  N preceded by right */
00614   { ln, l,  r,  a,  l }, /* ln  N preceded by left */
00615 
00616   { na, l,  r,  a,  l }, /* a  AN preceded by left */
00617   { na, l,  r,  a,  l }, /* na  N preceded by la */
00618 } ;
00619 
00620 /*------------------------------------------------------------------------
00621     Function: resolveNeutrals
00622 
00623     Resolves the directionality of neutral character types.
00624 
00625     Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
00626 
00627     Input: Array of embedding levels
00628            Character count
00629            Baselevel
00630 
00631     In/Out: Array of directional classes
00632 
00633     Note: On input only these directional classes are expected
00634           R,  L,  N, AN, EN and BN
00635 
00636           W7 resolves a number of ENs to L
00637 ------------------------------------------------------------------------*/
00638 static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int cch)
00639 {
00640     /* the state at the start of text depends on the base level */
00641     int state = odd(baselevel) ? r : l;
00642     int cls;
00643 
00644     int cchRun = 0;
00645     int level = baselevel;
00646 
00647     int action, clsRun, clsNew;
00648     int ich = 0;
00649     for (; ich < cch; ich++)
00650     {
00651         /* ignore boundary neutrals */
00652         if (pcls[ich] == BN)
00653         {
00654             /* include in the count for a deferred run */
00655             if (cchRun)
00656                 cchRun++;
00657 
00658             /* skip any further processing */
00659             continue;
00660         }
00661 
00662         ASSERT(pcls[ich] < 5); /* "Only N, L, R,  AN, EN are allowed" */
00663         cls = pcls[ich];
00664 
00665         action = actionNeutrals[state][cls];
00666 
00667         /* resolve the directionality for deferred runs */
00668         clsRun = GetDeferredNeutrals(action, level);
00669         if (clsRun != N)
00670         {
00671             SetDeferredRun(pcls, cchRun, ich, clsRun);
00672             cchRun = 0;
00673         }
00674 
00675         /* resolve the directionality class at the current location */
00676         clsNew = GetResolvedNeutrals(action);
00677         if (clsNew != N)
00678             pcls[ich] = clsNew;
00679 
00680         if (In & action)
00681             cchRun++;
00682 
00683         state = stateNeutrals[state][cls];
00684         level = plevel[ich];
00685     }
00686 
00687     /* resolve any deferred runs */
00688     cls = EmbeddingDirection(level);    /* eor has type of current level */
00689 
00690     /* resolve the directionality for deferred runs */
00691     clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level);
00692     if (clsRun != N)
00693         SetDeferredRun(pcls, cchRun, ich, clsRun);
00694 }
00695 
00696 /* RESOLVE IMPLICIT */
00697 
00698 /*------------------------------------------------------------------------
00699     Function: resolveImplicit
00700 
00701     Recursively resolves implicit embedding levels.
00702     Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
00703 
00704     Input: Array of direction classes
00705            Character count
00706            Base level
00707 
00708     In/Out: Array of embedding levels
00709 
00710     Note: levels may exceed 15 on output.
00711           Accepted subset of direction classes
00712           R, L, AN, EN
00713 ------------------------------------------------------------------------*/
00714 static const WORD addLevel[][4] =
00715 {
00716           /* L,  R, AN, EN */
00717 /* even */ { 0,  1,  2,  2, },
00718 /* odd  */ { 1,  0,  1,  1, }
00719 
00720 };
00721 
00722 static void resolveImplicit(const WORD * pcls, WORD *plevel, int cch)
00723 {
00724     int ich = 0;
00725     for (; ich < cch; ich++)
00726     {
00727         /* cannot resolve bn here, since some bn were resolved to strong
00728          * types in resolveWeak. To remove these we need the original
00729          * types, which are available again in resolveWhiteSpace */
00730         if (pcls[ich] == BN)
00731         {
00732             continue;
00733         }
00734         ASSERT(pcls[ich] > 0); /* "No Neutrals allowed to survive here." */
00735         ASSERT(pcls[ich] < 5); /* "Out of range." */
00736         plevel[ich] += addLevel[odd(plevel[ich])][pcls[ich] - 1];
00737     }
00738 }
00739 
00740 /*************************************************************
00741  *    BIDI_DetermineLevels
00742  */
00743 BOOL BIDI_DetermineLevels(
00744                 LPCWSTR lpString,       /* [in] The string for which information is to be returned */
00745                 INT uCount,     /* [in] Number of WCHARs in string. */
00746                 const SCRIPT_STATE *s,
00747                 const SCRIPT_CONTROL *c,
00748                 WORD *lpOutLevels /* [out] final string levels */
00749     )
00750 {
00751     WORD *chartype;
00752     unsigned baselevel = 0,j;
00753     TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
00754 
00755     chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
00756     if (!chartype)
00757     {
00758         WARN("Out of memory\n");
00759         return FALSE;
00760     }
00761 
00762     baselevel = s->uBidiLevel;
00763 
00764     classify(lpString, chartype, uCount, c);
00765 
00766     for (j = 0; j < uCount; ++j)
00767         switch(chartype[j])
00768         {
00769             case B:
00770             case S:
00771             case WS:
00772             case ON: chartype[j] = N;
00773             default: continue;
00774         }
00775 
00776     /* resolve explicit */
00777     resolveExplicit(baselevel, N, chartype, lpOutLevels, uCount, 0);
00778 
00779     /* resolve weak */
00780     resolveWeak(baselevel, chartype, lpOutLevels, uCount);
00781 
00782     /* resolve neutrals */
00783     resolveNeutrals(baselevel, chartype, lpOutLevels, uCount);
00784 
00785     /* resolveImplicit */
00786     resolveImplicit(chartype, lpOutLevels, uCount);
00787 
00788     HeapFree(GetProcessHeap(), 0, chartype);
00789     return TRUE;
00790 }
00791 
00792 /* reverse cch indexes */
/* Reverses the first cch entries of pidx in place. Nothing happens when
 * cch is 1 or less. */
static void reverse(int *pidx, int cch)
{
    int head = 0;
    int tail = cch - 1;

    while (head < tail)
    {
        const int saved = pidx[head];

        pidx[head] = pidx[tail];
        pidx[tail] = saved;
        head++;
        tail--;
    }
}
00804 
00805 
00806 /*------------------------------------------------------------------------
00807     Functions: reorder/reorderLevel
00808 
00809     Recursively reorders the display string
00810     "From the highest level down, reverse all characters at that level and
00811     higher, down to the lowest odd level"
00812 
00813     Implements rule L2 of the Unicode bidi Algorithm.
00814 
00815     Input: Array of embedding levels
00816            Character count
00817            Flag enabling reversal (set to false by initial caller)
00818 
00819     In/Out: Text to reorder
00820 
00821     Note: levels may exceed 15 resp. 61 on input.
00822 
00823     Rule L3 - reorder combining marks is not implemented here
00824     Rule L4 - glyph mirroring is implemented as a display option below
00825 
00826     Note: this should be applied a line at a time
00827 -------------------------------------------------------------------------*/
00828 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
00829 {
00830     int ich = 0;
00831 
00832     /* true as soon as first odd level encountered */
00833     fReverse = fReverse || odd(level);
00834 
00835     for (; ich < cch; ich++)
00836     {
00837         if (plevel[ich] < level)
00838         {
00839             break;
00840         }
00841         else if (plevel[ich] > level)
00842         {
00843             ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
00844                 cch - ich, fReverse) - 1;
00845         }
00846     }
00847     if (fReverse)
00848     {
00849         reverse(pIndexs, ich);
00850     }
00851     return ich;
00852 }
00853 
00854 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
00855 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
00856 {
00857     int ich = 0;
00858     int newlevel = -1;
00859 
00860     /* true as soon as first odd level encountered */
00861     fReverse = fReverse || odd(level);
00862 
00863     for (; ich < cch; ich++)
00864     {
00865         if (plevel[ich] < level)
00866             break;
00867         else if (plevel[ich] > level)
00868             newlevel = ich;
00869     }
00870     if (fReverse)
00871     {
00872         reverse(pIndexs, ich);
00873     }
00874 
00875     if (newlevel > 1)
00876     {
00877         ich = 0;
00878         for (; ich < cch; ich++)
00879             if (plevel[ich] > level)
00880                 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
00881                 cch - ich, fReverse) - 1;
00882     }
00883 
00884     return ich;
00885 }
00886 
00887 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
00888                       WORD* lpStrength)
00889 {
00890     int i;
00891     classify(lpString, lpStrength, uCount, c);
00892 
00893     for ( i = 0; i < uCount; i++)
00894     {
00895         switch(lpStrength[i])
00896         {
00897             case L:
00898             case LRE:
00899             case LRO:
00900             case R:
00901             case AL:
00902             case RLE:
00903             case RLO:
00904                 lpStrength[i] = BIDI_STRONG;
00905                 break;
00906             case PDF:
00907             case EN:
00908             case ES:
00909             case ET:
00910             case AN:
00911             case CS:
00912             case BN:
00913                 lpStrength[i] = BIDI_WEAK;
00914                 break;
00915             case B:
00916             case S:
00917             case WS:
00918             case ON:
00919             default: /* Neutrals and NSM */
00920                 lpStrength[i] = BIDI_NEUTRAL;
00921         }
00922     }
00923     return TRUE;
00924 }

Generated on Sat May 26 2012 04:25:18 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.