Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > bidi.c
Go to the documentation of this file.
00001 /* 00002 * Uniscribe BiDirectional handling 00003 * 00004 * Copyright 2003 Shachar Shemesh 00005 * Copyright 2007 Maarten Lankhorst 00006 * Copyright 2010 CodeWeavers, Aric Stewart 00007 * 00008 * This library is free software; you can redistribute it and/or 00009 * modify it under the terms of the GNU Lesser General Public 00010 * License as published by the Free Software Foundation; either 00011 * version 2.1 of the License, or (at your option) any later version. 00012 * 00013 * This library is distributed in the hope that it will be useful, 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 * Lesser General Public License for more details. 00017 * 00018 * You should have received a copy of the GNU Lesser General Public 00019 * License along with this library; if not, write to the Free Software 00020 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 00021 * 00022 * Code derived from the modified reference implementation 00023 * that was found in revision 17 of http://unicode.org/reports/tr9/ 00024 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM" 00025 * 00026 * -- Copyright (C) 1999-2005, ASMUS, Inc. 
00027 * 00028 * Permission is hereby granted, free of charge, to any person obtaining a 00029 * copy of the Unicode data files and any associated documentation (the 00030 * "Data Files") or Unicode software and any associated documentation (the 00031 * "Software") to deal in the Data Files or Software without restriction, 00032 * including without limitation the rights to use, copy, modify, merge, 00033 * publish, distribute, and/or sell copies of the Data Files or Software, 00034 * and to permit persons to whom the Data Files or Software are furnished 00035 * to do so, provided that (a) the above copyright notice(s) and this 00036 * permission notice appear with all copies of the Data Files or Software, 00037 * (b) both the above copyright notice(s) and this permission notice appear 00038 * in associated documentation, and (c) there is clear notice in each 00039 * modified Data File or in the Software as well as in the documentation 00040 * associated with the Data File(s) or Software that the data or software 00041 * has been modified. 00042 */ 00043 00044 #include "config.h" 00045 00046 #include <stdarg.h> 00047 #include "windef.h" 00048 #include "winbase.h" 00049 #include "wingdi.h" 00050 #include "winnls.h" 00051 #include "usp10.h" 00052 #include "wine/unicode.h" 00053 #include "wine/debug.h" 00054 00055 #include "usp10_internal.h" 00056 00057 WINE_DEFAULT_DEBUG_CHANNEL(bidi); 00058 00059 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0) 00060 #define MAX_LEVEL 61 00061 00062 /* HELPER FUNCTIONS AND DECLARATIONS */ 00063 00064 /*------------------------------------------------------------------------ 00065 Bidirectional Character Types 00066 00067 as defined by the Unicode Bidirectional Algorithm Table 3-7. 
00068 00069 Note: 00070 00071 The list of bidirectional character types here is not grouped the 00072 same way as the table 3-7, since the numberic values for the types 00073 are chosen to keep the state and action tables compact. 00074 ------------------------------------------------------------------------*/ 00075 enum directions 00076 { 00077 /* input types */ 00078 /* ON MUST be zero, code relies on ON = N = 0 */ 00079 ON = 0, /* Other Neutral */ 00080 L, /* Left Letter */ 00081 R, /* Right Letter */ 00082 AN, /* Arabic Number */ 00083 EN, /* European Number */ 00084 AL, /* Arabic Letter (Right-to-left) */ 00085 NSM, /* Non-spacing Mark */ 00086 CS, /* Common Separator */ 00087 ES, /* European Separator */ 00088 ET, /* European Terminator (post/prefix e.g. $ and %) */ 00089 00090 /* resolved types */ 00091 BN, /* Boundary neutral (type of RLE etc after explicit levels) */ 00092 00093 /* input types, */ 00094 S, /* Segment Separator (TAB) // used only in L1 */ 00095 WS, /* White space // used only in L1 */ 00096 B, /* Paragraph Separator (aka as PS) */ 00097 00098 /* types for explicit controls */ 00099 RLO, /* these are used only in X1-X9 */ 00100 RLE, 00101 LRO, 00102 LRE, 00103 PDF, 00104 00105 /* resolved types, also resolved directions */ 00106 N = ON, /* alias, where ON, WS and S are treated the same */ 00107 }; 00108 00109 /* HELPER FUNCTIONS */ 00110 00111 /* Convert the libwine information to the direction enum */ 00112 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c) 00113 { 00114 static const enum directions dir_map[16] = 00115 { 00116 L, /* unassigned defaults to L */ 00117 L, 00118 R, 00119 EN, 00120 ES, 00121 ET, 00122 AN, 00123 CS, 00124 B, 00125 S, 00126 WS, 00127 ON, 00128 AL, 00129 NSM, 00130 BN, 00131 PDF /* also LRE, LRO, RLE, RLO */ 00132 }; 00133 00134 unsigned i; 00135 00136 for (i = 0; i < uCount; ++i) 00137 { 00138 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12]; 00139 switch 
(chartype[i]) 00140 { 00141 case ES: 00142 if (!c->fLegacyBidiClass) break; 00143 switch (lpString[i]) 00144 { 00145 case '-': 00146 case '+': chartype[i] = N; break; 00147 case '/': chartype[i] = CS; break; 00148 } 00149 break; 00150 case PDF: 00151 switch (lpString[i]) 00152 { 00153 case 0x202A: chartype[i] = LRE; break; 00154 case 0x202B: chartype[i] = RLE; break; 00155 case 0x202C: chartype[i] = PDF; break; 00156 case 0x202D: chartype[i] = LRO; break; 00157 case 0x202E: chartype[i] = RLO; break; 00158 } 00159 break; 00160 } 00161 } 00162 } 00163 00164 /* Set a run of cval values at locations all prior to, but not including */ 00165 /* iStart, to the new value nval. */ 00166 static void SetDeferredRun(WORD *pval, int cval, int iStart, int nval) 00167 { 00168 int i = iStart - 1; 00169 for (; i >= iStart - cval; i--) 00170 { 00171 pval[i] = nval; 00172 } 00173 } 00174 00175 /* RESOLVE EXPLICIT */ 00176 00177 static WORD GreaterEven(int i) 00178 { 00179 return odd(i) ? i + 1 : i + 2; 00180 } 00181 00182 static WORD GreaterOdd(int i) 00183 { 00184 return odd(i) ? i + 2 : i + 1; 00185 } 00186 00187 static WORD EmbeddingDirection(int level) 00188 { 00189 return odd(level) ? R : L; 00190 } 00191 00192 /*------------------------------------------------------------------------ 00193 Function: resolveExplicit 00194 00195 Recursively resolves explicit embedding levels and overrides. 00196 Implements rules X1-X9, of the Unicode Bidirectional Algorithm. 00197 00198 Input: Base embedding level and direction 00199 Character count 00200 00201 Output: Array of embedding levels 00202 00203 In/Out: Array of direction classes 00204 00205 00206 Note: The function uses two simple counters to keep track of 00207 matching explicit codes and PDF. Use the default argument for 00208 the outermost call. The nesting counter counts the recursion 00209 depth and not the embedding level. 
00210 ------------------------------------------------------------------------*/ 00211 00212 static int resolveExplicit(int level, int dir, WORD *pcls, WORD *plevel, int cch, int nNest) 00213 { 00214 /* always called with a valid nesting level 00215 nesting levels are != embedding levels */ 00216 int nLastValid = nNest; 00217 int ich = 0; 00218 00219 /* check input values */ 00220 ASSERT(nNest >= 0 && level >= 0 && level <= MAX_LEVEL); 00221 00222 /* process the text */ 00223 for (; ich < cch; ich++) 00224 { 00225 WORD cls = pcls[ich]; 00226 switch (cls) 00227 { 00228 case LRO: 00229 case LRE: 00230 nNest++; 00231 if (GreaterEven(level) <= MAX_LEVEL - (cls == LRO ? 2 : 0)) 00232 { 00233 plevel[ich] = GreaterEven(level); 00234 pcls[ich] = BN; 00235 ich += resolveExplicit(plevel[ich], (cls == LRE ? N : L), 00236 &pcls[ich+1], &plevel[ich+1], 00237 cch - (ich+1), nNest); 00238 nNest--; 00239 continue; 00240 } 00241 cls = pcls[ich] = BN; 00242 break; 00243 00244 case RLO: 00245 case RLE: 00246 nNest++; 00247 if (GreaterOdd(level) <= MAX_LEVEL - (cls == RLO ? 2 : 0)) 00248 { 00249 plevel[ich] = GreaterOdd(level); 00250 pcls[ich] = BN; 00251 ich += resolveExplicit(plevel[ich], (cls == RLE ? 
N : R), 00252 &pcls[ich+1], &plevel[ich+1], 00253 cch - (ich+1), nNest); 00254 nNest--; 00255 continue; 00256 } 00257 cls = pcls[ich] = BN; 00258 break; 00259 00260 case PDF: 00261 cls = pcls[ich] = BN; 00262 if (nNest) 00263 { 00264 if (nLastValid < nNest) 00265 { 00266 nNest--; 00267 } 00268 else 00269 { 00270 cch = ich; /* break the loop, but complete body */ 00271 } 00272 } 00273 } 00274 00275 /* Apply the override */ 00276 if (dir != N) 00277 { 00278 cls = dir; 00279 } 00280 plevel[ich] = level; 00281 if (pcls[ich] != BN) 00282 pcls[ich] = cls; 00283 } 00284 00285 return ich; 00286 } 00287 00288 /* RESOLVE WEAK TYPES */ 00289 00290 enum states /* possible states */ 00291 { 00292 xa, /* Arabic letter */ 00293 xr, /* right letter */ 00294 xl, /* left letter */ 00295 00296 ao, /* Arabic lett. foll by ON */ 00297 ro, /* right lett. foll by ON */ 00298 lo, /* left lett. foll by ON */ 00299 00300 rt, /* ET following R */ 00301 lt, /* ET following L */ 00302 00303 cn, /* EN, AN following AL */ 00304 ra, /* Arabic number foll R */ 00305 re, /* European number foll R */ 00306 la, /* Arabic number foll L */ 00307 le, /* European number foll L */ 00308 00309 ac, /* CS following cn */ 00310 rc, /* CS following ra */ 00311 rs, /* CS,ES following re */ 00312 lc, /* CS following la */ 00313 ls, /* CS,ES following le */ 00314 00315 ret, /* ET following re */ 00316 let, /* ET following le */ 00317 } ; 00318 00319 static const int stateWeak[][10] = 00320 { 00321 /* N, L, R, AN, EN, AL,NSM, CS, ES, ET */ 00322 /*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* Arabic letter */ 00323 /*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */ 00324 /*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */ 00325 00326 /*ao*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* Arabic lett. foll by ON*/ 00327 /*ro*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* right lett. foll by ON */ 00328 /*lo*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* left lett. 
foll by ON */ 00329 00330 /*rt*/ { ro, xl, xr, ra, re, xa, rt, ro, ro, rt }, /* ET following R */ 00331 /*lt*/ { lo, xl, xr, la, le, xa, lt, lo, lo, lt }, /* ET following L */ 00332 00333 /*cn*/ { ao, xl, xr, cn, cn, xa, cn, ac, ao, ao }, /* EN, AN following AL */ 00334 /*ra*/ { ro, xl, xr, ra, re, xa, ra, rc, ro, rt }, /* Arabic number foll R */ 00335 /*re*/ { ro, xl, xr, ra, re, xa, re, rs, rs,ret }, /* European number foll R */ 00336 /*la*/ { lo, xl, xr, la, le, xa, la, lc, lo, lt }, /* Arabic number foll L */ 00337 /*le*/ { lo, xl, xr, la, le, xa, le, ls, ls,let }, /* European number foll L */ 00338 00339 /*ac*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* CS following cn */ 00340 /*rc*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS following ra */ 00341 /*rs*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS,ES following re */ 00342 /*lc*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS following la */ 00343 /*ls*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS,ES following le */ 00344 00345 /*ret*/{ ro, xl, xr, ra, re, xa,ret, ro, ro,ret }, /* ET following re */ 00346 /*let*/{ lo, xl, xr, la, le, xa,let, lo, lo,let }, /* ET following le */ 00347 }; 00348 00349 enum actions /* possible actions */ 00350 { 00351 /* primitives */ 00352 IX = 0x100, /* increment */ 00353 XX = 0xF, /* no-op */ 00354 00355 /* actions */ 00356 xxx = (XX << 4) + XX, /* no-op */ 00357 xIx = IX + xxx, /* increment run */ 00358 xxN = (XX << 4) + ON, /* set current to N */ 00359 xxE = (XX << 4) + EN, /* set current to EN */ 00360 xxA = (XX << 4) + AN, /* set current to AN */ 00361 xxR = (XX << 4) + R, /* set current to R */ 00362 xxL = (XX << 4) + L, /* set current to L */ 00363 Nxx = (ON << 4) + 0xF, /* set run to neutral */ 00364 Axx = (AN << 4) + 0xF, /* set run to AN */ 00365 ExE = (EN << 4) + EN, /* set run to EN, set current to EN */ 00366 NIx = (ON << 4) + 0xF + IX, /* set run to N, increment */ 00367 NxN = (ON << 4) + ON, /* set run to N, set current to N */ 00368 NxR = 
(ON << 4) + R, /* set run to N, set current to R */ 00369 NxE = (ON << 4) + EN, /* set run to N, set current to EN */ 00370 00371 AxA = (AN << 4) + AN, /* set run to AN, set current to AN */ 00372 NxL = (ON << 4) + L, /* set run to N, set current to L */ 00373 LxL = (L << 4) + L, /* set run to L, set current to L */ 00374 } ; 00375 00376 static const int actionWeak[][10] = 00377 { 00378 /* N, L, R, AN, EN, AL, NSM, CS, ES, ET */ 00379 /*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* Arabic letter */ 00380 /*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right letter */ 00381 /*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */ 00382 00383 /*ao*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN }, /* Arabic lett. foll by ON */ 00384 /*ro*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx }, /* right lett. foll by ON */ 00385 /*lo*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx }, /* left lett. foll by ON */ 00386 00387 /*rt*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx }, /* ET following R */ 00388 /*lt*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx }, /* ET following L */ 00389 00390 /*cn*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN }, /* EN, AN following AL */ 00391 /*ra*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx }, /* Arabic number foll R */ 00392 /*re*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE }, /* European number foll R */ 00393 /*la*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx }, /* Arabic number foll L */ 00394 /*le*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL }, /* European number foll L */ 00395 00396 /*ac*/ { Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN }, /* CS following cn */ 00397 /*rc*/ { Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx }, /* CS following ra */ 00398 /*rs*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following re */ 00399 /*lc*/ { Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx }, /* CS 
following la */ 00400 /*ls*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following le */ 00401 00402 /*ret*/{ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE }, /* ET following re */ 00403 /*let*/{ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL }, /* ET following le */ 00404 }; 00405 00406 static int GetDeferredType(int action) 00407 { 00408 return (action >> 4) & 0xF; 00409 } 00410 00411 static int GetResolvedType(int action) 00412 { 00413 return action & 0xF; 00414 } 00415 00416 /* Note on action table: 00417 00418 States can be of two kinds: 00419 - Immediate Resolution State, where each input token 00420 is resolved as soon as it is seen. These states have 00421 only single action codes (xxN) or the no-op (xxx) 00422 for static input tokens. 00423 - Deferred Resolution State, where input tokens either 00424 either extend the run (xIx) or resolve its Type (e.g. Nxx). 00425 00426 Input classes are of three kinds 00427 - Static Input Token, where the class of the token remains 00428 unchanged on output (AN, L, N, R) 00429 - Replaced Input Token, where the class of the token is 00430 always replaced on output (AL, BN, NSM, CS, ES, ET) 00431 - Conditional Input Token, where the class of the token is 00432 changed on output in some, but not all, cases (EN) 00433 00434 Where tokens are subject to change, a double action 00435 (e.g. NxA, or NxN) is _required_ after deferred states, 00436 resolving both the deferred state and changing the current token. 00437 */ 00438 00439 /*------------------------------------------------------------------------ 00440 Function: resolveWeak 00441 00442 Resolves the directionality of numeric and other weak character types 00443 00444 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm. 
00445 00446 Input: Array of embedding levels 00447 Character count 00448 00449 In/Out: Array of directional classes 00450 00451 Note: On input only these directional classes are expected 00452 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS, 00453 ------------------------------------------------------------------------*/ 00454 static void resolveWeak(int baselevel, WORD *pcls, WORD *plevel, int cch) 00455 { 00456 int state = odd(baselevel) ? xr : xl; 00457 int cls; 00458 00459 int level = baselevel; 00460 int action, clsRun, clsNew; 00461 int cchRun = 0; 00462 int ich = 0; 00463 00464 for (; ich < cch; ich++) 00465 { 00466 /* ignore boundary neutrals */ 00467 if (pcls[ich] == BN) 00468 { 00469 /* must flatten levels unless at a level change; */ 00470 plevel[ich] = level; 00471 00472 /* lookahead for level changes */ 00473 if (ich + 1 == cch && level != baselevel) 00474 { 00475 /* have to fixup last BN before end of the loop, since 00476 * its fix-upped value will be needed below the assert */ 00477 pcls[ich] = EmbeddingDirection(level); 00478 } 00479 else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BN) 00480 { 00481 /* fixup LAST BN in front / after a level run to make 00482 * it act like the SOR/EOR in rule X10 */ 00483 int newlevel = plevel[ich+1]; 00484 if (level > newlevel) { 00485 newlevel = level; 00486 } 00487 plevel[ich] = newlevel; 00488 00489 /* must match assigned level */ 00490 pcls[ich] = EmbeddingDirection(newlevel); 00491 level = plevel[ich+1]; 00492 } 00493 else 00494 { 00495 /* don't interrupt runs */ 00496 if (cchRun) 00497 { 00498 cchRun++; 00499 } 00500 continue; 00501 } 00502 } 00503 00504 ASSERT(pcls[ich] <= BN); 00505 cls = pcls[ich]; 00506 00507 action = actionWeak[state][cls]; 00508 00509 /* resolve the directionality for deferred runs */ 00510 clsRun = GetDeferredType(action); 00511 if (clsRun != XX) 00512 { 00513 SetDeferredRun(pcls, cchRun, ich, clsRun); 00514 cchRun = 0; 00515 } 00516 00517 /* resolve the directionality 
class at the current location */ 00518 clsNew = GetResolvedType(action); 00519 if (clsNew != XX) 00520 pcls[ich] = clsNew; 00521 00522 /* increment a deferred run */ 00523 if (IX & action) 00524 cchRun++; 00525 00526 state = stateWeak[state][cls]; 00527 } 00528 00529 /* resolve any deferred runs 00530 * use the direction of the current level to emulate PDF */ 00531 cls = EmbeddingDirection(level); 00532 00533 /* resolve the directionality for deferred runs */ 00534 clsRun = GetDeferredType(actionWeak[state][cls]); 00535 if (clsRun != XX) 00536 SetDeferredRun(pcls, cchRun, ich, clsRun); 00537 } 00538 00539 /* RESOLVE NEUTRAL TYPES */ 00540 00541 /* action values */ 00542 enum neutralactions 00543 { 00544 /* action to resolve previous input */ 00545 nL = L, /* resolve EN to L */ 00546 En = 3 << 4, /* resolve neutrals run to embedding level direction */ 00547 Rn = R << 4, /* resolve neutrals run to strong right */ 00548 Ln = L << 4, /* resolved neutrals run to strong left */ 00549 In = (1<<8), /* increment count of deferred neutrals */ 00550 LnL = (1<<4)+L, /* set run and EN to L */ 00551 }; 00552 00553 static int GetDeferredNeutrals(int action, int level) 00554 { 00555 action = (action >> 4) & 0xF; 00556 if (action == (En >> 4)) 00557 return EmbeddingDirection(level); 00558 else 00559 return action; 00560 } 00561 00562 static int GetResolvedNeutrals(int action) 00563 { 00564 action = action & 0xF; 00565 if (action == In) 00566 return 0; 00567 else 00568 return action; 00569 } 00570 00571 /* state values */ 00572 enum resolvestates 00573 { 00574 /* new temporary class */ 00575 r, /* R and characters resolved to R */ 00576 l, /* L and characters resolved to L */ 00577 rn, /* N preceded by right */ 00578 ln, /* N preceded by left */ 00579 a, /* AN preceded by left (the abbreviation 'la' is used up above) */ 00580 na, /* N preceded by a */ 00581 } ; 00582 00583 00584 /*------------------------------------------------------------------------ 00585 Notes: 00586 00587 By 
rule W7, whenever a EN is 'dominated' by an L (including start of 00588 run with embedding direction = L) it is resolved to, and further treated 00589 as L. 00590 00591 This leads to the need for 'a' and 'na' states. 00592 ------------------------------------------------------------------------*/ 00593 00594 static const int actionNeutrals[][5] = 00595 { 00596 /* N, L, R, AN, EN = cls */ 00597 { In, 0, 0, 0, 0 }, /* r right */ 00598 { In, 0, 0, 0, L }, /* l left */ 00599 00600 { In, En, Rn, Rn, Rn }, /* rn N preceded by right */ 00601 { In, Ln, En, En, LnL}, /* ln N preceded by left */ 00602 00603 { In, 0, 0, 0, L }, /* a AN preceded by left */ 00604 { In, En, Rn, Rn, En }, /* na N preceded by a */ 00605 } ; 00606 00607 static const int stateNeutrals[][5] = 00608 { 00609 /* N, L, R, AN, EN */ 00610 { rn, l, r, r, r }, /* r right */ 00611 { ln, l, r, a, l }, /* l left */ 00612 00613 { rn, l, r, r, r }, /* rn N preceded by right */ 00614 { ln, l, r, a, l }, /* ln N preceded by left */ 00615 00616 { na, l, r, a, l }, /* a AN preceded by left */ 00617 { na, l, r, a, l }, /* na N preceded by la */ 00618 } ; 00619 00620 /*------------------------------------------------------------------------ 00621 Function: resolveNeutrals 00622 00623 Resolves the directionality of neutral character types. 00624 00625 Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm. 00626 00627 Input: Array of embedding levels 00628 Character count 00629 Baselevel 00630 00631 In/Out: Array of directional classes 00632 00633 Note: On input only these directional classes are expected 00634 R, L, N, AN, EN and BN 00635 00636 W8 resolves a number of ENs to L 00637 ------------------------------------------------------------------------*/ 00638 static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int cch) 00639 { 00640 /* the state at the start of text depends on the base level */ 00641 int state = odd(baselevel) ? 
r : l; 00642 int cls; 00643 00644 int cchRun = 0; 00645 int level = baselevel; 00646 00647 int action, clsRun, clsNew; 00648 int ich = 0; 00649 for (; ich < cch; ich++) 00650 { 00651 /* ignore boundary neutrals */ 00652 if (pcls[ich] == BN) 00653 { 00654 /* include in the count for a deferred run */ 00655 if (cchRun) 00656 cchRun++; 00657 00658 /* skip any further processing */ 00659 continue; 00660 } 00661 00662 ASSERT(pcls[ich] < 5); /* "Only N, L, R, AN, EN are allowed" */ 00663 cls = pcls[ich]; 00664 00665 action = actionNeutrals[state][cls]; 00666 00667 /* resolve the directionality for deferred runs */ 00668 clsRun = GetDeferredNeutrals(action, level); 00669 if (clsRun != N) 00670 { 00671 SetDeferredRun(pcls, cchRun, ich, clsRun); 00672 cchRun = 0; 00673 } 00674 00675 /* resolve the directionality class at the current location */ 00676 clsNew = GetResolvedNeutrals(action); 00677 if (clsNew != N) 00678 pcls[ich] = clsNew; 00679 00680 if (In & action) 00681 cchRun++; 00682 00683 state = stateNeutrals[state][cls]; 00684 level = plevel[ich]; 00685 } 00686 00687 /* resolve any deferred runs */ 00688 cls = EmbeddingDirection(level); /* eor has type of current level */ 00689 00690 /* resolve the directionality for deferred runs */ 00691 clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level); 00692 if (clsRun != N) 00693 SetDeferredRun(pcls, cchRun, ich, clsRun); 00694 } 00695 00696 /* RESOLVE IMPLICIT */ 00697 00698 /*------------------------------------------------------------------------ 00699 Function: resolveImplicit 00700 00701 Recursively resolves implicit embedding levels. 00702 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm. 00703 00704 Input: Array of direction classes 00705 Character count 00706 Base level 00707 00708 In/Out: Array of embedding levels 00709 00710 Note: levels may exceed 15 on output. 
00711 Accepted subset of direction classes 00712 R, L, AN, EN 00713 ------------------------------------------------------------------------*/ 00714 static const WORD addLevel[][4] = 00715 { 00716 /* L, R, AN, EN */ 00717 /* even */ { 0, 1, 2, 2, }, 00718 /* odd */ { 1, 0, 1, 1, } 00719 00720 }; 00721 00722 static void resolveImplicit(const WORD * pcls, WORD *plevel, int cch) 00723 { 00724 int ich = 0; 00725 for (; ich < cch; ich++) 00726 { 00727 /* cannot resolve bn here, since some bn were resolved to strong 00728 * types in resolveWeak. To remove these we need the original 00729 * types, which are available again in resolveWhiteSpace */ 00730 if (pcls[ich] == BN) 00731 { 00732 continue; 00733 } 00734 ASSERT(pcls[ich] > 0); /* "No Neutrals allowed to survive here." */ 00735 ASSERT(pcls[ich] < 5); /* "Out of range." */ 00736 plevel[ich] += addLevel[odd(plevel[ich])][pcls[ich] - 1]; 00737 } 00738 } 00739 00740 /************************************************************* 00741 * BIDI_DeterminLevels 00742 */ 00743 BOOL BIDI_DetermineLevels( 00744 LPCWSTR lpString, /* [in] The string for which information is to be returned */ 00745 INT uCount, /* [in] Number of WCHARs in string. 
*/ 00746 const SCRIPT_STATE *s, 00747 const SCRIPT_CONTROL *c, 00748 WORD *lpOutLevels /* [out] final string levels */ 00749 ) 00750 { 00751 WORD *chartype; 00752 unsigned baselevel = 0,j; 00753 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount); 00754 00755 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD)); 00756 if (!chartype) 00757 { 00758 WARN("Out of memory\n"); 00759 return FALSE; 00760 } 00761 00762 baselevel = s->uBidiLevel; 00763 00764 classify(lpString, chartype, uCount, c); 00765 00766 for (j = 0; j < uCount; ++j) 00767 switch(chartype[j]) 00768 { 00769 case B: 00770 case S: 00771 case WS: 00772 case ON: chartype[j] = N; 00773 default: continue; 00774 } 00775 00776 /* resolve explicit */ 00777 resolveExplicit(baselevel, N, chartype, lpOutLevels, uCount, 0); 00778 00779 /* resolve weak */ 00780 resolveWeak(baselevel, chartype, lpOutLevels, uCount); 00781 00782 /* resolve neutrals */ 00783 resolveNeutrals(baselevel, chartype, lpOutLevels, uCount); 00784 00785 /* resolveImplicit */ 00786 resolveImplicit(chartype, lpOutLevels, uCount); 00787 00788 HeapFree(GetProcessHeap(), 0, chartype); 00789 return TRUE; 00790 } 00791 00792 /* reverse cch indexes */ 00793 static void reverse(int *pidx, int cch) 00794 { 00795 int temp; 00796 int ich = 0; 00797 for (; ich < --cch; ich++) 00798 { 00799 temp = pidx[ich]; 00800 pidx[ich] = pidx[cch]; 00801 pidx[cch] = temp; 00802 } 00803 } 00804 00805 00806 /*------------------------------------------------------------------------ 00807 Functions: reorder/reorderLevel 00808 00809 Recursively reorders the display string 00810 "From the highest level down, reverse all characters at that level and 00811 higher, down to the lowest odd level" 00812 00813 Implements rule L2 of the Unicode bidi Algorithm. 00814 00815 Input: Array of embedding levels 00816 Character count 00817 Flag enabling reversal (set to false by initial caller) 00818 00819 In/Out: Text to reorder 00820 00821 Note: levels may exceed 15 resp. 
61 on input. 00822 00823 Rule L3 - reorder combining marks is not implemented here 00824 Rule L4 - glyph mirroring is implemented as a display option below 00825 00826 Note: this should be applied a line at a time 00827 -------------------------------------------------------------------------*/ 00828 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse) 00829 { 00830 int ich = 0; 00831 00832 /* true as soon as first odd level encountered */ 00833 fReverse = fReverse || odd(level); 00834 00835 for (; ich < cch; ich++) 00836 { 00837 if (plevel[ich] < level) 00838 { 00839 break; 00840 } 00841 else if (plevel[ich] > level) 00842 { 00843 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich, 00844 cch - ich, fReverse) - 1; 00845 } 00846 } 00847 if (fReverse) 00848 { 00849 reverse(pIndexs, ich); 00850 } 00851 return ich; 00852 } 00853 00854 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */ 00855 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse) 00856 { 00857 int ich = 0; 00858 int newlevel = -1; 00859 00860 /* true as soon as first odd level encountered */ 00861 fReverse = fReverse || odd(level); 00862 00863 for (; ich < cch; ich++) 00864 { 00865 if (plevel[ich] < level) 00866 break; 00867 else if (plevel[ich] > level) 00868 newlevel = ich; 00869 } 00870 if (fReverse) 00871 { 00872 reverse(pIndexs, ich); 00873 } 00874 00875 if (newlevel > 1) 00876 { 00877 ich = 0; 00878 for (; ich < cch; ich++) 00879 if (plevel[ich] > level) 00880 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich, 00881 cch - ich, fReverse) - 1; 00882 } 00883 00884 return ich; 00885 } 00886 00887 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c, 00888 WORD* lpStrength) 00889 { 00890 int i; 00891 classify(lpString, lpStrength, uCount, c); 00892 00893 for ( i = 0; i < uCount; i++) 00894 { 00895 switch(lpStrength[i]) 
00896 { 00897 case L: 00898 case LRE: 00899 case LRO: 00900 case R: 00901 case AL: 00902 case RLE: 00903 case RLO: 00904 lpStrength[i] = BIDI_STRONG; 00905 break; 00906 case PDF: 00907 case EN: 00908 case ES: 00909 case ET: 00910 case AN: 00911 case CS: 00912 case BN: 00913 lpStrength[i] = BIDI_WEAK; 00914 break; 00915 case B: 00916 case S: 00917 case WS: 00918 case ON: 00919 default: /* Neutrals and NSM */ 00920 lpStrength[i] = BIDI_NEUTRAL; 00921 } 00922 } 00923 return TRUE; 00924 } Generated on Sat May 26 2012 04:25:18 for ReactOS by
1.7.6.1
|