ReactOS  0.4.13-dev-563-g0561610
indic.c
Go to the documentation of this file.
1 /*
2  * Implementation of Indic Syllables for the Uniscribe Script Processor
3  *
4  * Copyright 2011 CodeWeavers, Aric Stewart
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  *
20  */
21 #include "config.h"
22 #include <stdarg.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 
26 #include "windef.h"
27 #include "winbase.h"
28 #include "winuser.h"
29 #include "wingdi.h"
30 #include "winnls.h"
31 #include "usp10.h"
32 #include "winternl.h"
33 
34 #include "wine/debug.h"
35 #include "wine/heap.h"
36 #include "usp10_internal.h"
37 
39 
40 static void debug_output_string(const WCHAR *str, unsigned int char_count, lexical_function f)
41 {
42  int i;
43  if (TRACE_ON(uniscribe))
44  {
45  for (i = 0; i < char_count; ++i)
46  {
47  switch (f(str[i]))
48  {
49  case lex_Consonant: TRACE("C"); break;
50  case lex_Ra: TRACE("Ra"); break;
51  case lex_Vowel: TRACE("V"); break;
52  case lex_Nukta: TRACE("N"); break;
53  case lex_Halant: TRACE("H"); break;
54  case lex_ZWNJ: TRACE("Zwnj"); break;
55  case lex_ZWJ: TRACE("Zwj"); break;
56  case lex_Matra_post: TRACE("Mp");break;
57  case lex_Matra_above: TRACE("Ma");break;
58  case lex_Matra_below: TRACE("Mb");break;
59  case lex_Matra_pre: TRACE("Mm");break;
60  case lex_Modifier: TRACE("Sm"); break;
61  case lex_Vedic: TRACE("Vd"); break;
62  case lex_Anudatta: TRACE("A"); break;
63  case lex_Composed_Vowel: TRACE("t"); break;
64  default:
65  TRACE("X"); break;
66  }
67  }
68  TRACE("\n");
69  }
70 }
71 
72 static inline BOOL is_matra( int type )
73 {
74  return (type == lex_Matra_above || type == lex_Matra_below ||
77 }
78 
79 static inline BOOL is_joiner( int type )
80 {
81  return (type == lex_ZWJ || type == lex_ZWNJ);
82 }
83 
84 static int consonant_header(const WCHAR *input, unsigned int cChar,
85  unsigned int start, unsigned int next, lexical_function lex)
86 {
87  if (!is_consonant( lex(input[next]) )) return -1;
88  next++;
89  if ((next < cChar) && lex(input[next]) == lex_Nukta)
90  next++;
91  if ((next < cChar) && lex(input[next])==lex_Halant)
92  {
93  next++;
94  if((next < cChar) && is_joiner( lex(input[next]) ))
95  next++;
96  if ((next < cChar) && is_consonant( lex(input[next]) ))
97  return next;
98  }
99  else if ((next < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant)
100  {
101  next+=2;
102  if ((next < cChar) && is_consonant( lex(input[next]) ))
103  return next;
104  }
105  return -1;
106 }
107 
108 static int parse_consonant_syllable(const WCHAR *input, unsigned int cChar,
109  unsigned int start, unsigned int *main, unsigned int next, lexical_function lex)
110 {
111  int check;
112  int headers = 0;
113  do
114  {
115  check = consonant_header(input,cChar,start,next,lex);
116  if (check != -1)
117  {
118  next = check;
119  headers++;
120  }
121  } while (check != -1);
122  if (headers || is_consonant( lex(input[next]) ))
123  {
124  *main = next;
125  next++;
126  }
127  else
128  return -1;
129  if ((next < cChar) && lex(input[next]) == lex_Nukta)
130  next++;
131  if ((next < cChar) && lex(input[next]) == lex_Anudatta)
132  next++;
133 
134  if ((next < cChar) && lex(input[next]) == lex_Halant)
135  {
136  next++;
137  if((next < cChar) && is_joiner( lex(input[next]) ))
138  next++;
139  }
140  else if (next < cChar)
141  {
142  while((next < cChar) && is_matra( lex(input[next]) ))
143  next++;
144  if ((next < cChar) && lex(input[next]) == lex_Nukta)
145  next++;
146  if ((next < cChar) && lex(input[next]) == lex_Halant)
147  next++;
148  }
149  if ((next < cChar) && lex(input[next]) == lex_Modifier)
150  next++;
151  if ((next < cChar) && lex(input[next]) == lex_Vedic)
152  next++;
153  return next;
154 }
155 
156 static int parse_vowel_syllable(const WCHAR *input, unsigned int cChar,
157  unsigned int start, unsigned int next, lexical_function lex)
158 {
159  if ((next < cChar) && lex(input[next]) == lex_Nukta)
160  next++;
161  if ((next < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant && is_consonant( lex(input[next+2]) ))
162  next+=3;
163  else if ((next < cChar) && lex(input[next])==lex_Halant && is_consonant( lex(input[next+1]) ))
164  next+=2;
165  else if ((next < cChar) && lex(input[next])==lex_ZWJ && is_consonant( lex(input[next+1]) ))
166  next+=2;
167 
168  if ((next < cChar) && is_matra( lex(input[next]) ))
169  {
170  while((next < cChar) && is_matra( lex(input[next]) ))
171  next++;
172  if ((next < cChar) && lex(input[next]) == lex_Nukta)
173  next++;
174  if ((next < cChar) && lex(input[next]) == lex_Halant)
175  next++;
176  }
177 
178  if ((next < cChar) && lex(input[next]) == lex_Modifier)
179  next++;
180  if ((next < cChar) && lex(input[next]) == lex_Vedic)
181  next++;
182  return next;
183 }
184 
185 static int Indic_process_next_syllable(const WCHAR *input, unsigned int cChar,
186  unsigned int start, unsigned int *main, unsigned int next, lexical_function lex)
187 {
188  if (lex(input[next])==lex_Vowel)
189  {
190  *main = next;
191  return parse_vowel_syllable(input, cChar, start, next+1, lex);
192  }
193  else if ((cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_Vowel)
194  {
195  *main = next+2;
196  return parse_vowel_syllable(input, cChar, start, next+3, lex);
197  }
198 
199  else if (start == next && lex(input[next])==lex_NBSP)
200  {
201  *main = next;
202  return parse_vowel_syllable(input, cChar, start, next+1, lex);
203  }
204  else if (start == next && (cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_NBSP)
205  {
206  *main = next+2;
207  return parse_vowel_syllable(input, cChar, start, next+3, lex);
208  }
209 
210  return parse_consonant_syllable(input, cChar, start, main, next, lex);
211 }
212 
214  const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
215 {
216  if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
217  {
218  if (modern)
219  return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "pstf") > 0);
220  else
221  {
222  WCHAR cc[2];
223  cc[0] = pwChar[s->base];
224  cc[1] = pwChar[s->base-1];
225  return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "pstf") > 0);
226  }
227  }
228  return FALSE;
229 }
230 
232  const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
233 {
234  if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
235  {
236  if (modern)
237  return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "blwf") > 0);
238  else
239  {
240  WCHAR cc[2];
241  cc[0] = pwChar[s->base];
242  cc[1] = pwChar[s->base-1];
243  return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "blwf") > 0);
244  }
245  }
246  return FALSE;
247 }
248 
250  const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
251 {
252  if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
253  {
254  if (modern)
255  return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "pref") > 0);
256  else
257  {
258  WCHAR cc[2];
259  cc[0] = pwChar[s->base];
260  cc[1] = pwChar[s->base-1];
261  return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "pref") > 0);
262  }
263  }
264  return FALSE;
265 }
266 
268  const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical)
269 {
270  if ((lexical(pwChar[s->start])==lex_Ra) && s->end > s->start && lexical(pwChar[s->start+1]) == lex_Halant)
271  return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->start], 1, 2, "rphf") > 0);
272  return FALSE;
273 }
274 
276  const WCHAR *input, IndicSyllable *s, lexical_function lex, BOOL modern)
277 {
278  int i;
279  BOOL blwf = FALSE;
280  BOOL pref = FALSE;
281 
282  /* remove ralf from consideration */
283  if (Consonant_is_ralf(hdc, psa, psc, input, s, lex))
284  {
285  s->ralf = s->start;
286  s->start+=2;
287  }
288 
289  /* try to find a base consonant */
290  if (!is_consonant( lex(input[s->base]) ))
291  {
292  for (i = s->end; i >= s->start; i--)
293  if (is_consonant( lex(input[i]) ))
294  {
295  s->base = i;
296  break;
297  }
298  }
299 
300  while ((blwf = Consonant_is_below_base_form(hdc, psa, psc, input, s, lex, modern)) || Consonant_is_post_base_form(hdc, psa, psc, input, s, lex, modern) || (pref = Consonant_is_pre_base_form(hdc, psa, psc, input, s, lex, modern)))
301  {
302  if (blwf && s->blwf == -1)
303  s->blwf = s->base - 1;
304  if (pref && s->pref == -1)
305  s->pref = s->base - 1;
306 
307  for (i = s->base-1; i >= s->start; i--)
308  if (is_consonant( lex(input[i]) ))
309  {
310  s->base = i;
311  break;
312  }
313  }
314 
315  if (s->ralf >= 0)
316  s->start = s->ralf;
317 
318  if (s->ralf == s->base)
319  s->ralf = -1;
320 
321  return s->base;
322 }
323 
324 void Indic_ParseSyllables(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, const WCHAR *input, unsigned int cChar,
325  IndicSyllable **syllables, int *syllable_count, lexical_function lex, BOOL modern)
326 {
327  unsigned int center = 0;
328  int index = 0;
329  int next = 0;
330 
331  *syllable_count = 0;
332 
333  if (!lex)
334  {
335  ERR("Failure to have required functions\n");
336  return;
337  }
338 
339  debug_output_string(input, cChar, lex);
340  while (next != -1)
341  {
342  while((next < cChar) && lex(input[next]) == lex_Generic)
343  next++;
344  index = next;
345  if (next >= cChar)
346  break;
347  next = Indic_process_next_syllable(input, cChar, 0, &center, index, lex);
348  if (next != -1)
349  {
350  if (*syllable_count)
351  *syllables = HeapReAlloc(GetProcessHeap(),0,*syllables, sizeof(IndicSyllable)*(*syllable_count+1));
352  else
353  *syllables = heap_alloc(sizeof(**syllables));
354  (*syllables)[*syllable_count].start = index;
355  (*syllables)[*syllable_count].base = center;
356  (*syllables)[*syllable_count].ralf = -1;
357  (*syllables)[*syllable_count].blwf = -1;
358  (*syllables)[*syllable_count].pref = -1;
359  (*syllables)[*syllable_count].end = next-1;
360  FindBaseConsonant(hdc, psa, psc, input, &(*syllables)[*syllable_count], lex, modern);
361  index = next;
362  *syllable_count = (*syllable_count)+1;
363  }
364  else if (index < cChar)
365  {
366  TRACE("Processing failed at %i\n",index);
367  next = ++index;
368  }
369  }
370  TRACE("Processed %i of %i characters into %i syllables\n",index,cChar,*syllable_count);
371 }
372 
374  IndicSyllable **syllables, int *syllable_count, lexical_function lex, reorder_function reorder_f, BOOL modern)
375 {
376  int i;
377 
378  if (!reorder_f)
379  {
380  ERR("Failure to have required functions\n");
381  return;
382  }
383 
384  Indic_ParseSyllables(hdc, psa, psc, input, cChar, syllables, syllable_count, lex, modern);
385  for (i = 0; i < *syllable_count; i++)
386  reorder_f(input, &(*syllables)[i], lex);
387 }
int SHAPE_does_GSUB_feature_apply_to_chars(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, const WCHAR *chars, int write_dir, int count, const char *feature)
Definition: shape.c:715
void Indic_ReorderCharacters(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, WCHAR *input, unsigned int cChar, IndicSyllable **syllables, int *syllable_count, lexical_function lex, reorder_function reorder_f, BOOL modern)
Definition: indic.c:373
static BOOL Consonant_is_below_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
Definition: indic.c:231
int main(int argc, char *argv[])
Definition: atactl.cpp:1685
static int consonant_header(const WCHAR *input, unsigned int cChar, unsigned int start, unsigned int next, lexical_function lex)
Definition: indic.c:84
static HDC
Definition: imagelist.c:92
GLuint GLuint GLsizei GLenum type
Definition: gl.h:1545
vector< Header * > headers
Definition: sdkparse.cpp:39
void(* reorder_function)(WCHAR *chars, IndicSyllable *syllable, lexical_function lex)
static void * heap_alloc(size_t len)
Definition: appwiz.h:65
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
static BOOL Consonant_is_pre_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
Definition: indic.c:249
static int FindBaseConsonant(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, const WCHAR *input, IndicSyllable *s, lexical_function lex, BOOL modern)
Definition: indic.c:275
static BOOL Consonant_is_post_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
Definition: indic.c:213
unsigned int BOOL
Definition: ntddk_ex.h:94
void Indic_ParseSyllables(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, const WCHAR *input, unsigned int cChar, IndicSyllable **syllables, int *syllable_count, lexical_function lex, BOOL modern)
Definition: indic.c:324
int(* lexical_function)(WCHAR c)
const WCHAR * str
GLuint index
Definition: glext.h:6031
GLfloat f
Definition: glext.h:7540
#define TRACE(s)
Definition: solgame.cpp:4
#define GetProcessHeap()
Definition: compat.h:395
__wchar_t WCHAR
Definition: xmlstorage.h:180
static BOOL is_matra(int type)
Definition: indic.c:72
void check(CONTEXT *pContext)
Definition: NtContinue.c:61
#define index(s, c)
Definition: various.h:29
HDC hdc
Definition: main.c:9
GLdouble s
Definition: gl.h:2039
static BOOL is_consonant(int type)
static int parse_consonant_syllable(const WCHAR *input, unsigned int cChar, unsigned int start, unsigned int *main, unsigned int next, lexical_function lex)
Definition: indic.c:108
static int Indic_process_next_syllable(const WCHAR *input, unsigned int cChar, unsigned int start, unsigned int *main, unsigned int next, lexical_function lex)
Definition: indic.c:185
WINE_DEFAULT_DEBUG_CHANNEL(uniscribe)
#define ERR(fmt,...)
Definition: debug.h:109
uint32_t cc
Definition: isohybrid.c:75
static unsigned __int64 next
Definition: rand_nt.c:6
GLenum GLenum GLenum input
Definition: glext.h:9031
static void debug_output_string(const WCHAR *str, unsigned int char_count, lexical_function f)
Definition: indic.c:40
static SCRIPT_CACHE * psc
Definition: usp10.c:64
GLuint start
Definition: gl.h:1545
#define f
Definition: ke_i.h:83
#define HeapReAlloc
Definition: compat.h:393
static SCRIPT_CACHE SCRIPT_ANALYSIS * psa
Definition: usp10.c:64
static BOOL Consonant_is_ralf(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical)
Definition: indic.c:267
#define TRACE_ON(x)
Definition: compat.h:65
static int parse_vowel_syllable(const WCHAR *input, unsigned int cChar, unsigned int start, unsigned int next, lexical_function lex)
Definition: indic.c:156
static BOOL is_joiner(int type)
Definition: indic.c:79