Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > create_nls.c
Go to the documentation of this file.
00001 /* 00002 * Tool for creating NT-like NLS files for Unicode <-> Codepage conversions. 00003 * Tool for creating NT-like l_intl.nls file for case mapping of unicode 00004 * characters. 00005 * Copyright 2000 Timoshkov Dmitry 00006 * Copyright 2001 Matei Alexandru 00007 * 00008 * Sources of information: 00009 * Andrew Kozin's YAW project http://www.chat.ru/~stanson/yaw_en.html 00010 * Ove Kõven's investigations http://www.ping.uio.no/~ovehk/nls 00011 */ 00012 #include <windows.h> 00013 #include <stdio.h> 00014 #include <stdlib.h> 00015 #include <malloc.h> 00016 #include <string.h> 00017 #include <ctype.h> 00018 00019 static const WCHAR * const uprtable[256]; 00020 static const WCHAR * const lwrtable[256]; 00021 00022 #define NLSDIR "../../media/nls" 00023 #define LIBDIR "unicode.org/" 00024 00025 typedef struct { 00026 WORD wSize; /* in words 0x000D */ 00027 WORD CodePage; 00028 WORD MaxCharSize; /* 1 or 2 */ 00029 BYTE DefaultChar[MAX_DEFAULTCHAR]; 00030 WCHAR UnicodeDefaultChar; 00031 WCHAR unknown1; 00032 WCHAR unknown2; 00033 BYTE LeadByte[MAX_LEADBYTES]; 00034 } __attribute__((packed)) NLS_FILE_HEADER; 00035 00036 /* 00037 Support for translation from the multiple unicode chars 00038 to the single code page char. 
00039 00040 002D;HYPHEN-MINUS;Pd;0;ET;;;;;N;;;;; 00041 00AD;SOFT HYPHEN;Pd;0;ON;;;;;N;;;;; 00042 2010;HYPHEN;Pd;0;ON;;;;;N;;;;; 00043 2011;NON-BREAKING HYPHEN;Pd;0;ON;<noBreak> 2010;;;;N;;;;; 00044 2013;EN DASH;Pd;0;ON;;;;;N;;;;; 00045 2014;EM DASH;Pd;0;ON;;;;;N;;;;; 00046 2015;HORIZONTAL BAR;Pd;0;ON;;;;;N;QUOTATION DASH;;;; 00047 */ 00048 00049 /* HYPHEN-MINUS aliases */ 00050 static WCHAR hyphen_aliases[] = {0x00AD,0x2010,0x2011,0x2013,0x2014,0x2015,0}; 00051 00052 static struct { 00053 WCHAR cp_char; 00054 WCHAR *alias; /* must be 0 terminated */ 00055 } u2cp_alias[] = { 00056 /* HYPHEN-MINUS aliases */ 00057 {0x002D, hyphen_aliases} 00058 }; 00059 00060 static void patch_aliases(void *u2cp, CPINFOEXA *cpi) 00061 { 00062 int i, j; 00063 WCHAR *wc, *alias; 00064 BYTE *c; 00065 00066 if(cpi->MaxCharSize == 2) { 00067 wc = (WCHAR *)u2cp; 00068 for(i = 0; i < 65536; i++) { 00069 for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) { 00070 alias = u2cp_alias[j].alias; 00071 while(*alias) { 00072 if(*alias == i && wc[i] == *(WCHAR *)cpi->DefaultChar) { 00073 wc[i] = u2cp_alias[j].cp_char; 00074 } 00075 alias++; 00076 } 00077 } 00078 } 00079 } 00080 else { 00081 c = (BYTE *)u2cp; 00082 for(i = 0; i < 65536; i++) { 00083 for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) { 00084 alias = u2cp_alias[j].alias; 00085 while(*alias) { 00086 if(*alias == i && c[i] == cpi->DefaultChar[0] && u2cp_alias[j].cp_char < 256) { 00087 c[i] = (BYTE)u2cp_alias[j].cp_char; 00088 } 00089 alias++; 00090 } 00091 } 00092 } 00093 } 00094 } 00095 00096 static BOOL write_unicode2cp_table(FILE *out, CPINFOEXA *cpi, WCHAR *table) 00097 { 00098 void *u2cp; 00099 WCHAR *wc; 00100 CHAR *c; 00101 int i; 00102 BOOL ret = TRUE; 00103 00104 u2cp = malloc(cpi->MaxCharSize * 65536); 00105 if(!u2cp) { 00106 printf("Not enough memory for Unicode to Codepage table\n"); 00107 return FALSE; 00108 } 00109 00110 if(cpi->MaxCharSize == 2) { 00111 wc = (WCHAR *)u2cp; 00112 for(i = 0; i < 65536; 
i++) 00113 wc[i] = *(WCHAR *)cpi->DefaultChar; 00114 00115 for(i = 0; i < 65536; i++) 00116 if (table[i] != '?') 00117 wc[table[i]] = (WCHAR)i; 00118 } 00119 else { 00120 c = (CHAR *)u2cp; 00121 for(i = 0; i < 65536; i++) 00122 c[i] = cpi->DefaultChar[0]; 00123 00124 for(i = 0; i < 256; i++) 00125 if (table[i] != '?') 00126 c[table[i]] = (CHAR)i; 00127 } 00128 00129 patch_aliases(u2cp, cpi); 00130 00131 if(fwrite(u2cp, 1, cpi->MaxCharSize * 65536, out) != cpi->MaxCharSize * 65536) 00132 ret = FALSE; 00133 00134 free(u2cp); 00135 00136 return ret; 00137 } 00138 00139 static BOOL write_lb_ranges(FILE *out, CPINFOEXA *cpi, WCHAR *table) 00140 { 00141 WCHAR sub_table[256]; 00142 WORD offset, offsets[256]; 00143 int i, j, range; 00144 00145 memset(offsets, 0, sizeof(offsets)); 00146 00147 offset = 0; 00148 00149 for(i = 0; i < MAX_LEADBYTES; i += 2) { 00150 for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) { 00151 offset += 256; 00152 offsets[range] = offset; 00153 } 00154 } 00155 00156 if(fwrite(offsets, 1, sizeof(offsets), out) != sizeof(offsets)) 00157 return FALSE; 00158 00159 for(i = 0; i < MAX_LEADBYTES; i += 2) { 00160 for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) { 00161 /*printf("Writing sub table for LeadByte %02X\n", range);*/ 00162 for(j = MAKEWORD(0, range); j <= MAKEWORD(0xFF, range); j++) { 00163 sub_table[j - MAKEWORD(0, range)] = table[j]; 00164 } 00165 00166 if(fwrite(sub_table, 1, sizeof(sub_table), out) != sizeof(sub_table)) 00167 return FALSE; 00168 } 00169 } 00170 00171 return TRUE; 00172 } 00173 00174 static BOOL create_nls_file(char *name, CPINFOEXA *cpi, WCHAR *table, WCHAR *oemtable) 00175 { 00176 FILE *out; 00177 NLS_FILE_HEADER nls; 00178 WORD wValue, number_of_lb_ranges, number_of_lb_subtables, i; 00179 00180 printf("Creating NLS table \"%s\"\n", name); 00181 00182 if(!(out = fopen(name, "wb"))) { 00183 printf("Could not create file \"%s\"\n", name); 00184 return FALSE; 
00185 } 00186 00187 memset(&nls, 0, sizeof(nls)); 00188 00189 nls.wSize = sizeof(nls) / sizeof(WORD); 00190 nls.CodePage = cpi->CodePage; 00191 nls.MaxCharSize = cpi->MaxCharSize; 00192 memcpy(nls.DefaultChar, cpi->DefaultChar, MAX_DEFAULTCHAR); 00193 nls.UnicodeDefaultChar = cpi->UnicodeDefaultChar; 00194 nls.unknown1 = '?'; 00195 nls.unknown2 = '?'; 00196 memcpy(nls.LeadByte, cpi->LeadByte, MAX_LEADBYTES); 00197 00198 if(fwrite(&nls, 1, sizeof(nls), out) != sizeof(nls)) { 00199 fclose(out); 00200 printf("Could not write to file \"%s\"\n", name); 00201 return FALSE; 00202 } 00203 00204 number_of_lb_ranges = 0; 00205 number_of_lb_subtables = 0; 00206 00207 for(i = 0; i < MAX_LEADBYTES; i += 2) { 00208 if(cpi->LeadByte[i] != 0 && cpi->LeadByte[i + 1] > cpi->LeadByte[i]) { 00209 number_of_lb_ranges++; 00210 number_of_lb_subtables += cpi->LeadByte[i + 1] - cpi->LeadByte[i] + 1; 00211 } 00212 } 00213 00214 /*printf("Number of LeadByte ranges %d\n", number_of_lb_ranges);*/ 00215 /*printf("Number of LeadByte subtables %d\n", number_of_lb_subtables);*/ 00216 00217 /* Calculate offset to Unicode to CP table in words: 00218 * 1. (256 * sizeof(WORD)) primary CP to Unicode table + 00219 * 2. (WORD) optional OEM glyph table size in words + 00220 * 3. OEM glyph table size in words * sizeof(WORD) + 00221 * 4. (WORD) Number of DBCS LeadByte ranges + 00222 * 5. if (Number of DBCS LeadByte ranges != 0) 256 * sizeof(WORD) offsets of lead byte sub tables 00223 * 6. (Number of DBCS LeadByte sub tables * 256 * sizeof(WORD)) LeadByte sub tables + 00224 * 7. (WORD) Unknown flag 00225 */ 00226 00227 wValue = (256 * sizeof(WORD) + /* 1 */ 00228 sizeof(WORD) + /* 2 */ 00229 ((oemtable !=NULL) ? (256 * sizeof(WORD)) : 0) + /* 3 */ 00230 sizeof(WORD) + /* 4 */ 00231 ((number_of_lb_subtables != 0) ? 
256 * sizeof(WORD) : 0) + /* 5 */ 00232 number_of_lb_subtables * 256 * sizeof(WORD) + /* 6 */ 00233 sizeof(WORD) /* 7 */ 00234 ) / sizeof(WORD); 00235 00236 /* offset of Unicode to CP table in words */ 00237 fwrite(&wValue, 1, sizeof(wValue), out); 00238 00239 /* primary CP to Unicode table */ 00240 if(fwrite(table, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) { 00241 fclose(out); 00242 printf("Could not write to file \"%s\"\n", name); 00243 return FALSE; 00244 } 00245 00246 /* optional OEM glyph table size in words */ 00247 wValue = (oemtable != NULL) ? (256 * sizeof(WORD)) : 0; 00248 fwrite(&wValue, 1, sizeof(wValue), out); 00249 00250 /* optional OEM to Unicode table */ 00251 if (oemtable) { 00252 if(fwrite(oemtable, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) { 00253 fclose(out); 00254 printf("Could not write to file \"%s\"\n", name); 00255 return FALSE; 00256 } 00257 } 00258 00259 /* Number of DBCS LeadByte ranges */ 00260 fwrite(&number_of_lb_ranges, 1, sizeof(number_of_lb_ranges), out); 00261 00262 /* offsets of lead byte sub tables and lead byte sub tables */ 00263 if(number_of_lb_ranges > 0) { 00264 if(!write_lb_ranges(out, cpi, table)) { 00265 fclose(out); 00266 printf("Could not write to file \"%s\"\n", name); 00267 return FALSE; 00268 } 00269 } 00270 00271 /* Unknown flag */ 00272 wValue = 0; 00273 fwrite(&wValue, 1, sizeof(wValue), out); 00274 00275 if(!write_unicode2cp_table(out, cpi, table)) { 00276 fclose(out); 00277 printf("Could not write to file \"%s\"\n", name); 00278 return FALSE; 00279 } 00280 00281 fclose(out); 00282 return TRUE; 00283 } 00284 00285 /* correct the codepage information such as default chars */ 00286 static void patch_codepage_info(CPINFOEXA *cpi) 00287 { 00288 /* currently nothing */ 00289 } 00290 00291 static WCHAR *Load_CP2Unicode_Table(char *table_name, UINT cp, CPINFOEXA *cpi) 00292 { 00293 char buf[256]; 00294 char *p; 00295 DWORD n, value; 00296 FILE *file; 00297 WCHAR *table; 00298 int lb_ranges, 
lb_range_started, line; 00299 00300 printf("Loading translation table \"%s\"\n", table_name); 00301 00302 /* Init to default values */ 00303 memset(cpi, 0, sizeof(CPINFOEXA)); 00304 cpi->CodePage = cp; 00305 *(WCHAR *)cpi->DefaultChar = '?'; 00306 cpi->MaxCharSize = 1; 00307 cpi->UnicodeDefaultChar = '?'; 00308 00309 patch_codepage_info(cpi); 00310 00311 table = (WCHAR *)malloc(sizeof(WCHAR) * 65536); 00312 if(!table) { 00313 printf("Not enough memory for Codepage to Unicode table\n"); 00314 return NULL; 00315 } 00316 00317 for(n = 0; n < 256; n++) 00318 table[n] = (WCHAR)n; 00319 00320 for(n = 256; n < 65536; n++) 00321 table[n] = cpi->UnicodeDefaultChar; 00322 00323 file = fopen(table_name, "r"); 00324 if(file == NULL) { 00325 free(table); 00326 return NULL; 00327 } 00328 00329 line = 0; 00330 lb_ranges = 0; 00331 lb_range_started = 0; 00332 00333 while(fgets(buf, sizeof(buf), file)) { 00334 line++; 00335 p = buf; 00336 while(isspace(*p)) p++; 00337 00338 if(!*p || p[0] == '#') 00339 continue; 00340 00341 n = strtol(p, &p, 0); 00342 if(n > 0xFFFF) { 00343 printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, n, table_name); 00344 continue; 00345 } 00346 00347 if(n > 0xFF && cpi->MaxCharSize != 2) { 00348 /*printf("Line %d: Entry 0x%04lX: Switching to DBCS\n", line, n);*/ 00349 cpi->MaxCharSize = 2; 00350 } 00351 00352 while(isspace(*p)) p++; 00353 00354 if(!*p || p[0] == '#') { 00355 /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/ 00356 } 00357 else { 00358 value = strtol(p, &p, 0); 00359 if(value > 0xFFFF) { 00360 printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, n, table_name); 00361 } 00362 table[n] = (WCHAR)value; 00363 } 00364 00365 /* wait for comment */ 00366 while(*p && *p != '#') p++; 00367 00368 if(*p == '#' && strstr(p, "DBCS LEAD BYTE")) { 00369 /*printf("Line %d, entry 0x%02lX DBCS LEAD BYTE\n", line, n);*/ 00370 if(n > 0xFF) { 00371 printf("Line %d: Entry 0x%04lX: Error: DBCS lead byte 
overflowed\n", line, n); 00372 continue; 00373 } 00374 00375 table[n] = (WCHAR)0; 00376 00377 if(lb_range_started) { 00378 cpi->LeadByte[(lb_ranges - 1) * 2 + 1] = (BYTE)n; 00379 } 00380 else { 00381 /*printf("Line %d: Starting new DBCS lead byte range, entry 0x%02lX\n", line, n);*/ 00382 if(lb_ranges < MAX_LEADBYTES/2) { 00383 lb_ranges++; 00384 lb_range_started = 1; 00385 cpi->LeadByte[(lb_ranges - 1) * 2] = (BYTE)n; 00386 } 00387 else 00388 printf("Line %d: Error: could not start new lead byte range\n", line); 00389 } 00390 } 00391 else { 00392 if(lb_range_started) 00393 lb_range_started = 0; 00394 } 00395 } 00396 00397 fclose(file); 00398 00399 return table; 00400 } 00401 00402 static WCHAR *Load_OEM2Unicode_Table(char *table_name, WCHAR *def_table, UINT cp, CPINFOEXA *cpi) 00403 { 00404 char buf[256]; 00405 char *p; 00406 DWORD n, value; 00407 FILE *file; 00408 WCHAR *table; 00409 int line; 00410 00411 printf("Loading oem glyph table \"%s\"\n", table_name); 00412 00413 table = (WCHAR *)malloc(sizeof(WCHAR) * 65536); 00414 if(!table) { 00415 printf("Not enough memory for Codepage to Unicode table\n"); 00416 return NULL; 00417 } 00418 00419 memcpy(table, def_table, 65536 * sizeof(WCHAR)); 00420 00421 file = fopen(table_name, "r"); 00422 if(file == NULL) { 00423 free(table); 00424 return NULL; 00425 } 00426 00427 while(fgets(buf, sizeof(buf), file)) { 00428 line++; 00429 p = buf; 00430 while(isspace(*p)) p++; 00431 00432 if(!*p || p[0] == '#') 00433 continue; 00434 00435 value = strtol(p, &p, 16); 00436 if(value > 0xFFFF) { 00437 printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, value, table_name); 00438 continue; 00439 } 00440 00441 while(isspace(*p)) p++; 00442 00443 if(!*p || p[0] == '#') { 00444 /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/ 00445 continue; 00446 } 00447 else { 00448 n = strtol(p, &p, 16); 00449 if(n > 0xFFFF) { 00450 printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, value, 
table_name); 00451 continue; 00452 } 00453 } 00454 00455 if (cpi->CodePage == 864) { 00456 while(isspace(*p)) p++; 00457 00458 if(!*p || p[0] == '#' || p[0] == '-') { 00459 /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/ 00460 continue; 00461 } 00462 else { 00463 n = strtol(p, &p, 16); 00464 if(n > 0xFFFF) { 00465 printf("Line %d: Entry 0x%06lX oem value: File \"%s\" corrupted\n", line, value, table_name); 00466 } 00467 continue; 00468 } 00469 } 00470 00471 table[n] = (WCHAR)value; 00472 } 00473 00474 fclose(file); 00475 00476 return table; 00477 } 00478 00479 int write_nls_files() 00480 { 00481 WCHAR *table; 00482 WCHAR *oemtable; 00483 char nls_filename[256]; 00484 CPINFOEXA cpi; 00485 int i; 00486 struct code_page { 00487 UINT cp; 00488 BOOL oem; 00489 char *table_filename; 00490 char *comment; 00491 } pages[] = { 00492 {37, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT", "IBM EBCDIC US Canada"}, 00493 {424, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP424.TXT", "IBM EBCDIC Hebrew"}, 00494 {437, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT", "OEM United States"}, 00495 {500, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT", "IBM EBCDIC International"}, 00496 /*{708, FALSE, "", "Arabic ASMO"},*/ 00497 /*{720, FALSE, "", "Arabic Transparent ASMO"},*/ 00498 {737, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT", "OEM Greek 437G"}, 00499 {775, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT", "OEM Baltic"}, 00500 {850, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT", "OEM Multilingual Latin 1"}, 00501 {852, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT", "OEM Slovak Latin 2"}, 00502 {855, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT", "OEM Cyrillic" }, 00503 {856, TRUE, LIBDIR"MAPPINGS/VENDORS/MISC/CP856.TXT", "Hebrew PC"}, 00504 {857, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT", "OEM Turkish"}, 00505 {860, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT", "OEM Portuguese"}, 00506 {861, TRUE, 
LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT", "OEM Icelandic"}, 00507 {862, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT", "OEM Hebrew"}, 00508 {863, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT", "OEM Canadian French"}, 00509 {864, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT", "OEM Arabic"}, 00510 {865, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT", "OEM Nordic"}, 00511 {866, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT", "OEM Russian"}, 00512 {869, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT", "OEM Greek"}, 00513 /*{870, FALSE, "", "IBM EBCDIC Multilingual/ROECE (Latin 2)"},*/ 00514 {874, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP874.TXT", "ANSI/OEM Thai"}, 00515 {875, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT", "IBM EBCDIC Greek"}, 00516 {878, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"}, 00517 {932, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT", "ANSI/OEM Japanese Shift-JIS"}, 00518 {936, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT", "ANSI/OEM Simplified Chinese GBK"}, 00519 {949, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT", "ANSI/OEM Korean Unified Hangul"}, 00520 {950, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT", "ANSI/OEM Traditional Chinese Big5"}, 00521 {1006, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP1006.TXT", "IBM Arabic"}, 00522 {1026, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT", "IBM EBCDIC Latin 5 Turkish"}, 00523 {1250, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT", "ANSI Eastern Europe"}, 00524 {1251, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT", "ANSI Cyrillic"}, 00525 {1252, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT", "ANSI Latin 1"}, 00526 {1253, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT", "ANSI Greek"}, 00527 {1254, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT", "ANSI Turkish"}, 00528 {1255, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT", "ANSI 
Hebrew"}, 00529 {1256, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT", "ANSI Arabic"}, 00530 {1257, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT", "ANSI Baltic"}, 00531 {1258, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT", "ANSI/OEM Viet Nam"}, 00532 {10000, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT", "Mac Roman"}, 00533 {10006, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT", "Mac Greek"}, 00534 {10007, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT", "Mac Cyrillic"}, 00535 {10029, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT", "Mac Latin 2"}, 00536 {10079, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT", "Mac Icelandic"}, 00537 {10081, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT", "Mac Turkish"}, 00538 /*{20000, FALSE, "", "CNS Taiwan"},*/ 00539 /*{20001, FALSE, "", "TCA Taiwan"},*/ 00540 /*{20002, FALSE, "", "Eten Taiwan"},*/ 00541 /*{20003, FALSE, "", "IBM5550 Taiwan"},*/ 00542 /*{20004, FALSE, "", "TeleText Taiwan"},*/ 00543 /*{20005, FALSE, "", "Wang Taiwan"},*/ 00544 /*{20105, FALSE, "", "IA5 IRV International Alphabet No.5"},*/ 00545 /*{20106, FALSE, "", "IA5 German"},*/ 00546 /*{20107, FALSE, "", "IA5 Swedish"},*/ 00547 /*{20108, FALSE, "", "IA5 Norwegian"},*/ 00548 /*{20127, FALSE, "", "US ASCII"}, */ 00549 /*{20261, FALSE, "", "T.61"},*/ 00550 /*{20269, FALSE, "", "ISO 6937 NonSpacing Accent"},*/ 00551 /*{20273, FALSE, "", "IBM EBCDIC Germany"},*/ 00552 /*{20277, FALSE, "", "IBM EBCDIC Denmark/Norway"},*/ 00553 /*{20278, FALSE, "", "IBM EBCDIC Finland/Sweden"},*/ 00554 /*{20280, FALSE, "", "IBM EBCDIC Italy"},*/ 00555 /*{20284, FALSE, "", "IBM EBCDIC Latin America/Spain"},*/ 00556 /*{20285, FALSE, "", "IBM EBCDIC United Kingdom"},*/ 00557 /*{20290, FALSE, "", "IBM EBCDIC Japanese Katakana Extended"},*/ 00558 /*{20297, FALSE, "", "IBM EBCDIC France"},*/ 00559 /*{20420, FALSE, "", "IBM EBCDIC Arabic"},*/ 00560 /*{20423, FALSE, "IBM869.TXT", "IBM EBCDIC Greek"},*/ 00561 
/*{20424, FALSE, "", "IBM EBCDIC Hebrew"},*/ 00562 /*{20833, FALSE, "", "IBM EBCDIC Korean Extended"},*/ 00563 /*{20838, FALSE, "", "IBM EBCDIC Thai"},*/ 00564 {20871, FALSE, "ReactOS/IBMCP861.TXT", "IBM EBCDIC Icelandic"}, 00565 /*{20880, FALSE, "", "IBM EBCDIC Cyrillic (Russian)"},*/ 00566 {20866, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"}, 00567 /*{20905, FALSE, "", "IBM EBCDIC Turkish"},*/ 00568 /*{21025, FALSE, "", "IBM EBCDIC Cyrillic (Serbian, Bulgarian)"},*/ 00569 /*{21027, FALSE, "", "Ext Alpha Lowercase"},*/ 00570 {28591, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-1.TXT", "ISO 8859-1 Latin 1"}, 00571 {28592, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-2.TXT", "ISO 8859-2 Eastern Europe"}, 00572 {28593, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-3.TXT", "ISO 8859-3 Turkish"}, 00573 {28594, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-4.TXT", "ISO 8859-4 Baltic"}, 00574 {28595, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-5.TXT", "ISO 8859-5 Cyrillic"}, 00575 {28596, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-6.TXT", "ISO 8859-6 Arabic"}, 00576 {28597, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-7.TXT", "ISO 8859-7 Greek"}, 00577 {28598, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-8.TXT", "ISO 8859-8 Hebrew"}, 00578 {28599, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-9.TXT", "ISO 8859-9 Latin 5"} 00579 }; 00580 00581 for(i = 0; i < sizeof(pages)/sizeof(pages[0]); i++) { 00582 table = Load_CP2Unicode_Table(pages[i].table_filename, pages[i].cp, &cpi); 00583 if(!table) { 00584 printf("Could not load \"%s\" (%s)\n", pages[i].table_filename, pages[i].comment); 00585 continue; 00586 } 00587 00588 if (pages[i].oem) { 00589 oemtable = Load_OEM2Unicode_Table(LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", table, pages[i].cp, &cpi); 00590 if(!oemtable) { 00591 printf("Could not load \"%s\" (%s)\n", LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", "IBM OEM glyph table"); 00592 continue; 00593 } 00594 } 00595 00596 sprintf(nls_filename, "%s/c_%03d.nls", NLSDIR, cpi.CodePage); 00597 if(!create_nls_file(nls_filename, &cpi, 
table, pages[i].oem ? oemtable : NULL)) { 00598 printf("Could not write \"%s\" (%s)\n", nls_filename, pages[i].comment); 00599 } 00600 00601 if (pages[i].oem) 00602 free(oemtable); 00603 00604 free(table); 00605 } 00606 00607 return 0; 00608 } 00609 00610 00611 00612 static WORD *to_upper_org = NULL, *to_lower_org = NULL; 00613 00614 #if 0 00615 static WORD diffs[256]; 00616 static int number_of_diffs; 00617 #endif 00618 00619 static WORD number_of_subtables_with_diffs; 00620 /* pointers to subtables with 16 elements in each to the main table */ 00621 static WORD *subtables_with_diffs[4096]; 00622 00623 static WORD number_of_subtables_with_offsets; 00624 /* subtables with 16 elements */ 00625 static WORD subtables_with_offsets[4096 * 16]; 00626 00627 static void test_packed_table(WCHAR *table) 00628 { 00629 WCHAR test_str[] = L"This is an English text. \x0CF\x0EE-\x0F0\x0F3\x0F1\x0F1\x0EA\x0E8 \x0FF \x0EF\x0E8\x0F1\x0E0\x0F2\x0FC \x0F3\x0EC\x0E5\x0FE \x0ED\x0E5\x0EC\x0ED\x0EE\x0E6\x0EA\x0EE. 
1234567890"; 00630 //WORD diff, off; 00631 //WORD *sub_table; 00632 DWORD i, len; 00633 00634 len = lstrlenW(test_str); 00635 00636 for(i = 0; i < len + 1; i++) { 00637 /*off = table[HIBYTE(test_str[i])]; 00638 00639 sub_table = table + off; 00640 off = sub_table[LOBYTE(test_str[i]) >> 4]; 00641 00642 sub_table = table + off; 00643 off = LOBYTE(test_str[i]) & 0x0F; 00644 00645 diff = sub_table[off]; 00646 00647 test_str[i] += diff;*/ 00648 test_str[i] += table[table[table[HIBYTE(test_str[i])] + (LOBYTE(test_str[i]) >> 4)] + (LOBYTE(test_str[i]) & 0x0F)]; 00649 } 00650 /* 00651 { 00652 FILE *file; 00653 static int n = 0; 00654 char name[20]; 00655 00656 sprintf(name, "text%02d.dat", n++); 00657 file = fopen(name, "wb"); 00658 fwrite(test_str, len * sizeof(WCHAR), 1, file); 00659 fclose(file); 00660 }*/ 00661 } 00662 00663 static BOOL CreateCaseDiff(char *table_name) 00664 { 00665 char buf[256]; 00666 char *p; 00667 WORD code, case_mapping; 00668 FILE *file; 00669 int line; 00670 00671 to_upper_org = (WORD *)calloc(65536, sizeof(WORD)); 00672 if(!to_upper_org) { 00673 printf("Not enough memory for to upper table\n"); 00674 return FALSE; 00675 } 00676 00677 to_lower_org = (WORD *)calloc(65536, sizeof(WORD)); 00678 if(!to_lower_org) { 00679 printf("Not enough memory for to lower table\n"); 00680 return FALSE; 00681 } 00682 00683 file = fopen(table_name, "r"); 00684 if(file == NULL) { 00685 printf("Could not open file \"%s\"\n", table_name); 00686 return FALSE; 00687 } 00688 00689 line = 0; 00690 00691 while(fgets(buf, sizeof(buf), file)) { 00692 line++; 00693 p = buf; 00694 while(*p && isspace(*p)) p++; 00695 00696 if(!*p) 00697 continue; 00698 00699 /* 0. Code value */ 00700 code = (WORD)strtol(p, &p, 16); 00701 00702 //if(code != 0x9A0 && code != 0xBA0) 00703 //continue; 00704 00705 while(*p && *p != ';') p++; 00706 if(!*p) 00707 continue; 00708 p++; 00709 00710 /* 1. 
Character name */ 00711 while(*p && *p != ';') p++; 00712 if(!*p) 00713 continue; 00714 p++; 00715 00716 /* 2. General Category */ 00717 while(*p && *p != ';') p++; 00718 if(!*p) 00719 continue; 00720 p++; 00721 00722 /* 3. Canonical Combining Classes */ 00723 while(*p && *p != ';') p++; 00724 if(!*p) 00725 continue; 00726 p++; 00727 00728 /* 4. Bidirectional Category */ 00729 while(*p && *p != ';') p++; 00730 if(!*p) 00731 continue; 00732 p++; 00733 00734 /* 5. Character Decomposition Mapping */ 00735 while(*p && *p != ';') p++; 00736 if(!*p) 00737 continue; 00738 p++; 00739 00740 /* 6. Decimal digit value */ 00741 while(*p && *p != ';') p++; 00742 if(!*p) 00743 continue; 00744 p++; 00745 00746 /* 7. Digit value */ 00747 while(*p && *p != ';') p++; 00748 if(!*p) 00749 continue; 00750 p++; 00751 00752 /* 8. Numeric value */ 00753 while(*p && *p != ';') p++; 00754 if(!*p) 00755 continue; 00756 p++; 00757 00758 /* 9. Mirrored */ 00759 while(*p && *p != ';') p++; 00760 if(!*p) 00761 continue; 00762 p++; 00763 00764 /* 10. Unicode 1.0 Name */ 00765 while(*p && *p != ';') p++; 00766 if(!*p) 00767 continue; 00768 p++; 00769 00770 /* 11. 10646 comment field */ 00771 while(*p && *p != ';') p++; 00772 if(!*p) 00773 continue; 00774 p++; 00775 00776 /* 12. Uppercase Mapping */ 00777 while(*p && isspace(*p)) p++; 00778 if(!*p) continue; 00779 if(*p != ';') { 00780 case_mapping = (WORD)strtol(p, &p, 16); 00781 to_upper_org[code] = case_mapping - code; 00782 while(*p && *p != ';') p++; 00783 } 00784 else 00785 p++; 00786 00787 /* 13. Lowercase Mapping */ 00788 while(*p && isspace(*p)) p++; 00789 if(!*p) continue; 00790 if(*p != ';') { 00791 case_mapping = (WORD)strtol(p, &p, 16); 00792 to_lower_org[code] = case_mapping - code; 00793 while(*p && *p != ';') p++; 00794 } 00795 else 00796 p++; 00797 00798 /* 14. 
Titlecase Mapping */ 00799 while(*p && *p != ';') p++; 00800 if(!*p) 00801 continue; 00802 p++; 00803 } 00804 00805 fclose(file); 00806 00807 return TRUE; 00808 } 00809 00810 #if 0 00811 static int find_diff(WORD diff) 00812 { 00813 int i; 00814 00815 for(i = 0; i < number_of_diffs; i++) { 00816 if(diffs[i] == diff) 00817 return i; 00818 } 00819 00820 return -1; 00821 } 00822 #endif 00823 00824 static WORD find_subtable_with_diffs(WORD *table, WORD *subtable) 00825 { 00826 WORD index; 00827 00828 for(index = 0; index < number_of_subtables_with_diffs; index++) { 00829 if(memcmp(subtables_with_diffs[index], subtable, 16 * sizeof(WORD)) == 0) { 00830 return index; 00831 } 00832 } 00833 00834 if(number_of_subtables_with_diffs >= 4096) { 00835 printf("Could not add new subtable with diffs, storage is full\n"); 00836 return 0; 00837 } 00838 00839 subtables_with_diffs[number_of_subtables_with_diffs] = subtable; 00840 number_of_subtables_with_diffs++; 00841 00842 return index; 00843 } 00844 00845 static WORD find_subtable_with_offsets(WORD *subtable) 00846 { 00847 WORD index; 00848 00849 for(index = 0; index < number_of_subtables_with_offsets; index++) { 00850 if(memcmp(&subtables_with_offsets[index * 16], subtable, 16 * sizeof(WORD)) == 0) { 00851 return index; 00852 } 00853 } 00854 00855 if(number_of_subtables_with_offsets >= 4096) { 00856 printf("Could not add new subtable with offsets, storage is full\n"); 00857 return 0; 00858 } 00859 00860 memcpy(&subtables_with_offsets[number_of_subtables_with_offsets * 16], subtable, 16 * sizeof(WORD)); 00861 number_of_subtables_with_offsets++; 00862 00863 return index; 00864 } 00865 00866 static WORD *pack_table(WORD *table, WORD *packed_size_in_words) 00867 { 00868 WORD high, low4, index; 00869 WORD main_index[256]; 00870 WORD temp_subtable[16]; 00871 WORD *packed_table; 00872 WORD *subtable_src, *subtable_dst; 00873 00874 memset(subtables_with_diffs, 0, sizeof(subtables_with_diffs)); 00875 number_of_subtables_with_diffs = 0; 
00876 00877 memset(subtables_with_offsets, 0, sizeof(subtables_with_offsets)); 00878 number_of_subtables_with_offsets = 0; 00879 00880 for(high = 0; high < 256; high++) { 00881 for(low4 = 0; low4 < 256; low4 += 16) { 00882 index = find_subtable_with_diffs(table, &table[MAKEWORD(low4, high)]); 00883 00884 temp_subtable[low4 >> 4] = index; 00885 } 00886 00887 index = find_subtable_with_offsets(temp_subtable); 00888 main_index[high] = index; 00889 } 00890 00891 *packed_size_in_words = 0x100 + number_of_subtables_with_offsets * 16 + number_of_subtables_with_diffs * 16; 00892 packed_table = calloc(*packed_size_in_words, sizeof(WORD)); 00893 00894 /* fill main index according to the subtables_with_offsets */ 00895 for(high = 0; high < 256; high++) { 00896 packed_table[high] = 0x100 + main_index[high] * 16; 00897 } 00898 00899 //memcpy(sub_table, subtables_with_offsets, number_of_subtables_with_offsets * 16); 00900 00901 /* fill subtable index according to the subtables_with_diffs */ 00902 for(index = 0; index < number_of_subtables_with_offsets; index++) { 00903 subtable_dst = packed_table + 0x100 + index * 16; 00904 subtable_src = &subtables_with_offsets[index * 16]; 00905 00906 for(low4 = 0; low4 < 16; low4++) { 00907 subtable_dst[low4] = 0x100 + number_of_subtables_with_offsets * 16 + subtable_src[low4] * 16; 00908 } 00909 } 00910 00911 00912 for(index = 0; index < number_of_subtables_with_diffs; index++) { 00913 subtable_dst = packed_table + 0x100 + number_of_subtables_with_offsets * 16 + index * 16; 00914 memcpy(subtable_dst, subtables_with_diffs[index], 16 * sizeof(WORD)); 00915 00916 } 00917 00918 00919 test_packed_table(packed_table); 00920 00921 return packed_table; 00922 } 00923 00924 int write_casemap_file(void) 00925 { 00926 WORD packed_size_in_words, offset_to_next_table_in_words; 00927 WORD *packed_table, value; 00928 FILE *file; 00929 00930 if(!CreateCaseDiff(LIBDIR"UnicodeData.txt")) 00931 return -1; 00932 00933 file = fopen(NLSDIR"/l_intl.nls", "wb"); 
00934 00935 /* write version number */ 00936 value = 1; 00937 fwrite(&value, 1, sizeof(WORD), file); 00938 00939 /* pack upper case table */ 00940 packed_table = pack_table(to_upper_org, &packed_size_in_words); 00941 offset_to_next_table_in_words = packed_size_in_words + 1; 00942 fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file); 00943 /* write packed upper case table */ 00944 fwrite(packed_table, sizeof(WORD), packed_size_in_words, file); 00945 free(packed_table); 00946 00947 /* pack lower case table */ 00948 packed_table = pack_table(to_lower_org, &packed_size_in_words); 00949 offset_to_next_table_in_words = packed_size_in_words + 1; 00950 fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file); 00951 /* write packed lower case table */ 00952 fwrite(packed_table, sizeof(WORD), packed_size_in_words, file); 00953 free(packed_table); 00954 00955 fclose(file); 00956 00957 free(to_upper_org); 00958 free(to_lower_org); 00959 00960 return 0; 00961 } 00962 00963 int main() 00964 { 00965 write_nls_files(); 00966 write_casemap_file(); 00967 00968 return 0; 00969 } Generated on Sun May 27 2012 04:37:45 for ReactOS by
1.7.6.1
|