ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

create_nls.c
Go to the documentation of this file.
00001 /*
00002  * Tool for creating NT-like NLS files for Unicode <-> Codepage conversions.
00003  * Tool for creating NT-like l_intl.nls file for case mapping of unicode
00004  * characters.
00005  * Copyright 2000 Timoshkov Dmitry
00006  * Copyright 2001 Matei Alexandru
00007  *
00008  * Sources of information:
00009  * Andrew Kozin's YAW project http://www.chat.ru/~stanson/yaw_en.html
00010  * Ove Kåven's investigations http://www.ping.uio.no/~ovehk/nls
00011  */
00012 #include <windows.h>
00013 #include <stdio.h>
00014 #include <stdlib.h>
00015 #include <malloc.h>
00016 #include <string.h>
00017 #include <ctype.h>
00018 
00019 static const WCHAR * const uprtable[256];
00020 static const WCHAR * const lwrtable[256];
00021 
00022 #define NLSDIR "../../media/nls"
00023 #define LIBDIR "unicode.org/"
00024 
00025 typedef struct {
00026     WORD wSize; /* in words 0x000D */
00027     WORD CodePage;
00028     WORD MaxCharSize; /* 1 or 2 */
00029     BYTE DefaultChar[MAX_DEFAULTCHAR];
00030     WCHAR UnicodeDefaultChar;
00031     WCHAR unknown1;
00032     WCHAR unknown2;
00033     BYTE LeadByte[MAX_LEADBYTES];
00034 } __attribute__((packed)) NLS_FILE_HEADER;
00035 
00036 /*
00037 Support for translation from the multiple unicode chars
00038 to the single code page char.
00039 
00040 002D;HYPHEN-MINUS;Pd;0;ET;;;;;N;;;;;
00041 00AD;SOFT HYPHEN;Pd;0;ON;;;;;N;;;;;
00042 2010;HYPHEN;Pd;0;ON;;;;;N;;;;;
00043 2011;NON-BREAKING HYPHEN;Pd;0;ON;<noBreak> 2010;;;;N;;;;;
00044 2013;EN DASH;Pd;0;ON;;;;;N;;;;;
00045 2014;EM DASH;Pd;0;ON;;;;;N;;;;;
00046 2015;HORIZONTAL BAR;Pd;0;ON;;;;;N;QUOTATION DASH;;;;
00047 */
00048 
00049 /* HYPHEN-MINUS aliases */
00050 static WCHAR hyphen_aliases[] = {0x00AD,0x2010,0x2011,0x2013,0x2014,0x2015,0};
00051 
00052 static struct {
00053     WCHAR cp_char;
00054     WCHAR *alias; /* must be 0 terminated */
00055 } u2cp_alias[] = {
00056 /* HYPHEN-MINUS aliases */
00057 {0x002D, hyphen_aliases}
00058 };
00059 
00060 static void patch_aliases(void *u2cp, CPINFOEXA *cpi)
00061 {
00062     int i, j;
00063     WCHAR *wc, *alias;
00064     BYTE *c;
00065 
00066     if(cpi->MaxCharSize == 2) {
00067     wc = (WCHAR *)u2cp;
00068     for(i = 0; i < 65536; i++) {
00069         for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) {
00070         alias = u2cp_alias[j].alias;
00071         while(*alias) {
00072             if(*alias == i && wc[i] == *(WCHAR *)cpi->DefaultChar) {
00073             wc[i] = u2cp_alias[j].cp_char;
00074             }
00075             alias++;
00076         }
00077         }
00078     }
00079     }
00080     else {
00081     c = (BYTE *)u2cp;
00082     for(i = 0; i < 65536; i++) {
00083         for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) {
00084         alias = u2cp_alias[j].alias;
00085         while(*alias) {
00086             if(*alias == i && c[i] == cpi->DefaultChar[0] && u2cp_alias[j].cp_char < 256) {
00087             c[i] = (BYTE)u2cp_alias[j].cp_char;
00088             }
00089             alias++;
00090         }
00091         }
00092     }
00093     }
00094 }
00095 
00096 static BOOL write_unicode2cp_table(FILE *out, CPINFOEXA *cpi, WCHAR *table)
00097 {
00098     void *u2cp;
00099     WCHAR *wc;
00100     CHAR *c;
00101     int i;
00102     BOOL ret = TRUE;
00103 
00104     u2cp = malloc(cpi->MaxCharSize * 65536);
00105     if(!u2cp) {
00106     printf("Not enough memory for Unicode to Codepage table\n");
00107     return FALSE;
00108     }
00109 
00110     if(cpi->MaxCharSize == 2) {
00111     wc = (WCHAR *)u2cp;
00112     for(i = 0; i < 65536; i++)
00113         wc[i] = *(WCHAR *)cpi->DefaultChar;
00114 
00115     for(i = 0; i < 65536; i++)
00116         if (table[i] != '?')
00117         wc[table[i]] = (WCHAR)i;
00118     }
00119     else {
00120     c = (CHAR *)u2cp;
00121     for(i = 0; i < 65536; i++)
00122         c[i] = cpi->DefaultChar[0];
00123 
00124     for(i = 0; i < 256; i++)
00125         if (table[i] != '?')
00126         c[table[i]] = (CHAR)i;
00127     }
00128 
00129     patch_aliases(u2cp, cpi);
00130 
00131     if(fwrite(u2cp, 1, cpi->MaxCharSize * 65536, out) != cpi->MaxCharSize * 65536)
00132     ret = FALSE;
00133 
00134     free(u2cp);
00135 
00136     return ret;
00137 }
00138 
00139 static BOOL write_lb_ranges(FILE *out, CPINFOEXA *cpi, WCHAR *table)
00140 {
00141     WCHAR sub_table[256];
00142     WORD offset, offsets[256];
00143     int i, j, range;
00144 
00145     memset(offsets, 0, sizeof(offsets));
00146 
00147     offset = 0;
00148 
00149     for(i = 0; i < MAX_LEADBYTES; i += 2) {
00150     for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) {
00151         offset += 256;
00152         offsets[range] = offset;
00153     }
00154     }
00155 
00156     if(fwrite(offsets, 1, sizeof(offsets), out) != sizeof(offsets))
00157     return FALSE;
00158 
00159     for(i = 0; i < MAX_LEADBYTES; i += 2) {
00160     for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) {
00161         /*printf("Writing sub table for LeadByte %02X\n", range);*/
00162         for(j = MAKEWORD(0, range); j <= MAKEWORD(0xFF, range); j++) {
00163         sub_table[j - MAKEWORD(0, range)] = table[j];
00164         }
00165 
00166         if(fwrite(sub_table, 1, sizeof(sub_table), out) != sizeof(sub_table))
00167         return FALSE;
00168     }
00169     }
00170 
00171     return TRUE;
00172 }
00173 
00174 static BOOL create_nls_file(char *name, CPINFOEXA *cpi, WCHAR *table, WCHAR *oemtable)
00175 {
00176     FILE *out;
00177     NLS_FILE_HEADER nls;
00178     WORD wValue, number_of_lb_ranges, number_of_lb_subtables, i;
00179 
00180     printf("Creating NLS table \"%s\"\n", name);
00181 
00182     if(!(out = fopen(name, "wb"))) {
00183     printf("Could not create file \"%s\"\n", name);
00184     return FALSE;
00185     }
00186 
00187     memset(&nls, 0, sizeof(nls));
00188 
00189     nls.wSize = sizeof(nls) / sizeof(WORD);
00190     nls.CodePage = cpi->CodePage;
00191     nls.MaxCharSize = cpi->MaxCharSize;
00192     memcpy(nls.DefaultChar, cpi->DefaultChar, MAX_DEFAULTCHAR);
00193     nls.UnicodeDefaultChar = cpi->UnicodeDefaultChar;
00194     nls.unknown1 = '?';
00195     nls.unknown2 = '?';
00196     memcpy(nls.LeadByte, cpi->LeadByte, MAX_LEADBYTES);
00197 
00198     if(fwrite(&nls, 1, sizeof(nls), out) != sizeof(nls)) {
00199     fclose(out);
00200     printf("Could not write to file \"%s\"\n", name);
00201     return FALSE;
00202     }
00203 
00204     number_of_lb_ranges = 0;
00205     number_of_lb_subtables = 0;
00206 
00207     for(i = 0; i < MAX_LEADBYTES; i += 2) {
00208     if(cpi->LeadByte[i] != 0 && cpi->LeadByte[i + 1] > cpi->LeadByte[i]) {
00209         number_of_lb_ranges++;
00210         number_of_lb_subtables += cpi->LeadByte[i + 1] - cpi->LeadByte[i] + 1;
00211     }
00212     }
00213 
00214     /*printf("Number of LeadByte ranges %d\n", number_of_lb_ranges);*/
00215     /*printf("Number of LeadByte subtables %d\n", number_of_lb_subtables);*/
00216 
00217     /* Calculate offset to Unicode to CP table in words:
00218      *  1. (256 * sizeof(WORD)) primary CP to Unicode table +
00219      *  2. (WORD) optional OEM glyph table size in words +
00220      *  3. OEM glyph table size in words * sizeof(WORD) +
00221      *  4. (WORD) Number of DBCS LeadByte ranges +
00222      *  5. if (Number of DBCS LeadByte ranges != 0) 256 * sizeof(WORD) offsets of lead byte sub tables
00223      *  6. (Number of DBCS LeadByte sub tables * 256 * sizeof(WORD)) LeadByte sub tables +
00224      *  7. (WORD) Unknown flag
00225      */
00226 
00227     wValue = (256 * sizeof(WORD) + /* 1 */
00228           sizeof(WORD) + /* 2 */
00229           ((oemtable !=NULL) ? (256 * sizeof(WORD)) : 0) + /* 3 */
00230           sizeof(WORD) + /* 4 */
00231           ((number_of_lb_subtables != 0) ? 256 * sizeof(WORD) : 0) + /* 5 */
00232           number_of_lb_subtables * 256 * sizeof(WORD) + /* 6 */
00233           sizeof(WORD) /* 7 */
00234           ) / sizeof(WORD);
00235 
00236     /* offset of Unicode to CP table in words */
00237     fwrite(&wValue, 1, sizeof(wValue), out);
00238 
00239     /* primary CP to Unicode table */
00240     if(fwrite(table, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) {
00241     fclose(out);
00242     printf("Could not write to file \"%s\"\n", name);
00243     return FALSE;
00244     }
00245 
00246     /* optional OEM glyph table size in words */
00247     wValue = (oemtable != NULL) ? (256 * sizeof(WORD)) : 0;
00248     fwrite(&wValue, 1, sizeof(wValue), out);
00249 
00250     /* optional OEM to Unicode table */
00251     if (oemtable) {
00252     if(fwrite(oemtable, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) {
00253         fclose(out);
00254         printf("Could not write to file \"%s\"\n", name);
00255         return FALSE;
00256     }
00257     }
00258 
00259     /* Number of DBCS LeadByte ranges */
00260     fwrite(&number_of_lb_ranges, 1, sizeof(number_of_lb_ranges), out);
00261 
00262     /* offsets of lead byte sub tables and lead byte sub tables */
00263     if(number_of_lb_ranges > 0) {
00264     if(!write_lb_ranges(out, cpi, table)) {
00265         fclose(out);
00266         printf("Could not write to file \"%s\"\n", name);
00267         return FALSE;
00268     }
00269     }
00270 
00271     /* Unknown flag */
00272     wValue = 0;
00273     fwrite(&wValue, 1, sizeof(wValue), out);
00274 
00275     if(!write_unicode2cp_table(out, cpi, table)) {
00276     fclose(out);
00277     printf("Could not write to file \"%s\"\n", name);
00278     return FALSE;
00279     }
00280 
00281     fclose(out);
00282     return TRUE;
00283 }
00284 
00285 /* correct the codepage information such as default chars */
00286 static void patch_codepage_info(CPINFOEXA *cpi)
00287 {
00288     /* currently nothing */
00289 }
00290 
00291 static WCHAR *Load_CP2Unicode_Table(char *table_name, UINT cp, CPINFOEXA *cpi)
00292 {
00293     char buf[256];
00294     char *p;
00295     DWORD n, value;
00296     FILE *file;
00297     WCHAR *table;
00298     int lb_ranges, lb_range_started, line;
00299 
00300     printf("Loading translation table \"%s\"\n", table_name);
00301 
00302     /* Init to default values */
00303     memset(cpi, 0, sizeof(CPINFOEXA));
00304     cpi->CodePage = cp;
00305     *(WCHAR *)cpi->DefaultChar = '?';
00306     cpi->MaxCharSize = 1;
00307     cpi->UnicodeDefaultChar = '?';
00308 
00309     patch_codepage_info(cpi);
00310 
00311     table = (WCHAR *)malloc(sizeof(WCHAR) * 65536);
00312     if(!table) {
00313     printf("Not enough memory for Codepage to Unicode table\n");
00314     return NULL;
00315     }
00316 
00317     for(n = 0; n < 256; n++)
00318     table[n] = (WCHAR)n;
00319 
00320     for(n = 256; n < 65536; n++)
00321     table[n] = cpi->UnicodeDefaultChar;
00322 
00323     file = fopen(table_name, "r");
00324     if(file == NULL) {
00325     free(table);
00326     return NULL;
00327     }
00328 
00329     line = 0;
00330     lb_ranges = 0;
00331     lb_range_started = 0;
00332 
00333     while(fgets(buf, sizeof(buf), file)) {
00334     line++;
00335     p = buf;
00336     while(isspace(*p)) p++;
00337 
00338     if(!*p || p[0] == '#')
00339         continue;
00340 
00341     n = strtol(p, &p, 0);
00342     if(n > 0xFFFF) {
00343         printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, n, table_name);
00344         continue;
00345     }
00346 
00347     if(n > 0xFF && cpi->MaxCharSize != 2) {
00348         /*printf("Line %d: Entry 0x%04lX: Switching to DBCS\n", line, n);*/
00349         cpi->MaxCharSize = 2;
00350     }
00351 
00352     while(isspace(*p)) p++;
00353 
00354     if(!*p || p[0] == '#') {
00355         /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
00356     }
00357     else {
00358         value = strtol(p, &p, 0);
00359         if(value > 0xFFFF) {
00360         printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, n, table_name);
00361         }
00362         table[n] = (WCHAR)value;
00363     }
00364 
00365     /* wait for comment */
00366     while(*p && *p != '#') p++;
00367 
00368     if(*p == '#' && strstr(p, "DBCS LEAD BYTE")) {
00369         /*printf("Line %d, entry 0x%02lX DBCS LEAD BYTE\n", line, n);*/
00370         if(n > 0xFF) {
00371         printf("Line %d: Entry 0x%04lX: Error: DBCS lead byte overflowed\n", line, n);
00372         continue;
00373         }
00374 
00375         table[n] = (WCHAR)0;
00376 
00377         if(lb_range_started) {
00378         cpi->LeadByte[(lb_ranges - 1) * 2 + 1] = (BYTE)n;
00379         }
00380         else {
00381         /*printf("Line %d: Starting new DBCS lead byte range, entry 0x%02lX\n", line, n);*/
00382         if(lb_ranges < MAX_LEADBYTES/2) {
00383             lb_ranges++;
00384             lb_range_started = 1;
00385             cpi->LeadByte[(lb_ranges - 1) * 2] = (BYTE)n;
00386         }
00387         else
00388             printf("Line %d: Error: could not start new lead byte range\n", line);
00389         }
00390     }
00391     else {
00392         if(lb_range_started)
00393         lb_range_started = 0;
00394     }
00395     }
00396 
00397     fclose(file);
00398 
00399     return table;
00400 }
00401 
00402 static WCHAR *Load_OEM2Unicode_Table(char *table_name, WCHAR *def_table, UINT cp, CPINFOEXA *cpi)
00403 {
00404     char buf[256];
00405     char *p;
00406     DWORD n, value;
00407     FILE *file;
00408     WCHAR *table;
00409     int line;
00410 
00411     printf("Loading oem glyph table \"%s\"\n", table_name);
00412 
00413     table = (WCHAR *)malloc(sizeof(WCHAR) * 65536);
00414     if(!table) {
00415     printf("Not enough memory for Codepage to Unicode table\n");
00416     return NULL;
00417     }
00418 
00419     memcpy(table, def_table, 65536 * sizeof(WCHAR));
00420 
00421     file = fopen(table_name, "r");
00422     if(file == NULL) {
00423     free(table);
00424     return NULL;
00425     }
00426 
00427     while(fgets(buf, sizeof(buf), file)) {
00428     line++;
00429     p = buf;
00430     while(isspace(*p)) p++;
00431 
00432     if(!*p || p[0] == '#')
00433         continue;
00434 
00435     value = strtol(p, &p, 16);
00436     if(value > 0xFFFF) {
00437         printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, value, table_name);
00438         continue;
00439     }
00440 
00441     while(isspace(*p)) p++;
00442 
00443     if(!*p || p[0] == '#') {
00444         /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
00445         continue;
00446     }
00447     else {
00448         n = strtol(p, &p, 16);
00449         if(n > 0xFFFF) {
00450         printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, value, table_name);
00451         continue;
00452         }
00453     }
00454 
00455     if (cpi->CodePage == 864) {
00456         while(isspace(*p)) p++;
00457 
00458         if(!*p || p[0] == '#' || p[0] == '-') {
00459         /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
00460         continue;
00461         }
00462         else {
00463         n = strtol(p, &p, 16);
00464         if(n > 0xFFFF) {
00465             printf("Line %d: Entry 0x%06lX oem value: File \"%s\" corrupted\n", line, value, table_name);
00466         }
00467         continue;
00468         }
00469     }
00470 
00471     table[n] = (WCHAR)value;
00472     }
00473 
00474     fclose(file);
00475 
00476     return table;
00477 }
00478 
00479 int write_nls_files()
00480 {
00481     WCHAR *table;
00482     WCHAR *oemtable;
00483     char nls_filename[256];
00484     CPINFOEXA cpi;
00485     int i;
00486     struct code_page {
00487     UINT cp;
00488     BOOL oem;
00489     char *table_filename;
00490     char *comment;
00491     } pages[] = {
00492     {37,  FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT", "IBM EBCDIC US Canada"},
00493     {424, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP424.TXT", "IBM EBCDIC Hebrew"},
00494     {437, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT", "OEM United States"},
00495     {500, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT", "IBM EBCDIC International"},
00496     /*{708, FALSE, "", "Arabic ASMO"},*/
00497     /*{720, FALSE, "", "Arabic Transparent ASMO"},*/
00498     {737, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT", "OEM Greek 437G"},
00499     {775, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT", "OEM Baltic"},
00500     {850, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT", "OEM Multilingual Latin 1"},
00501     {852, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT", "OEM Slovak Latin 2"},
00502     {855, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT", "OEM Cyrillic" },
00503     {856, TRUE,  LIBDIR"MAPPINGS/VENDORS/MISC/CP856.TXT", "Hebrew PC"},
00504     {857, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT", "OEM Turkish"},
00505     {860, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT", "OEM Portuguese"},
00506     {861, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT", "OEM Icelandic"},
00507     {862, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT", "OEM Hebrew"},
00508     {863, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT", "OEM Canadian French"},
00509     {864, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT", "OEM Arabic"},
00510     {865, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT", "OEM Nordic"},
00511     {866, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT", "OEM Russian"},
00512     {869, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT", "OEM Greek"},
00513     /*{870, FALSE, "", "IBM EBCDIC Multilingual/ROECE (Latin 2)"},*/
00514     {874, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP874.TXT", "ANSI/OEM Thai"},
00515     {875, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT", "IBM EBCDIC Greek"},
00516     {878, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
00517     {932, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT", "ANSI/OEM Japanese Shift-JIS"},
00518     {936, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT", "ANSI/OEM Simplified Chinese GBK"},
00519     {949, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT", "ANSI/OEM Korean Unified Hangul"},
00520     {950, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT", "ANSI/OEM Traditional Chinese Big5"},
00521     {1006, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP1006.TXT", "IBM Arabic"},
00522     {1026, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT", "IBM EBCDIC Latin 5 Turkish"},
00523     {1250, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT", "ANSI Eastern Europe"},
00524     {1251, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT", "ANSI Cyrillic"},
00525     {1252, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT", "ANSI Latin 1"},
00526     {1253, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT", "ANSI Greek"},
00527     {1254, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT", "ANSI Turkish"},
00528     {1255, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT", "ANSI Hebrew"},
00529     {1256, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT", "ANSI Arabic"},
00530     {1257, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT", "ANSI Baltic"},
00531     {1258, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT", "ANSI/OEM Viet Nam"},
00532     {10000, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT", "Mac Roman"},
00533     {10006, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT", "Mac Greek"},
00534     {10007, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT", "Mac Cyrillic"},
00535     {10029, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT", "Mac Latin 2"},
00536     {10079, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT", "Mac Icelandic"},
00537     {10081, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT", "Mac Turkish"},
00538     /*{20000, FALSE, "", "CNS Taiwan"},*/
00539     /*{20001, FALSE, "", "TCA Taiwan"},*/
00540     /*{20002, FALSE, "", "Eten Taiwan"},*/
00541     /*{20003, FALSE, "", "IBM5550 Taiwan"},*/
00542     /*{20004, FALSE, "", "TeleText Taiwan"},*/
00543     /*{20005, FALSE, "", "Wang Taiwan"},*/
00544     /*{20105, FALSE, "", "IA5 IRV International Alphabet No.5"},*/
00545     /*{20106, FALSE, "", "IA5 German"},*/
00546     /*{20107, FALSE, "", "IA5 Swedish"},*/
00547     /*{20108, FALSE, "", "IA5 Norwegian"},*/
00548     /*{20127, FALSE, "", "US ASCII"}, */
00549     /*{20261, FALSE, "", "T.61"},*/
00550     /*{20269, FALSE, "", "ISO 6937 NonSpacing Accent"},*/
00551     /*{20273, FALSE, "", "IBM EBCDIC Germany"},*/
00552     /*{20277, FALSE, "", "IBM EBCDIC Denmark/Norway"},*/
00553     /*{20278, FALSE, "", "IBM EBCDIC Finland/Sweden"},*/
00554     /*{20280, FALSE, "", "IBM EBCDIC Italy"},*/
00555     /*{20284, FALSE, "", "IBM EBCDIC Latin America/Spain"},*/
00556     /*{20285, FALSE, "", "IBM EBCDIC United Kingdom"},*/
00557     /*{20290, FALSE, "", "IBM EBCDIC Japanese Katakana Extended"},*/
00558     /*{20297, FALSE, "", "IBM EBCDIC France"},*/
00559     /*{20420, FALSE, "", "IBM EBCDIC Arabic"},*/
00560     /*{20423, FALSE, "IBM869.TXT", "IBM EBCDIC Greek"},*/
00561     /*{20424, FALSE, "", "IBM EBCDIC Hebrew"},*/
00562     /*{20833, FALSE, "", "IBM EBCDIC Korean Extended"},*/
00563     /*{20838, FALSE, "", "IBM EBCDIC Thai"},*/
00564     {20871, FALSE, "ReactOS/IBMCP861.TXT", "IBM EBCDIC Icelandic"},
00565     /*{20880, FALSE, "", "IBM EBCDIC Cyrillic (Russian)"},*/
00566     {20866, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
00567     /*{20905, FALSE, "", "IBM EBCDIC Turkish"},*/
00568     /*{21025, FALSE, "", "IBM EBCDIC Cyrillic (Serbian, Bulgarian)"},*/
00569     /*{21027, FALSE, "", "Ext Alpha Lowercase"},*/
00570     {28591, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-1.TXT", "ISO 8859-1 Latin 1"},
00571     {28592, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-2.TXT", "ISO 8859-2 Eastern Europe"},
00572     {28593, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-3.TXT", "ISO 8859-3 Turkish"},
00573     {28594, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-4.TXT", "ISO 8859-4 Baltic"},
00574     {28595, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-5.TXT", "ISO 8859-5 Cyrillic"},
00575     {28596, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-6.TXT", "ISO 8859-6 Arabic"},
00576     {28597, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-7.TXT", "ISO 8859-7 Greek"},
00577     {28598, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-8.TXT", "ISO 8859-8 Hebrew"},
00578     {28599, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-9.TXT", "ISO 8859-9 Latin 5"}
00579     };
00580 
00581     for(i = 0; i < sizeof(pages)/sizeof(pages[0]); i++) {
00582     table = Load_CP2Unicode_Table(pages[i].table_filename, pages[i].cp, &cpi);
00583     if(!table) {
00584         printf("Could not load \"%s\" (%s)\n", pages[i].table_filename, pages[i].comment);
00585         continue;
00586     }
00587 
00588     if (pages[i].oem) {
00589         oemtable = Load_OEM2Unicode_Table(LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", table, pages[i].cp, &cpi);
00590         if(!oemtable) {
00591         printf("Could not load \"%s\" (%s)\n", LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", "IBM OEM glyph table");
00592         continue;
00593         }
00594     }
00595 
00596     sprintf(nls_filename, "%s/c_%03d.nls", NLSDIR, cpi.CodePage);
00597     if(!create_nls_file(nls_filename, &cpi, table, pages[i].oem ? oemtable : NULL)) {
00598         printf("Could not write \"%s\" (%s)\n", nls_filename, pages[i].comment);
00599     }
00600 
00601     if (pages[i].oem)
00602         free(oemtable);
00603 
00604     free(table);
00605     }
00606 
00607     return 0;
00608 }
00609 
00610 
00611 
00612 static WORD *to_upper_org = NULL, *to_lower_org = NULL;
00613 
00614 #if 0
00615 static WORD diffs[256];
00616 static int number_of_diffs;
00617 #endif
00618 
00619 static WORD number_of_subtables_with_diffs;
00620 /* pointers to subtables with 16 elements in each to the main table */
00621 static WORD *subtables_with_diffs[4096];
00622 
00623 static WORD number_of_subtables_with_offsets;
00624 /* subtables with 16 elements  */
00625 static WORD subtables_with_offsets[4096 * 16];
00626 
00627 static void test_packed_table(WCHAR *table)
00628 {
00629     WCHAR test_str[] = L"This is an English text. \x0CF\x0EE-\x0F0\x0F3\x0F1\x0F1\x0EA\x0E8 \x0FF \x0EF\x0E8\x0F1\x0E0\x0F2\x0FC \x0F3\x0EC\x0E5\x0FE \x0ED\x0E5\x0EC\x0ED\x0EE\x0E6\x0EA\x0EE. 1234567890";
00630     //WORD diff, off;
00631     //WORD *sub_table;
00632     DWORD i, len;
00633 
00634     len = lstrlenW(test_str);
00635 
00636     for(i = 0; i < len + 1; i++) {
00637     /*off = table[HIBYTE(test_str[i])];
00638 
00639     sub_table = table + off;
00640     off = sub_table[LOBYTE(test_str[i]) >> 4];
00641 
00642     sub_table = table + off;
00643     off = LOBYTE(test_str[i]) & 0x0F;
00644 
00645     diff = sub_table[off];
00646 
00647     test_str[i] += diff;*/
00648     test_str[i] += table[table[table[HIBYTE(test_str[i])] + (LOBYTE(test_str[i]) >> 4)] + (LOBYTE(test_str[i]) & 0x0F)];
00649     }
00650 /*
00651     {
00652     FILE *file;
00653     static int n = 0;
00654     char name[20];
00655 
00656     sprintf(name, "text%02d.dat", n++);
00657     file = fopen(name, "wb");
00658     fwrite(test_str, len * sizeof(WCHAR), 1, file);
00659     fclose(file);
00660     }*/
00661 }
00662 
00663 static BOOL CreateCaseDiff(char *table_name)
00664 {
00665     char buf[256];
00666     char *p;
00667     WORD code, case_mapping;
00668     FILE *file;
00669     int line;
00670 
00671     to_upper_org = (WORD *)calloc(65536, sizeof(WORD));
00672     if(!to_upper_org) {
00673     printf("Not enough memory for to upper table\n");
00674     return FALSE;
00675     }
00676 
00677     to_lower_org = (WORD *)calloc(65536, sizeof(WORD));
00678     if(!to_lower_org) {
00679     printf("Not enough memory for to lower table\n");
00680     return FALSE;
00681     }
00682 
00683     file = fopen(table_name, "r");
00684     if(file == NULL) {
00685     printf("Could not open file \"%s\"\n", table_name);
00686     return FALSE;
00687     }
00688 
00689     line = 0;
00690 
00691     while(fgets(buf, sizeof(buf), file)) {
00692     line++;
00693     p = buf;
00694     while(*p && isspace(*p)) p++;
00695 
00696     if(!*p)
00697         continue;
00698 
00699     /* 0. Code value */
00700     code = (WORD)strtol(p, &p, 16);
00701 
00702     //if(code != 0x9A0 && code != 0xBA0)
00703         //continue;
00704 
00705     while(*p && *p != ';') p++;
00706     if(!*p)
00707         continue;
00708     p++;
00709 
00710     /* 1. Character name */
00711     while(*p && *p != ';') p++;
00712     if(!*p)
00713         continue;
00714     p++;
00715 
00716     /* 2. General Category */
00717     while(*p && *p != ';') p++;
00718     if(!*p)
00719         continue;
00720     p++;
00721 
00722     /* 3. Canonical Combining Classes */
00723     while(*p && *p != ';') p++;
00724     if(!*p)
00725         continue;
00726     p++;
00727 
00728     /* 4. Bidirectional Category */
00729     while(*p && *p != ';') p++;
00730     if(!*p)
00731         continue;
00732     p++;
00733 
00734     /* 5. Character Decomposition Mapping */
00735     while(*p && *p != ';') p++;
00736     if(!*p)
00737         continue;
00738     p++;
00739 
00740     /* 6. Decimal digit value */
00741     while(*p && *p != ';') p++;
00742     if(!*p)
00743         continue;
00744     p++;
00745 
00746     /* 7. Digit value */
00747     while(*p && *p != ';') p++;
00748     if(!*p)
00749         continue;
00750     p++;
00751 
00752     /* 8. Numeric value */
00753     while(*p && *p != ';') p++;
00754     if(!*p)
00755         continue;
00756     p++;
00757 
00758     /* 9. Mirrored */
00759     while(*p && *p != ';') p++;
00760     if(!*p)
00761         continue;
00762     p++;
00763 
00764     /* 10. Unicode 1.0 Name */
00765     while(*p && *p != ';') p++;
00766     if(!*p)
00767         continue;
00768     p++;
00769 
00770     /* 11. 10646 comment field */
00771     while(*p && *p != ';') p++;
00772     if(!*p)
00773         continue;
00774     p++;
00775 
00776     /* 12. Uppercase Mapping */
00777     while(*p && isspace(*p)) p++;
00778     if(!*p) continue;
00779     if(*p != ';') {
00780         case_mapping = (WORD)strtol(p, &p, 16);
00781         to_upper_org[code] = case_mapping - code;
00782         while(*p && *p != ';') p++;
00783     }
00784     else
00785         p++;
00786 
00787     /* 13. Lowercase Mapping */
00788     while(*p && isspace(*p)) p++;
00789     if(!*p) continue;
00790     if(*p != ';') {
00791         case_mapping = (WORD)strtol(p, &p, 16);
00792         to_lower_org[code] = case_mapping - code;
00793         while(*p && *p != ';') p++;
00794     }
00795     else
00796         p++;
00797 
00798     /* 14. Titlecase Mapping */
00799     while(*p && *p != ';') p++;
00800     if(!*p)
00801         continue;
00802     p++;
00803     }
00804 
00805     fclose(file);
00806 
00807     return TRUE;
00808 }
00809 
#if 0
/* Return the position of the first entry of diffs[] that equals diff, or
 * -1 when there is none.  Compiled out together with diffs[]. */
static int find_diff(WORD diff)
{
    int pos = 0;

    while(pos < number_of_diffs) {
        if(diffs[pos] == diff)
            return pos;
        pos++;
    }

    return -1;
}
#endif
00823 
00824 static WORD find_subtable_with_diffs(WORD *table, WORD *subtable)
00825 {
00826     WORD index;
00827 
00828     for(index = 0; index < number_of_subtables_with_diffs; index++) {
00829     if(memcmp(subtables_with_diffs[index], subtable, 16 * sizeof(WORD)) == 0) {
00830         return index;
00831     }
00832     }
00833 
00834     if(number_of_subtables_with_diffs >= 4096) {
00835     printf("Could not add new subtable with diffs, storage is full\n");
00836     return 0;
00837     }
00838 
00839     subtables_with_diffs[number_of_subtables_with_diffs] = subtable;
00840     number_of_subtables_with_diffs++;
00841 
00842     return index;
00843 }
00844 
00845 static WORD find_subtable_with_offsets(WORD *subtable)
00846 {
00847     WORD index;
00848 
00849     for(index = 0; index < number_of_subtables_with_offsets; index++) {
00850     if(memcmp(&subtables_with_offsets[index * 16], subtable, 16 * sizeof(WORD)) == 0) {
00851         return index;
00852     }
00853     }
00854 
00855     if(number_of_subtables_with_offsets >= 4096) {
00856     printf("Could not add new subtable with offsets, storage is full\n");
00857     return 0;
00858     }
00859 
00860     memcpy(&subtables_with_offsets[number_of_subtables_with_offsets * 16], subtable, 16 * sizeof(WORD));
00861     number_of_subtables_with_offsets++;
00862 
00863     return index;
00864 }
00865 
00866 static WORD *pack_table(WORD *table, WORD *packed_size_in_words)
00867 {
00868     WORD high, low4, index;
00869     WORD main_index[256];
00870     WORD temp_subtable[16];
00871     WORD *packed_table;
00872     WORD *subtable_src, *subtable_dst;
00873 
00874     memset(subtables_with_diffs, 0, sizeof(subtables_with_diffs));
00875     number_of_subtables_with_diffs = 0;
00876 
00877     memset(subtables_with_offsets, 0, sizeof(subtables_with_offsets));
00878     number_of_subtables_with_offsets = 0;
00879 
00880     for(high = 0; high < 256; high++) {
00881     for(low4 = 0; low4 < 256; low4 += 16) {
00882         index = find_subtable_with_diffs(table, &table[MAKEWORD(low4, high)]);
00883 
00884         temp_subtable[low4 >> 4] = index;
00885     }
00886 
00887     index = find_subtable_with_offsets(temp_subtable);
00888     main_index[high] = index;
00889     }
00890 
00891     *packed_size_in_words = 0x100 + number_of_subtables_with_offsets * 16 + number_of_subtables_with_diffs * 16;
00892     packed_table = calloc(*packed_size_in_words, sizeof(WORD));
00893 
00894     /* fill main index according to the subtables_with_offsets */
00895     for(high = 0; high < 256; high++) {
00896     packed_table[high] = 0x100 + main_index[high] * 16;
00897     }
00898 
00899     //memcpy(sub_table, subtables_with_offsets, number_of_subtables_with_offsets * 16);
00900 
00901     /* fill subtable index according to the subtables_with_diffs */
00902     for(index = 0; index < number_of_subtables_with_offsets; index++) {
00903     subtable_dst = packed_table + 0x100 + index * 16;
00904     subtable_src = &subtables_with_offsets[index * 16];
00905 
00906     for(low4 = 0; low4 < 16; low4++) {
00907         subtable_dst[low4] = 0x100 + number_of_subtables_with_offsets * 16 + subtable_src[low4] * 16;
00908     }
00909     }
00910 
00911 
00912     for(index = 0; index < number_of_subtables_with_diffs; index++) {
00913     subtable_dst = packed_table + 0x100 + number_of_subtables_with_offsets * 16 + index * 16;
00914     memcpy(subtable_dst, subtables_with_diffs[index], 16 * sizeof(WORD));
00915 
00916     }
00917 
00918 
00919     test_packed_table(packed_table);
00920 
00921     return packed_table;
00922 }
00923 
00924 int write_casemap_file(void)
00925 {
00926     WORD packed_size_in_words, offset_to_next_table_in_words;
00927     WORD *packed_table, value;
00928     FILE *file;
00929 
00930     if(!CreateCaseDiff(LIBDIR"UnicodeData.txt"))
00931     return -1;
00932 
00933     file = fopen(NLSDIR"/l_intl.nls", "wb");
00934 
00935     /* write version number */
00936     value = 1;
00937     fwrite(&value, 1, sizeof(WORD), file);
00938 
00939     /* pack upper case table */
00940     packed_table = pack_table(to_upper_org, &packed_size_in_words);
00941     offset_to_next_table_in_words = packed_size_in_words + 1;
00942     fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file);
00943     /* write packed upper case table */
00944     fwrite(packed_table, sizeof(WORD), packed_size_in_words, file);
00945     free(packed_table);
00946 
00947     /* pack lower case table */
00948     packed_table = pack_table(to_lower_org, &packed_size_in_words);
00949     offset_to_next_table_in_words = packed_size_in_words + 1;
00950     fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file);
00951     /* write packed lower case table */
00952     fwrite(packed_table, sizeof(WORD), packed_size_in_words, file);
00953     free(packed_table);
00954 
00955     fclose(file);
00956 
00957     free(to_upper_org);
00958     free(to_lower_org);
00959 
00960     return 0;
00961 }
00962 
/*
 * Generate the code page NLS files and then the case mapping file.
 * The results of both steps are ignored; the exit status is always 0.
 */
int main()
{
    (void)write_nls_files();
    (void)write_casemap_file();

    return 0;
}

Generated on Sun May 27 2012 04:37:45 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.