ReactOS 0.4.15-dev-6068-g8061a6f
main.cpp
Go to the documentation of this file.
1/*
2 * PROJECT: ReactOS TXT to NLS Converter
3 * LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
4 * FILE: sdk/tools/txt2nls/main.cpp
5 * COPYRIGHT: Copyright 2021 Jérôme Gardou <jerome.gardou@reactos.org>
6 */
7
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>
14
/* Characters treated as blanks when trimming lines and splitting them into tokens */
static const char whitespaces[] = " \t\f\v\n\r";
/* Line counter printed by error(); starts at -1 */
static long line_number = -1;
17
18#pragma pack(push, 1)
19#define MAXIMUM_LEADBYTES 12
21{
30};
31static_assert(sizeof(NLS_FILE_HEADER) == 26, "Wrong size for NLS_FILE_HEADER");
32#pragma pack(pop)
33
34static std::istream& get_clean_line(std::istream& stream, std::string& str)
35{
36 do
37 {
38 std::istream& ret = std::getline(stream, str);
39 if (!ret)
40 return ret;
41
42 /* Ignore comments */
43 std::size_t comment_pos = str.find_first_of(';');
44 if (comment_pos != std::string::npos)
45 {
46 str.erase(comment_pos);
47 }
48
49 /* Remove trailing spaces */
50 std::size_t end_of_line = str.find_last_not_of(whitespaces);
51 if (end_of_line != std::string::npos)
52 str.erase(end_of_line + 1);
53 else
54 str.clear();
55
57 } while (str.empty());
58
59 return stream;
60}
61
62static void tokenize(std::string& str, std::string& token)
63{
64 std::size_t token_start = str.find_first_not_of(whitespaces);
65 if (token_start == std::string::npos)
66 {
67 token = "";
68 str.clear();
69 return;
70 }
71
72 std::size_t token_end = str.find_first_of(whitespaces, token_start);
73 if (token_end == std::string::npos)
74 {
75 token = str.substr(token_start);
76 str.clear();
77 return;
78 }
79
80 token = str.substr(token_start, token_end);
81 str.erase(0, str.find_first_not_of(whitespaces, token_end));
82}
83
/*
 * Extracts the next token of `str` and converts it to the integer type T.
 *
 * `base` is handed to std::stoll; the default of 0 lets it auto-detect
 * decimal, octal ("0" prefix) and hexadecimal ("0x" prefix) notation.
 *
 * Throws std::invalid_argument when the token is missing, is not a number,
 * or does not fit in T. Out-of-range conditions are reported with the same
 * exception type so that callers need a single handler.
 *
 * T is expected to be an integer type narrower than long long.
 */
template<typename T>
static void tokenize(std::string& str, T& int_token, int base = 0)
{
    std::string token;
    tokenize(str, token);

    if (token.empty())
        throw std::invalid_argument("Missing numeric value");

    const long long min_val = static_cast<long long>(std::numeric_limits<T>::min());
    const long long max_val = static_cast<long long>(std::numeric_limits<T>::max());
    const std::string range_msg = token + " does not fit range ["
        + std::to_string(std::numeric_limits<T>::min()) + ":" + std::to_string(std::numeric_limits<T>::max()) + "]";

    long long val = 0;
    try
    {
        /* long long keeps the range check meaningful even where long is 32 bits wide */
        val = std::stoll(token, nullptr, base);
    }
    catch (const std::out_of_range&)
    {
        throw std::invalid_argument(range_msg);
    }

    if ((val > max_val) || (val < min_val))
        throw std::invalid_argument(range_msg);

    int_token = static_cast<T>(val);
}
98
99void error(const std::string& err)
100{
101 std::cerr << "Error parsing line " << line_number <<": " << err << std::endl;
102 std::exit(1);
103}
104
105int main(int argc, char* argv[])
106{
107 if (argc != 3)
108 {
109 std::cerr << "Usage: " << argv[0] << " <txt_in> <nls_out>" << std::endl;
110 return 1;
111 }
112
113 std::ifstream input(argv[1]);
114 if (!input.is_open())
115 {
116 std::cerr << "Unable to open " << argv[1] << std::endl;
117 return 1;
118 }
119
120 NLS_FILE_HEADER FileHeader;
121 memset(&FileHeader, 0, sizeof(FileHeader));
122
123 std::string curr_line;
124 // Get code page
125 if (!get_clean_line(input, curr_line))
126 {
127 std::cerr << "ERROR: File is empty" << std::endl;
128 return 1;
129 }
130
131 std::string token;
132 tokenize(curr_line, token);
133 if (token != "CODEPAGE")
134 error("expected CODEPAGE, got \"" + token + "\" instead");
135 try
136 {
137 tokenize(curr_line, FileHeader.CodePage, 10);
138 }
139 catch(const std::invalid_argument& ia)
140 {
141 error(ia.what());
142 }
143
144 if (!curr_line.empty())
145 error("Garbage after CODEPAGE statement: \"" + curr_line + "\"");
146
147 /* Get CPINFO */
148 if (!get_clean_line(input, curr_line))
149 error("Nothing after CODEPAGE statement");
150
151 tokenize(curr_line, token);
152 if (token != "CPINFO")
153 error("Expected CPINFO, got \"" + token + "\" instead");
154 try
155 {
156 tokenize(curr_line, FileHeader.MaximumCharacterSize);
157 tokenize(curr_line, FileHeader.DefaultChar);
158 tokenize(curr_line, FileHeader.UniDefaultChar);
159 }
160 catch(const std::invalid_argument& ia)
161 {
162 error(ia.what());
163 return 1;
164 }
165 if (!curr_line.empty())
166 error("Garbage after CPINFO statement: \"" + curr_line + "\"");
167 if ((FileHeader.MaximumCharacterSize != 1) && (FileHeader.MaximumCharacterSize != 2))
168 error("Expected 1 or 2 as max char size in CPINFO, got \"" + std::to_string(FileHeader.MaximumCharacterSize) + "\" instead");
169 if ((FileHeader.MaximumCharacterSize == 1) && (FileHeader.DefaultChar > std::numeric_limits<uint8_t>::max()))
170 error("Default MB character " + std::to_string(FileHeader.DefaultChar) + " doesn't fit in a 8-bit value");
171
172 /* Setup tables & default values */
173 bool has_mbtable = false;
174 uint16_t mb_table[256] = {0};
175
176 bool has_wctable = false;
177 uint8_t* wc_table = new uint8_t[65536 * FileHeader.MaximumCharacterSize];
178 if (FileHeader.MaximumCharacterSize == 1)
179 {
180 for (int i = 0; i < 65536; i++)
181 wc_table[i] = FileHeader.DefaultChar;
182 }
183 else
184 {
185 uint16_t* wc_table_dbcs = reinterpret_cast<uint16_t*>(wc_table);
186 for (int i = 0; i < 65536; i++)
187 wc_table_dbcs[i] = FileHeader.DefaultChar;
188 }
189
190 std::vector<uint16_t> dbcs_table;
191 uint16_t lb_offsets[256] = {0};
192 uint16_t dbcs_range_count = 0;
193
194 uint16_t glyph_table[256] = {0};
195 bool has_glyphs = false;
196
197 /* Now parse */
198 while (get_clean_line(input, curr_line))
199 {
200 tokenize(curr_line, token);
201
202 if (token == "ENDCODEPAGE")
203 {
204 if (!curr_line.empty())
205 error("Garbage after ENDCODEPAGE statement: \"" + curr_line + "\"");
206 break;
207 }
208 else if (token == "MBTABLE")
209 {
211 try
212 {
213 tokenize(curr_line, table_size);
214 }
215 catch(const std::invalid_argument& ia)
216 {
217 error(ia.what());
218 }
219 if (has_mbtable)
220 error("MBTABLE can only be declared once");
221 if (table_size > 256)
222 error("MBTABLE size can't be larger than 256");
223 if (!curr_line.empty())
224 error("Garbage after MBTABLE statement: \"" + curr_line + "\"");
225
226 has_mbtable = true;
227 while (table_size--)
228 {
229 if (!get_clean_line(input, curr_line))
230 error("Expected " + std::to_string(table_size + 1) + " more lines after MBTABLE token");
231
232 uint8_t mb;
233 uint16_t wc;
234
235 try
236 {
237 tokenize(curr_line, mb);
238 tokenize(curr_line, wc);
239 }
240 catch(const std::invalid_argument& ia)
241 {
242 error(ia.what());
243 }
244 if (!curr_line.empty())
245 error("Garbage after MBTABLE entry: \"" + curr_line + "\"");
246 mb_table[mb] = wc;
247 }
248 }
249 else if (token == "WCTABLE")
250 {
252 try
253 {
254 tokenize(curr_line, table_size);
255 }
256 catch(const std::invalid_argument& ia)
257 {
258 error(ia.what());
259 }
260 if (has_wctable)
261 error("WCTABLE can only be declared once");
262 if (!curr_line.empty())
263 error("Garbage after WCTABLE statement: \"" + curr_line + "\"");
264 if (table_size > 65536)
265 error("WCTABLE size can't be larger than 65536");
266
267 has_wctable = true;
268
269 if (FileHeader.MaximumCharacterSize == 1)
270 {
271 while (table_size--)
272 {
273 if (!get_clean_line(input, curr_line))
274 error("Expected " + std::to_string(table_size + 1) + " more lines after WCTABLE token");
275
276 uint8_t mb;
277 uint16_t wc;
278
279 try
280 {
281 tokenize(curr_line, wc);
282 tokenize(curr_line, mb);
283 }
284 catch(const std::invalid_argument& ia)
285 {
286 error(ia.what());
287 }
288 if (!curr_line.empty())
289 error("Garbage after WCTABLE entry: \"" + curr_line + "\"");
290 wc_table[wc] = mb;
291 }
292 }
293 else
294 {
295 uint16_t* wc_table_dbcs = reinterpret_cast<uint16_t*>(wc_table);
296 while (table_size--)
297 {
298 if (!get_clean_line(input, curr_line))
299 error("Expected " + std::to_string(table_size + 1) + " more lines after WCTABLE token");
300 uint16_t mb;
301 uint16_t wc;
302
303 try
304 {
305 tokenize(curr_line, wc);
306 tokenize(curr_line, mb);
307 }
308 catch(const std::invalid_argument& ia)
309 {
310 error(ia.what());
311 }
312 if (!curr_line.empty())
313 error("Garbage after MBTABLE entry: \"" + curr_line + "\"");
314 wc_table_dbcs[wc] = mb;
315 }
316 }
317 }
318 else if (token == "DBCSRANGE")
319 {
320 if (dbcs_range_count != 0)
321 error("DBCSRANGE can only be declared once");
322
323 try
324 {
325 tokenize(curr_line, dbcs_range_count);
326 }
327 catch(const std::invalid_argument& ia)
328 {
329 error(ia.what());
330 }
331 if (dbcs_range_count > (MAXIMUM_LEADBYTES / 2))
332 error("DBCSRANGE count can't exceed " + std::to_string(MAXIMUM_LEADBYTES / 2));
333 if (!curr_line.empty())
334 error("Garbage after DBCSRANGE token");
335
336 std::size_t current_offset = 0;
337
338 uint16_t range_count = dbcs_range_count;
339 uint16_t current_range = 0;
340 while (range_count--)
341 {
342 if (!get_clean_line(input, curr_line))
343 error("Expected new range after DBCSRANGE");
344
345 uint8_t RangeStart, RangeEnd;
346 try
347 {
348 tokenize(curr_line, RangeStart);
349 tokenize(curr_line, RangeEnd);
350 }
351 catch(const std::invalid_argument& ia)
352 {
353 error(ia.what());
354 }
355 if (!curr_line.empty())
356 error("Garbage after DBCS range declaration");
357
358 if (RangeStart > RangeEnd)
359 error("Invalid range specified for DBCSRANGE");
360
361 FileHeader.LeadByte[current_range*2] = RangeStart;
362 FileHeader.LeadByte[current_range*2+1] = RangeEnd;
363 current_range++;
364
365 dbcs_table.resize(dbcs_table.size() + 256 * (RangeEnd - RangeStart + 1), FileHeader.UniDefaultChar);
366
367 for (uint8_t LeadByte = RangeStart; LeadByte <= RangeEnd; LeadByte++)
368 {
369 if (!get_clean_line(input, curr_line))
370 error("Expected new DBCSTABLE after DBCS range declaration");
371
372 tokenize(curr_line, token);
373 if (token != "DBCSTABLE")
374 error("Expected new DBCSTABLE after DBCS range declaration");
375
377 try
378 {
379 tokenize(curr_line, table_size);
380 }
381 catch(const std::invalid_argument& ia)
382 {
383 error(ia.what());
384 }
385 if (table_size > 256)
386 error("DBCSTABLE can't have more than 256 entries");
387 while (table_size--)
388 {
389 if (!get_clean_line(input, curr_line))
390 error("Expected " + std::to_string(table_size + 1) + " more lines after DBCSTABLE token");
391
392 uint8_t mb;
393 uint16_t wc;
394
395 try
396 {
397 tokenize(curr_line, mb);
398 tokenize(curr_line, wc);
399 }
400 catch(const std::invalid_argument& ia)
401 {
402 error(ia.what());
403 }
404 if (!curr_line.empty())
405 error("Garbage after DBCSTABLE entry: \"" + curr_line + "\"");
406
407 dbcs_table[current_offset + mb] = wc;
408 }
409 current_offset += 256;
410 /* Offsets start at 256 for the offset table. */
411 lb_offsets[LeadByte] = current_offset;
412 }
413 }
414 }
415 else if (token == "GLYPHTABLE")
416 {
418 try
419 {
420 tokenize(curr_line, table_size);
421 }
422 catch(const std::invalid_argument& ia)
423 {
424 error(ia.what());
425 }
426 if (has_glyphs)
427 error("GLYPHTABLE can only be declared once");
428 if (table_size > 256)
429 error("GLYPHTABLE size can't be larger than 256");
430 if (!curr_line.empty())
431 error("Garbage after GLYPHTABLE statement: \"" + curr_line + "\"");
432 has_glyphs = true;
433
434 while (table_size--)
435 {
436 if (!get_clean_line(input, curr_line))
437 error("Expected " + std::to_string(table_size + 1) + " more lines after GLYPHTABLE token");
438
439 uint8_t mb;
440 uint16_t wc;
441
442 try
443 {
444 tokenize(curr_line, mb);
445 tokenize(curr_line, wc);
446 }
447 catch(const std::invalid_argument& ia)
448 {
449 error(ia.what());
450 }
451 if (!curr_line.empty())
452 error("Garbage after GLYPHTABLE entry: \"" + curr_line + "\"");
453 glyph_table[mb] = wc;
454 }
455 }
456 else
457 {
458 error("Unexpected token \"" + token + "\"");
459 }
460 }
461
462 if (token != "ENDCODEPAGE")
463 error("Expected last token to be \"ENDCODEPAGE\"");
464
465 input.close();
466
467 /* Ensure this is minimally workable */
468 if (!has_mbtable)
469 error("File has no MBTABLE statement");
470 if (!has_wctable)
471 error("File has no WCTABLE statement");
472
473 /* Glyph table fixup */
474 if (has_glyphs)
475 {
476 for(int i = 0; i < 256; i++)
477 {
478 if (glyph_table[i] == 0)
479 glyph_table[i] = mb_table[i];
480 }
481 }
482
483 /* Translated default char fixup */
484 if (FileHeader.MaximumCharacterSize == 1)
485 {
486 FileHeader.TransDefaultChar = mb_table[FileHeader.DefaultChar];
487 FileHeader.TransUniDefaultChar = wc_table[FileHeader.UniDefaultChar];
488 }
489 else
490 {
491 if (FileHeader.DefaultChar > 0xFF)
492 {
493 uint16_t offset = lb_offsets[FileHeader.DefaultChar >> 8];
494 if (!offset)
495 error("Default MB char is not translatable!");
496 FileHeader.TransDefaultChar = dbcs_table[(FileHeader.DefaultChar & 0xFF) + (offset - 256)];
497 }
498 else
499 {
500 FileHeader.TransDefaultChar = mb_table[FileHeader.DefaultChar];
501 }
502 uint16_t* wc_table_dbcs = reinterpret_cast<uint16_t*>(wc_table);
503 FileHeader.TransUniDefaultChar = wc_table_dbcs[FileHeader.UniDefaultChar];
504 }
505 FileHeader.HeaderSize = sizeof(NLS_FILE_HEADER) / sizeof(uint16_t);
506
507 std::ofstream output(argv[2], std::ios_base::binary);
508
509 output.write(reinterpret_cast<char*>(&FileHeader), sizeof(FileHeader));
510
511 uint16_t wc_table_offset = sizeof(mb_table) / sizeof(uint16_t)
512 + 1 /* size of glyph table */
513 + (has_glyphs ? 256 : 0) /* Glyph table */
514 + 1 /* Number of DBCS LeadByte ranges */
515 + (dbcs_range_count ? 256 : 0) /* offsets of lead byte sub tables */
516 + dbcs_table.size() /* LeadByte sub tables */
517 + 1; /* Unknown flag */
518
519 output.write(reinterpret_cast<char*>(&wc_table_offset), sizeof(wc_table_offset));
520
521 output.write(reinterpret_cast<char*>(mb_table), sizeof(mb_table));
522
523 uint16_t glyph_table_size = has_glyphs ? 256 : 0;
524 output.write(reinterpret_cast<char*>(&glyph_table_size), sizeof(glyph_table_size));
525 if (has_glyphs)
526 output.write(reinterpret_cast<char*>(glyph_table), sizeof(glyph_table));
527
528 output.write(reinterpret_cast<char*>(&dbcs_range_count), sizeof(dbcs_range_count));
529 if (dbcs_range_count)
530 {
531 output.write(reinterpret_cast<char*>(lb_offsets), sizeof(lb_offsets));
532 }
533 if (dbcs_table.size())
534 {
535 output.write(reinterpret_cast<char*>(dbcs_table.data()), dbcs_table.size() * sizeof(uint16_t));
536 }
537
538 uint16_t unknown_flag = FileHeader.MaximumCharacterSize == 1 ? 0 : 4;
539 output.write(reinterpret_cast<char*>(&unknown_flag), sizeof(unknown_flag));
540
541 output.write(reinterpret_cast<char*>(wc_table), 65536 * FileHeader.MaximumCharacterSize);
542
543 output.close();
544 delete[] wc_table;
545
546 return 0;
547}
static int argc
Definition: ServiceArgs.c:12
unsigned short int uint16_t
Definition: acefiex.h:54
UINT32 uint32_t
Definition: types.h:75
int main()
Definition: test.c:6
GLuint GLfloat * val
Definition: glext.h:7180
GLenum GLenum GLenum input
Definition: glext.h:9031
GLintptr offset
Definition: glext.h:5920
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat token
Definition: glfuncs.h:210
#define T
Definition: mbstring.h:31
#define error(str)
Definition: mkdosfs.c:1605
#define argv
Definition: mplay32.c:18
BYTE uint8_t
Definition: msvideo1.c:66
#define uint16_t
Definition: nsiface.idl:60
#define err(...)
const WCHAR * str
#define memset(x, y, z)
Definition: compat.h:39
LOCAL int table_size
Definition: write.c:65
#define MAXIMUM_LEADBYTES
Definition: main.cpp:19
static std::istream & get_clean_line(std::istream &stream, std::string &str)
Definition: main.cpp:34
static const char whitespaces[]
Definition: main.cpp:15
static long line_number
Definition: main.cpp:16
static void tokenize(std::string &str, std::string &token)
Definition: main.cpp:62
uint16_t TransUniDefaultChar
Definition: main.cpp:28
uint16_t TransDefaultChar
Definition: main.cpp:27
uint16_t MaximumCharacterSize
Definition: main.cpp:24
uint16_t UniDefaultChar
Definition: main.cpp:26
uint16_t CodePage
Definition: main.cpp:23
uint8_t LeadByte[MAXIMUM_LEADBYTES]
Definition: main.cpp:29
uint16_t DefaultChar
Definition: main.cpp:25
uint16_t HeaderSize
Definition: main.cpp:22
Definition: parse.h:23
int ret