ReactOS 0.4.16-dev-319-g6cf4263
main.cpp
Go to the documentation of this file.
/*
 * PROJECT:     ReactOS TXT to NLS Converter
 * LICENSE:     GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
 * FILE:        sdk/tools/txt2nls/main.cpp
 * COPYRIGHT:   Copyright 2021 Jérôme Gardou <jerome.gardou@reactos.org>
 */
7
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>
15
/* Characters that separate tokens and that get trimmed from the end of a line */
static const char whitespaces[] = " \t\f\v\n\r";
/* Line counter printed by error(); -1 until a line has been read */
static long line_number = -1;
18
#pragma pack(push, 1)
#define MAXIMUM_LEADBYTES 12
/*
 * Header written verbatim at the very beginning of the generated file.
 * Packed to 1 byte so that its in-memory layout is exactly the 26 bytes
 * that get written out.
 */
struct NLS_FILE_HEADER
{
    uint16_t HeaderSize;                 /* Size of this header, in 16-bit words */
    uint16_t CodePage;                   /* Value of the CODEPAGE statement */
    uint16_t MaximumCharacterSize;       /* 1 or 2, first value of the CPINFO statement */
    uint16_t DefaultChar;                /* Second value of the CPINFO statement */
    uint16_t UniDefaultChar;             /* Third value of the CPINFO statement */
    uint16_t TransDefaultChar;           /* DefaultChar looked up in the multibyte tables */
    uint16_t TransUniDefaultChar;        /* UniDefaultChar looked up in the wide char table */
    uint8_t LeadByte[MAXIMUM_LEADBYTES]; /* (start, end) pairs of the DBCS lead byte ranges */
};
static_assert(sizeof(NLS_FILE_HEADER) == 26, "Wrong size for NLS_FILE_HEADER");
#pragma pack(pop)
34
35static std::istream& get_clean_line(std::istream& stream, std::string& str)
36{
37 do
38 {
39 std::istream& ret = std::getline(stream, str);
40 if (!ret)
41 return ret;
42
43 /* Ignore comments */
44 std::size_t comment_pos = str.find_first_of(';');
45 if (comment_pos != std::string::npos)
46 {
47 str.erase(comment_pos);
48 }
49
50 /* Remove trailing spaces */
51 std::size_t end_of_line = str.find_last_not_of(whitespaces);
52 if (end_of_line != std::string::npos)
53 str.erase(end_of_line + 1);
54 else
55 str.clear();
56
58 } while (str.empty());
59
60 return stream;
61}
62
63static void tokenize(std::string& str, std::string& token)
64{
65 std::size_t token_start = str.find_first_not_of(whitespaces);
66 if (token_start == std::string::npos)
67 {
68 token = "";
69 str.clear();
70 return;
71 }
72
73 std::size_t token_end = str.find_first_of(whitespaces, token_start);
74 if (token_end == std::string::npos)
75 {
76 token = str.substr(token_start);
77 str.clear();
78 return;
79 }
80
81 token = str.substr(token_start, token_end);
82 str.erase(0, str.find_first_not_of(whitespaces, token_end));
83}
84
/*
 * Splits the first word off `str` and converts it to the integer type T.
 *
 * `base` is handed to std::stoll(); the default of 0 lets it detect the
 * base from the prefix of the number.
 *
 * Throws std::invalid_argument if the word is missing, is not entirely a
 * number, or does not fit in T. T is expected to be narrower than
 * long long, which is what the value is parsed as.
 */
template<typename T>
static void tokenize(std::string& str, T& int_token, int base = 0)
{
    std::string token;
    tokenize(str, token);

    const long long lowest = static_cast<long long>(std::numeric_limits<T>::min());
    const long long highest = static_cast<long long>(std::numeric_limits<T>::max());
    const auto range_error = [&]()
    {
        return std::invalid_argument(token + " does not fit range ["
            + std::to_string(lowest) + ":" + std::to_string(highest) + "]");
    };
    const auto format_error = [&]()
    {
        return std::invalid_argument("expected a number, got \"" + token + "\"");
    };

    std::size_t parsed_length = 0;
    long long val = 0;
    try
    {
        val = std::stoll(token, &parsed_length, base);
    }
    catch (const std::out_of_range&)
    {
        /* Callers only handle std::invalid_argument */
        throw range_error();
    }
    catch (const std::invalid_argument&)
    {
        throw format_error();
    }

    /* Reject tokens like "12abc" that only start with a number */
    if (parsed_length != token.size())
        throw format_error();

    if ((val > highest) || (val < lowest))
        throw range_error();

    int_token = static_cast<T>(val);
}
99
100void error(const std::string& err)
101{
102 std::cerr << "Error parsing line " << line_number <<": " << err << std::endl;
103 std::exit(1);
104}
105
106int main(int argc, char* argv[])
107{
108 if (argc != 3)
109 {
110 std::cerr << "Usage: " << argv[0] << " <txt_in> <nls_out>" << std::endl;
111 return 1;
112 }
113
114 std::ifstream input(argv[1]);
115 if (!input.is_open())
116 {
117 std::cerr << "Unable to open " << argv[1] << std::endl;
118 return 1;
119 }
120
121 NLS_FILE_HEADER FileHeader;
122 memset(&FileHeader, 0, sizeof(FileHeader));
123
124 std::string curr_line;
125 // Get code page
126 if (!get_clean_line(input, curr_line))
127 {
128 std::cerr << "ERROR: File is empty" << std::endl;
129 return 1;
130 }
131
132 std::string token;
133 tokenize(curr_line, token);
134 if (token != "CODEPAGE")
135 error("expected CODEPAGE, got \"" + token + "\" instead");
136 try
137 {
138 tokenize(curr_line, FileHeader.CodePage, 10);
139 }
140 catch(const std::invalid_argument& ia)
141 {
142 error(ia.what());
143 }
144
145 if (!curr_line.empty())
146 error("Garbage after CODEPAGE statement: \"" + curr_line + "\"");
147
148 /* Get CPINFO */
149 if (!get_clean_line(input, curr_line))
150 error("Nothing after CODEPAGE statement");
151
152 tokenize(curr_line, token);
153 if (token != "CPINFO")
154 error("Expected CPINFO, got \"" + token + "\" instead");
155 try
156 {
157 tokenize(curr_line, FileHeader.MaximumCharacterSize);
158 tokenize(curr_line, FileHeader.DefaultChar);
159 tokenize(curr_line, FileHeader.UniDefaultChar);
160 }
161 catch(const std::invalid_argument& ia)
162 {
163 error(ia.what());
164 return 1;
165 }
166 if (!curr_line.empty())
167 error("Garbage after CPINFO statement: \"" + curr_line + "\"");
168 if ((FileHeader.MaximumCharacterSize != 1) && (FileHeader.MaximumCharacterSize != 2))
169 error("Expected 1 or 2 as max char size in CPINFO, got \"" + std::to_string(FileHeader.MaximumCharacterSize) + "\" instead");
170 if ((FileHeader.MaximumCharacterSize == 1) && (FileHeader.DefaultChar > std::numeric_limits<uint8_t>::max()))
171 error("Default MB character " + std::to_string(FileHeader.DefaultChar) + " doesn't fit in a 8-bit value");
172
173 /* Setup tables & default values */
174 bool has_mbtable = false;
175 uint16_t mb_table[256] = {0};
176
177 bool has_wctable = false;
178 uint8_t* wc_table = new uint8_t[65536 * FileHeader.MaximumCharacterSize];
179 if (FileHeader.MaximumCharacterSize == 1)
180 {
181 for (int i = 0; i < 65536; i++)
182 wc_table[i] = FileHeader.DefaultChar;
183 }
184 else
185 {
186 uint16_t* wc_table_dbcs = reinterpret_cast<uint16_t*>(wc_table);
187 for (int i = 0; i < 65536; i++)
188 wc_table_dbcs[i] = FileHeader.DefaultChar;
189 }
190
191 std::vector<uint16_t> dbcs_table;
192 uint16_t lb_offsets[256] = {0};
193 uint16_t dbcs_range_count = 0;
194
195 uint16_t glyph_table[256] = {0};
196 bool has_glyphs = false;
197
198 /* Now parse */
199 while (get_clean_line(input, curr_line))
200 {
201 tokenize(curr_line, token);
202
203 if (token == "ENDCODEPAGE")
204 {
205 if (!curr_line.empty())
206 error("Garbage after ENDCODEPAGE statement: \"" + curr_line + "\"");
207 break;
208 }
209 else if (token == "MBTABLE")
210 {
212 try
213 {
214 tokenize(curr_line, table_size);
215 }
216 catch(const std::invalid_argument& ia)
217 {
218 error(ia.what());
219 }
220 if (has_mbtable)
221 error("MBTABLE can only be declared once");
222 if (table_size > 256)
223 error("MBTABLE size can't be larger than 256");
224 if (!curr_line.empty())
225 error("Garbage after MBTABLE statement: \"" + curr_line + "\"");
226
227 has_mbtable = true;
228 while (table_size--)
229 {
230 if (!get_clean_line(input, curr_line))
231 error("Expected " + std::to_string(table_size + 1) + " more lines after MBTABLE token");
232
233 uint8_t mb;
234 uint16_t wc;
235
236 try
237 {
238 tokenize(curr_line, mb);
239 tokenize(curr_line, wc);
240 }
241 catch(const std::invalid_argument& ia)
242 {
243 error(ia.what());
244 }
245 if (!curr_line.empty())
246 error("Garbage after MBTABLE entry: \"" + curr_line + "\"");
247 mb_table[mb] = wc;
248 }
249 }
250 else if (token == "WCTABLE")
251 {
253 try
254 {
255 tokenize(curr_line, table_size);
256 }
257 catch(const std::invalid_argument& ia)
258 {
259 error(ia.what());
260 }
261 if (has_wctable)
262 error("WCTABLE can only be declared once");
263 if (!curr_line.empty())
264 error("Garbage after WCTABLE statement: \"" + curr_line + "\"");
265 if (table_size > 65536)
266 error("WCTABLE size can't be larger than 65536");
267
268 has_wctable = true;
269
270 if (FileHeader.MaximumCharacterSize == 1)
271 {
272 while (table_size--)
273 {
274 if (!get_clean_line(input, curr_line))
275 error("Expected " + std::to_string(table_size + 1) + " more lines after WCTABLE token");
276
277 uint8_t mb;
278 uint16_t wc;
279
280 try
281 {
282 tokenize(curr_line, wc);
283 tokenize(curr_line, mb);
284 }
285 catch(const std::invalid_argument& ia)
286 {
287 error(ia.what());
288 }
289 if (!curr_line.empty())
290 error("Garbage after WCTABLE entry: \"" + curr_line + "\"");
291 wc_table[wc] = mb;
292 }
293 }
294 else
295 {
296 uint16_t* wc_table_dbcs = reinterpret_cast<uint16_t*>(wc_table);
297 while (table_size--)
298 {
299 if (!get_clean_line(input, curr_line))
300 error("Expected " + std::to_string(table_size + 1) + " more lines after WCTABLE token");
301 uint16_t mb;
302 uint16_t wc;
303
304 try
305 {
306 tokenize(curr_line, wc);
307 tokenize(curr_line, mb);
308 }
309 catch(const std::invalid_argument& ia)
310 {
311 error(ia.what());
312 }
313 if (!curr_line.empty())
314 error("Garbage after MBTABLE entry: \"" + curr_line + "\"");
315 wc_table_dbcs[wc] = mb;
316 }
317 }
318 }
319 else if (token == "DBCSRANGE")
320 {
321 if (dbcs_range_count != 0)
322 error("DBCSRANGE can only be declared once");
323
324 try
325 {
326 tokenize(curr_line, dbcs_range_count);
327 }
328 catch(const std::invalid_argument& ia)
329 {
330 error(ia.what());
331 }
332 if (dbcs_range_count > (MAXIMUM_LEADBYTES / 2))
333 error("DBCSRANGE count can't exceed " + std::to_string(MAXIMUM_LEADBYTES / 2));
334 if (!curr_line.empty())
335 error("Garbage after DBCSRANGE token");
336
337 std::size_t current_offset = 0;
338
339 uint16_t range_count = dbcs_range_count;
340 uint16_t current_range = 0;
341 while (range_count--)
342 {
343 if (!get_clean_line(input, curr_line))
344 error("Expected new range after DBCSRANGE");
345
346 uint8_t RangeStart, RangeEnd;
347 try
348 {
349 tokenize(curr_line, RangeStart);
350 tokenize(curr_line, RangeEnd);
351 }
352 catch(const std::invalid_argument& ia)
353 {
354 error(ia.what());
355 }
356 if (!curr_line.empty())
357 error("Garbage after DBCS range declaration");
358
359 if (RangeStart > RangeEnd)
360 error("Invalid range specified for DBCSRANGE");
361
362 FileHeader.LeadByte[current_range*2] = RangeStart;
363 FileHeader.LeadByte[current_range*2+1] = RangeEnd;
364 current_range++;
365
366 dbcs_table.resize(dbcs_table.size() + 256 * (RangeEnd - RangeStart + 1), FileHeader.UniDefaultChar);
367
368 for (uint8_t LeadByte = RangeStart; LeadByte <= RangeEnd; LeadByte++)
369 {
370 if (!get_clean_line(input, curr_line))
371 error("Expected new DBCSTABLE after DBCS range declaration");
372
373 tokenize(curr_line, token);
374 if (token != "DBCSTABLE")
375 error("Expected new DBCSTABLE after DBCS range declaration");
376
378 try
379 {
380 tokenize(curr_line, table_size);
381 }
382 catch(const std::invalid_argument& ia)
383 {
384 error(ia.what());
385 }
386 if (table_size > 256)
387 error("DBCSTABLE can't have more than 256 entries");
388 while (table_size--)
389 {
390 if (!get_clean_line(input, curr_line))
391 error("Expected " + std::to_string(table_size + 1) + " more lines after DBCSTABLE token");
392
393 uint8_t mb;
394 uint16_t wc;
395
396 try
397 {
398 tokenize(curr_line, mb);
399 tokenize(curr_line, wc);
400 }
401 catch(const std::invalid_argument& ia)
402 {
403 error(ia.what());
404 }
405 if (!curr_line.empty())
406 error("Garbage after DBCSTABLE entry: \"" + curr_line + "\"");
407
408 dbcs_table[current_offset + mb] = wc;
409 }
410 current_offset += 256;
411 /* Offsets start at 256 for the offset table. */
412 lb_offsets[LeadByte] = current_offset;
413 }
414 }
415 }
416 else if (token == "GLYPHTABLE")
417 {
419 try
420 {
421 tokenize(curr_line, table_size);
422 }
423 catch(const std::invalid_argument& ia)
424 {
425 error(ia.what());
426 }
427 if (has_glyphs)
428 error("GLYPHTABLE can only be declared once");
429 if (table_size > 256)
430 error("GLYPHTABLE size can't be larger than 256");
431 if (!curr_line.empty())
432 error("Garbage after GLYPHTABLE statement: \"" + curr_line + "\"");
433 has_glyphs = true;
434
435 while (table_size--)
436 {
437 if (!get_clean_line(input, curr_line))
438 error("Expected " + std::to_string(table_size + 1) + " more lines after GLYPHTABLE token");
439
440 uint8_t mb;
441 uint16_t wc;
442
443 try
444 {
445 tokenize(curr_line, mb);
446 tokenize(curr_line, wc);
447 }
448 catch(const std::invalid_argument& ia)
449 {
450 error(ia.what());
451 }
452 if (!curr_line.empty())
453 error("Garbage after GLYPHTABLE entry: \"" + curr_line + "\"");
454 glyph_table[mb] = wc;
455 }
456 }
457 else
458 {
459 error("Unexpected token \"" + token + "\"");
460 }
461 }
462
463 if (token != "ENDCODEPAGE")
464 error("Expected last token to be \"ENDCODEPAGE\"");
465
466 input.close();
467
468 /* Ensure this is minimally workable */
469 if (!has_mbtable)
470 error("File has no MBTABLE statement");
471 if (!has_wctable)
472 error("File has no WCTABLE statement");
473
474 /* Glyph table fixup */
475 if (has_glyphs)
476 {
477 for(int i = 0; i < 256; i++)
478 {
479 if (glyph_table[i] == 0)
480 glyph_table[i] = mb_table[i];
481 }
482 }
483
484 /* Translated default char fixup */
485 if (FileHeader.MaximumCharacterSize == 1)
486 {
487 FileHeader.TransDefaultChar = mb_table[FileHeader.DefaultChar];
488 FileHeader.TransUniDefaultChar = wc_table[FileHeader.UniDefaultChar];
489 }
490 else
491 {
492 if (FileHeader.DefaultChar > 0xFF)
493 {
494 uint16_t offset = lb_offsets[FileHeader.DefaultChar >> 8];
495 if (!offset)
496 error("Default MB char is not translatable!");
497 FileHeader.TransDefaultChar = dbcs_table[(FileHeader.DefaultChar & 0xFF) + (offset - 256)];
498 }
499 else
500 {
501 FileHeader.TransDefaultChar = mb_table[FileHeader.DefaultChar];
502 }
503 uint16_t* wc_table_dbcs = reinterpret_cast<uint16_t*>(wc_table);
504 FileHeader.TransUniDefaultChar = wc_table_dbcs[FileHeader.UniDefaultChar];
505 }
506 FileHeader.HeaderSize = sizeof(NLS_FILE_HEADER) / sizeof(uint16_t);
507
508 std::ofstream output(argv[2], std::ios_base::binary);
509
510 output.write(reinterpret_cast<char*>(&FileHeader), sizeof(FileHeader));
511
512 uint16_t wc_table_offset = sizeof(mb_table) / sizeof(uint16_t)
513 + 1 /* size of glyph table */
514 + (has_glyphs ? 256 : 0) /* Glyph table */
515 + 1 /* Number of DBCS LeadByte ranges */
516 + (dbcs_range_count ? 256 : 0) /* offsets of lead byte sub tables */
517 + dbcs_table.size() /* LeadByte sub tables */
518 + 1; /* Unknown flag */
519
520 output.write(reinterpret_cast<char*>(&wc_table_offset), sizeof(wc_table_offset));
521
522 output.write(reinterpret_cast<char*>(mb_table), sizeof(mb_table));
523
524 uint16_t glyph_table_size = has_glyphs ? 256 : 0;
525 output.write(reinterpret_cast<char*>(&glyph_table_size), sizeof(glyph_table_size));
526 if (has_glyphs)
527 output.write(reinterpret_cast<char*>(glyph_table), sizeof(glyph_table));
528
529 output.write(reinterpret_cast<char*>(&dbcs_range_count), sizeof(dbcs_range_count));
530 if (dbcs_range_count)
531 {
532 output.write(reinterpret_cast<char*>(lb_offsets), sizeof(lb_offsets));
533 }
534 if (dbcs_table.size())
535 {
536 output.write(reinterpret_cast<char*>(dbcs_table.data()), dbcs_table.size() * sizeof(uint16_t));
537 }
538
539 uint16_t unknown_flag = FileHeader.MaximumCharacterSize == 1 ? 0 : 4;
540 output.write(reinterpret_cast<char*>(&unknown_flag), sizeof(unknown_flag));
541
542 output.write(reinterpret_cast<char*>(wc_table), 65536 * FileHeader.MaximumCharacterSize);
543
544 output.close();
545 delete[] wc_table;
546
547 return 0;
548}
static int argc
Definition: ServiceArgs.c:12
unsigned short int uint16_t
Definition: acefiex.h:54
UINT32 uint32_t
Definition: types.h:75
int main()
Definition: test.c:6
GLuint GLfloat * val
Definition: glext.h:7180
GLenum GLenum GLenum input
Definition: glext.h:9031
GLintptr offset
Definition: glext.h:5920
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat token
Definition: glfuncs.h:210
#define T
Definition: mbstring.h:31
#define error(str)
Definition: mkdosfs.c:1605
#define argv
Definition: mplay32.c:18
BYTE uint8_t
Definition: msvideo1.c:66
#define uint16_t
Definition: nsiface.idl:60
#define err(...)
const WCHAR * str
#define memset(x, y, z)
Definition: compat.h:39
LOCAL int table_size
Definition: write.c:65
#define MAXIMUM_LEADBYTES
Definition: main.cpp:20
static std::istream & get_clean_line(std::istream &stream, std::string &str)
Definition: main.cpp:35
static const char whitespaces[]
Definition: main.cpp:16
static long line_number
Definition: main.cpp:17
static void tokenize(std::string &str, std::string &token)
Definition: main.cpp:63
uint16_t TransUniDefaultChar
Definition: main.cpp:29
uint16_t TransDefaultChar
Definition: main.cpp:28
uint16_t MaximumCharacterSize
Definition: main.cpp:25
uint16_t UniDefaultChar
Definition: main.cpp:27
uint16_t CodePage
Definition: main.cpp:24
uint8_t LeadByte[MAXIMUM_LEADBYTES]
Definition: main.cpp:30
uint16_t DefaultChar
Definition: main.cpp:26
uint16_t HeaderSize
Definition: main.cpp:23
Definition: parse.h:23
int ret