ReactOS 0.4.16-dev-122-g325d74c
text.c
Go to the documentation of this file.
/*
 * PROJECT:    ReactOS Notepad
 * LICENSE:    LGPL-2.1-or-later (https://spdx.org/licenses/LGPL-2.1-or-later)
 * PURPOSE:    Providing a Windows-compatible simple text editor for ReactOS
 * COPYRIGHT:  Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
 *             Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
 *             Copyright 2002 Andriy Palamarchuk
 *             Copyright 2019-2023 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com>
 */
10
11#include "notepad.h"
12#include <assert.h>
13
15{
16 const signed char *pch = pText;
17 while (dwSize-- > 0)
18 {
19 if (*pch <= 0)
20 return FALSE;
21
22 ++pch;
23 }
24 return TRUE;
25}
26
28{
30
31 if (IsTextNonZeroASCII(pBytes, dwSize))
32 return ENCODING_DEFAULT;
33
34 if (IsTextUnicode(pBytes, dwSize, &flags))
35 return ENCODING_UTF16LE;
36
38 return ENCODING_UTF16BE;
39
40 /* is it UTF-8? */
42 return ENCODING_UTF8;
43
44 return ENCODING_ANSI;
45}
46
47static VOID
48ReplaceNewLines(LPWSTR pszNew, SIZE_T cchNew, LPCWSTR pszOld, SIZE_T cchOld)
49{
50 BOOL bPrevCR = FALSE;
51 SIZE_T ichNew, ichOld;
52
53 for (ichOld = ichNew = 0; ichOld < cchOld; ++ichOld)
54 {
55 WCHAR ch = pszOld[ichOld];
56
57 if (ch == L'\n')
58 {
59 if (!bPrevCR)
60 {
61 pszNew[ichNew++] = L'\r';
62 pszNew[ichNew++] = L'\n';
63 }
64 }
65 else if (ch == '\r')
66 {
67 pszNew[ichNew++] = L'\r';
68 pszNew[ichNew++] = L'\n';
69 }
70 else
71 {
72 pszNew[ichNew++] = ch;
73 }
74
75 bPrevCR = (ch == L'\r');
76 }
77
78 pszNew[ichNew] = UNICODE_NULL;
79 assert(ichNew == cchNew);
80}
81
82static BOOL
83ProcessNewLinesAndNulls(HLOCAL *phLocal, LPWSTR *ppszText, SIZE_T *pcchText, EOLN *piEoln)
84{
85 SIZE_T ich, cchText = *pcchText, adwEolnCount[3] = { 0, 0, 0 }, cNonCRLFs;
86 LPWSTR pszText = *ppszText;
87 EOLN iEoln;
88 BOOL bPrevCR = FALSE;
89
90 /* Replace '\0' with SPACE. Count newlines. */
91 for (ich = 0; ich < cchText; ++ich)
92 {
93 WCHAR ch = pszText[ich];
94 if (ch == UNICODE_NULL)
95 pszText[ich] = L' ';
96
97 if (ch == L'\n')
98 {
99 if (bPrevCR)
100 {
101 adwEolnCount[EOLN_CR]--;
102 adwEolnCount[EOLN_CRLF]++;
103 }
104 else
105 {
106 adwEolnCount[EOLN_LF]++;
107 }
108 }
109 else if (ch == '\r')
110 {
111 adwEolnCount[EOLN_CR]++;
112 }
113
114 bPrevCR = (ch == L'\r');
115 }
116
117 /* Choose the newline code */
118 if (adwEolnCount[EOLN_CR] > adwEolnCount[EOLN_CRLF])
119 iEoln = EOLN_CR;
120 else if (adwEolnCount[EOLN_LF] > adwEolnCount[EOLN_CRLF])
121 iEoln = EOLN_LF;
122 else
123 iEoln = EOLN_CRLF;
124
125 cNonCRLFs = adwEolnCount[EOLN_CR] + adwEolnCount[EOLN_LF];
126 if (cNonCRLFs != 0)
127 {
128 /* Allocate a buffer for EM_SETHANDLE */
129 SIZE_T cchNew = cchText + cNonCRLFs;
130 HLOCAL hLocal = LocalAlloc(LMEM_MOVEABLE, (cchNew + 1) * sizeof(WCHAR));
131 LPWSTR pszNew = LocalLock(hLocal);
132 if (!pszNew)
133 {
134 LocalFree(hLocal);
135 return FALSE; /* Failure */
136 }
137
138 ReplaceNewLines(pszNew, cchNew, pszText, cchText);
139
140 /* Replace with new data */
141 LocalUnlock(*phLocal);
142 LocalFree(*phLocal);
143 *phLocal = hLocal;
144 *ppszText = pszNew;
145 *pcchText = cchNew;
146 }
147
148 *piEoln = iEoln;
149 return TRUE;
150}
151
152BOOL
153ReadText(HANDLE hFile, HLOCAL *phLocal, ENCODING *pencFile, EOLN *piEoln)
154{
155 LPBYTE pBytes = NULL;
156 LPWSTR pszText, pszNewText = NULL;
157 DWORD dwSize, dwPos;
158 SIZE_T i, cchText, cbContent;
160 ENCODING encFile;
161 UINT iCodePage;
162 HANDLE hMapping = INVALID_HANDLE_VALUE;
163 HLOCAL hNewLocal;
164
167 goto done;
168
169 if (dwSize == 0) // If file is empty
170 {
171 hNewLocal = LocalReAlloc(*phLocal, sizeof(UNICODE_NULL), LMEM_MOVEABLE);
172 pszNewText = LocalLock(hNewLocal);
173 if (hNewLocal == NULL || pszNewText == NULL)
174 goto done;
175
176 *pszNewText = UNICODE_NULL;
177 LocalUnlock(hNewLocal);
178
179 *phLocal = hNewLocal;
180 *piEoln = EOLN_CRLF;
181 *pencFile = ENCODING_DEFAULT;
182 return TRUE;
183 }
184
185 hMapping = CreateFileMappingW(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
186 if (hMapping == NULL)
187 goto done;
188
189 pBytes = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, dwSize);
190 if (!pBytes)
191 goto done;
192
193 /* Look for Byte Order Marks */
194 dwPos = 0;
195 if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
196 {
197 encFile = ENCODING_UTF16LE;
198 dwPos += 2;
199 }
200 else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF))
201 {
202 encFile = ENCODING_UTF16BE;
203 dwPos += 2;
204 }
205 else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF))
206 {
207 encFile = ENCODING_UTF8BOM;
208 dwPos += 3;
209 }
210 else
211 {
212 encFile = AnalyzeEncoding(pBytes, dwSize);
213 }
214
215 switch(encFile)
216 {
217 case ENCODING_UTF16BE:
218 case ENCODING_UTF16LE:
219 {
220 /* Re-allocate the buffer for EM_SETHANDLE */
221 pszText = (LPWSTR) &pBytes[dwPos];
222 cchText = (dwSize - dwPos) / sizeof(WCHAR);
223 hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR), LMEM_MOVEABLE);
224 pszNewText = LocalLock(hNewLocal);
225 if (pszNewText == NULL)
226 goto done;
227
228 *phLocal = hNewLocal;
229 CopyMemory(pszNewText, pszText, cchText * sizeof(WCHAR));
230
231 if (encFile == ENCODING_UTF16BE) /* big endian; Swap bytes */
232 {
233 BYTE tmp, *pb = (LPBYTE)pszNewText;
234 for (i = 0; i < cchText * 2; i += 2)
235 {
236 tmp = pb[i];
237 pb[i] = pb[i + 1];
238 pb[i + 1] = tmp;
239 }
240 }
241 break;
242 }
243
244 case ENCODING_ANSI:
245 case ENCODING_UTF8:
246 case ENCODING_UTF8BOM:
247 {
248 iCodePage = ((encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM) ? CP_UTF8 : CP_ACP);
249
250 /* Get ready for ANSI-to-Wide conversion */
251 cbContent = dwSize - dwPos;
252 cchText = 0;
253 if (cbContent > 0)
254 {
255 cchText = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], (INT)cbContent, NULL, 0);
256 if (cchText == 0)
257 goto done;
258 }
259
260 /* Re-allocate the buffer for EM_SETHANDLE */
261 hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR), LMEM_MOVEABLE);
262 pszNewText = LocalLock(hNewLocal);
263 if (!pszNewText)
264 goto done;
265 *phLocal = hNewLocal;
266
267 /* Do ANSI-to-Wide conversion */
268 if (cbContent > 0)
269 {
270 if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], (INT)cbContent,
271 pszNewText, (INT)cchText))
272 {
273 goto done;
274 }
275 }
276 break;
277 }
278
280 }
281
282 pszNewText[cchText] = UNICODE_NULL;
283
284 if (!ProcessNewLinesAndNulls(phLocal, &pszNewText, &cchText, piEoln))
285 goto done;
286
287 *pencFile = encFile;
288 bSuccess = TRUE;
289
290done:
291 if (pBytes)
292 UnmapViewOfFile(pBytes);
293 if (hMapping != INVALID_HANDLE_VALUE)
294 CloseHandle(hMapping);
295 if (pszNewText)
296 LocalUnlock(*phLocal);
297 return bSuccess;
298}
299
300static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile)
301{
302 LPBYTE pBytes = NULL;
303 LPBYTE pAllocBuffer = NULL;
304 DWORD dwPos = 0;
305 DWORD dwByteCount;
306 BYTE buffer[1024];
307 UINT iCodePage = 0;
308 DWORD dwDummy, i;
310 int iBufferSize, iRequiredBytes;
311 BYTE b;
312
313 while(dwPos < dwTextLen)
314 {
315 switch(encFile)
316 {
317 case ENCODING_UTF16LE:
318 pBytes = (LPBYTE) &pszText[dwPos];
319 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
320 dwPos = dwTextLen;
321 break;
322
323 case ENCODING_UTF16BE:
324 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
325 if (dwByteCount > sizeof(buffer))
326 dwByteCount = sizeof(buffer);
327
328 memcpy(buffer, &pszText[dwPos], dwByteCount);
329 for (i = 0; i < dwByteCount; i += 2)
330 {
331 b = buffer[i+0];
332 buffer[i+0] = buffer[i+1];
333 buffer[i+1] = b;
334 }
335 pBytes = (LPBYTE) &buffer[dwPos];
336 dwPos += dwByteCount / sizeof(WCHAR);
337 break;
338
339 case ENCODING_ANSI:
340 case ENCODING_UTF8:
341 case ENCODING_UTF8BOM:
342 if (encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM)
343 iCodePage = CP_UTF8;
344 else
345 iCodePage = CP_ACP;
346
347 iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL);
348 if (iRequiredBytes <= 0)
349 {
350 goto done;
351 }
352 else if (iRequiredBytes < sizeof(buffer))
353 {
354 pBytes = buffer;
355 iBufferSize = sizeof(buffer);
356 }
357 else
358 {
359 pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes);
360 if (!pAllocBuffer)
361 return FALSE;
362 pBytes = pAllocBuffer;
363 iBufferSize = iRequiredBytes;
364 }
365
366 dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL);
367 if (!dwByteCount)
368 goto done;
369
370 dwPos = dwTextLen;
371 break;
372
373 default:
374 goto done;
375 }
376
377 if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL))
378 goto done;
379
380 /* free the buffer, if we have allocated one */
381 if (pAllocBuffer)
382 {
383 HeapFree(GetProcessHeap(), 0, pAllocBuffer);
384 pAllocBuffer = NULL;
385 }
386 }
387 bSuccess = TRUE;
388
389done:
390 if (pAllocBuffer)
391 HeapFree(GetProcessHeap(), 0, pAllocBuffer);
392 return bSuccess;
393}
394
395BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, EOLN iEoln)
396{
397 WCHAR wcBom;
398 LPCWSTR pszLF = L"\n";
399 DWORD dwPos, dwNext;
400
401 /* Write the proper byte order marks if not ANSI or UTF-8 without BOM */
402 if (encFile != ENCODING_ANSI && encFile != ENCODING_UTF8)
403 {
404 wcBom = 0xFEFF;
405 if (!WriteEncodedText(hFile, &wcBom, 1, encFile))
406 return FALSE;
407 }
408
409 dwPos = 0;
410
411 /* pszText eoln are always \r\n */
412
413 do
414 {
415 /* Find the next eoln */
416 dwNext = dwPos;
417 while(dwNext < dwTextLen)
418 {
419 if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n')
420 break;
421 dwNext++;
422 }
423
424 if (dwNext != dwTextLen)
425 {
426 switch (iEoln)
427 {
428 case EOLN_LF:
429 /* Write text (without eoln) */
430 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
431 return FALSE;
432 /* Write eoln */
433 if (!WriteEncodedText(hFile, pszLF, 1, encFile))
434 return FALSE;
435 break;
436 case EOLN_CR:
437 /* Write text (including \r as eoln) */
438 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile))
439 return FALSE;
440 break;
441 case EOLN_CRLF:
442 /* Write text (including \r\n as eoln) */
443 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile))
444 return FALSE;
445 break;
446 default:
447 return FALSE;
448 }
449 }
450 else
451 {
452 /* Write text (without eoln, since this is the end of the file) */
453 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
454 return FALSE;
455 }
456
457 /* Skip \r\n */
458 dwPos = dwNext + 2;
459 }
460 while (dwPos < dwTextLen);
461
462 return TRUE;
463}
BOOL ReadText(HANDLE hFile, HLOCAL *phLocal, ENCODING *pencFile, EOLN *piEoln)
Definition: text.c:153
static BOOL ProcessNewLinesAndNulls(HLOCAL *phLocal, LPWSTR *ppszText, SIZE_T *pcchText, EOLN *piEoln)
Definition: text.c:83
static VOID ReplaceNewLines(LPWSTR pszNew, SIZE_T cchNew, LPCWSTR pszOld, SIZE_T cchOld)
Definition: text.c:48
static BOOL IsTextNonZeroASCII(LPCVOID pText, DWORD dwSize)
Definition: text.c:14
static ENCODING AnalyzeEncoding(const BYTE *pBytes, DWORD dwSize)
Definition: text.c:27
BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, EOLN iEoln)
Definition: text.c:395
static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile)
Definition: text.c:300
#define NULL
Definition: types.h:112
#define TRUE
Definition: types.h:120
#define FALSE
Definition: types.h:117
BOOL WINAPI IsTextUnicode(IN CONST VOID *lpv, IN INT iSize, IN OUT LPINT lpiResult OPTIONAL)
Definition: unicode.c:27
#define CloseHandle
Definition: compat.h:739
#define GetProcessHeap()
Definition: compat.h:736
#define PAGE_READONLY
Definition: compat.h:138
#define UnmapViewOfFile
Definition: compat.h:746
#define CP_ACP
Definition: compat.h:109
#define INVALID_HANDLE_VALUE
Definition: compat.h:731
#define HeapAlloc
Definition: compat.h:733
#define CreateFileMappingW(a, b, c, d, e, f)
Definition: compat.h:744
#define HeapFree(x, y, z)
Definition: compat.h:735
#define FILE_MAP_READ
Definition: compat.h:776
#define WideCharToMultiByte
Definition: compat.h:111
#define MapViewOfFile
Definition: compat.h:745
#define MultiByteToWideChar
Definition: compat.h:110
DWORD WINAPI GetFileSize(HANDLE hFile, LPDWORD lpFileSizeHigh)
Definition: fileinfo.c:331
BOOL WINAPI WriteFile(IN HANDLE hFile, IN LPCVOID lpBuffer, IN DWORD nNumberOfBytesToWrite OPTIONAL, OUT LPDWORD lpNumberOfBytesWritten, IN LPOVERLAPPED lpOverlapped OPTIONAL)
Definition: rw.c:24
#define assert(x)
Definition: debug.h:53
static BOOLEAN bSuccess
Definition: drive.cpp:477
unsigned int BOOL
Definition: ntddk_ex.h:94
unsigned long DWORD
Definition: ntddk_ex.h:95
GLuint buffer
Definition: glext.h:5915
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
GLbitfield flags
Definition: glext.h:7161
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
HLOCAL NTAPI LocalReAlloc(HLOCAL hMem, SIZE_T dwBytes, UINT uFlags)
Definition: heapmem.c:1625
HLOCAL NTAPI LocalAlloc(UINT uFlags, SIZE_T dwBytes)
Definition: heapmem.c:1390
LPVOID NTAPI LocalLock(HLOCAL hMem)
Definition: heapmem.c:1616
BOOL NTAPI LocalUnlock(HLOCAL hMem)
Definition: heapmem.c:1805
HLOCAL NTAPI LocalFree(HLOCAL hMem)
Definition: heapmem.c:1594
#define MB_ERR_INVALID_CHARS
Definition: unicode.h:41
#define b
Definition: ke_i.h:79
#define pch(ap)
Definition: match.c:418
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
PSDBQUERYRESULT_VISTA PVOID DWORD * dwSize
Definition: env.c:56
ENCODING
Definition: more.c:492
@ ENCODING_UTF16BE
Definition: more.c:495
@ ENCODING_UTF8
Definition: more.c:496
@ ENCODING_UTF16LE
Definition: more.c:494
@ ENCODING_ANSI
Definition: more.c:493
_In_ HANDLE hFile
Definition: mswsock.h:90
unsigned int UINT
Definition: ndis.h:50
#define ENCODING_DEFAULT
Definition: notepad.h:49
@ ENCODING_UTF8BOM
Definition: notepad.h:46
EOLN
Definition: notepad.h:52
@ EOLN_LF
Definition: notepad.h:54
@ EOLN_CR
Definition: notepad.h:55
@ EOLN_CRLF
Definition: notepad.h:53
#define DEFAULT_UNREACHABLE
#define UNICODE_NULL
#define L(x)
Definition: ntvdm.h:50
#define CP_UTF8
Definition: nls.h:20
LPCSTR pText
Definition: txtscale.cpp:79
unsigned char * LPBYTE
Definition: typedefs.h:53
ULONG_PTR SIZE_T
Definition: typedefs.h:80
int32_t INT
Definition: typedefs.h:58
#define LMEM_MOVEABLE
Definition: winbase.h:369
#define CopyMemory
Definition: winbase.h:1710
#define INVALID_FILE_SIZE
Definition: winbase.h:548
CONST void * LPCVOID
Definition: windef.h:191
#define IS_TEXT_UNICODE_REVERSE_MASK
Definition: winnt_old.h:899
#define IS_TEXT_UNICODE_STATISTICS
Definition: winnt_old.h:888
#define IS_TEXT_UNICODE_REVERSE_STATISTICS
Definition: winnt_old.h:889
_In_ int cchText
Definition: winuser.h:4468
const char * LPCSTR
Definition: xmlstorage.h:183
char * LPSTR
Definition: xmlstorage.h:182
__wchar_t WCHAR
Definition: xmlstorage.h:180
WCHAR * LPWSTR
Definition: xmlstorage.h:184
const WCHAR * LPCWSTR
Definition: xmlstorage.h:185
unsigned char BYTE
Definition: xxhash.c:193