ReactOS  0.4.15-dev-5452-g3c95c95
text.c
Go to the documentation of this file.
1 /*
2  * Notepad (text.c)
3  *
4  * Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
5  * Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
6  * Copyright 2002 Andriy Palamarchuk
7  * Copyright 2019 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com>
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "notepad.h"
25 
26 static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen)
27 {
28  LPWSTR pszNewText;
29 
30  if (dwAppendLen > 0)
31  {
32  if (*ppszText)
33  {
34  pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR));
35  }
36  else
37  {
38  pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR));
39  }
40 
41  if (!pszNewText)
42  return FALSE;
43 
44  memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR));
45  *ppszText = pszNewText;
46  *pdwTextLen += dwAppendLen;
47  }
48  return TRUE;
49 }
50 
52 {
53  const signed char *pBytes = pText;
54  while (dwSize-- > 0)
55  {
56  if (*pBytes <= 0)
57  return FALSE;
58 
59  ++pBytes;
60  }
61  return TRUE;
62 }
63 
64 ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
65 {
67 
68  if (dwSize <= 1)
69  return ENCODING_ANSI;
70 
71  if (IsTextNonZeroASCII(pBytes, dwSize))
72  {
73  return ENCODING_ANSI;
74  }
75 
76  if (IsTextUnicode(pBytes, dwSize, &flags))
77  {
78  return ENCODING_UTF16LE;
79  }
80 
82  {
83  return ENCODING_UTF16BE;
84  }
85 
86  /* is it UTF-8? */
88  {
89  return ENCODING_UTF8;
90  }
91 
92  return ENCODING_ANSI;
93 }
94 
95 BOOL
96 ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
97 {
98  DWORD dwSize;
99  LPBYTE pBytes = NULL;
100  LPWSTR pszText;
101  LPWSTR pszAllocText = NULL;
102  DWORD dwPos, i;
103  DWORD dwCharCount;
104  BOOL bSuccess = FALSE;
105  BYTE b = 0;
106  ENCODING encFile = ENCODING_ANSI;
107  int iCodePage = 0;
108  WCHAR szCrlf[2] = {'\r', '\n'};
109  DWORD adwEolnCount[3] = {0, 0, 0};
110 
111  *ppszText = NULL;
112  *pdwTextLen = 0;
113 
115  if (dwSize == INVALID_FILE_SIZE)
116  goto done;
117 
118  pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2);
119  if (!pBytes)
120  goto done;
121 
122  if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL))
123  goto done;
124  dwPos = 0;
125 
126  /* Make sure that there is a NUL character at the end, in any encoding */
127  pBytes[dwSize + 0] = '\0';
128  pBytes[dwSize + 1] = '\0';
129 
130  /* Look for Byte Order Marks */
131  if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
132  {
133  encFile = ENCODING_UTF16LE;
134  dwPos += 2;
135  }
136  else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF))
137  {
138  encFile = ENCODING_UTF16BE;
139  dwPos += 2;
140  }
141  else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF))
142  {
143  encFile = ENCODING_UTF8BOM;
144  dwPos += 3;
145  }
146  else
147  {
148  encFile = AnalyzeEncoding((const char *)pBytes, dwSize);
149  }
150 
151  switch(encFile)
152  {
153  case ENCODING_UTF16BE:
154  for (i = dwPos; i < dwSize-1; i += 2)
155  {
156  b = pBytes[i+0];
157  pBytes[i+0] = pBytes[i+1];
158  pBytes[i+1] = b;
159  }
160  /* fall through */
161 
162  case ENCODING_UTF16LE:
163  pszText = (LPWSTR) &pBytes[dwPos];
164  dwCharCount = (dwSize - dwPos) / sizeof(WCHAR);
165  break;
166 
167  case ENCODING_ANSI:
168  case ENCODING_UTF8:
169  case ENCODING_UTF8BOM:
170  if (encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM)
171  iCodePage = CP_UTF8;
172  else
173  iCodePage = CP_ACP;
174 
175  if ((dwSize - dwPos) > 0)
176  {
177  dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0);
178  if (dwCharCount == 0)
179  goto done;
180  }
181  else
182  {
183  /* special case for files with no characters (other than BOMs) */
184  dwCharCount = 0;
185  }
186 
187  pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR));
188  if (!pszAllocText)
189  goto done;
190 
191  if ((dwSize - dwPos) > 0)
192  {
193  if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount))
194  goto done;
195  }
196 
197  pszAllocText[dwCharCount] = '\0';
198  pszText = pszAllocText;
199  break;
201  }
202 
203  dwPos = 0;
204  for (i = 0; i < dwCharCount; i++)
205  {
206  switch(pszText[i])
207  {
208  case '\r':
209  if ((i < dwCharCount-1) && (pszText[i+1] == '\n'))
210  {
211  i++;
212  adwEolnCount[EOLN_CRLF]++;
213  break;
214  }
215  /* fall through */
216 
217  case '\n':
218  if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos))
219  return FALSE;
220  if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf)))
221  return FALSE;
222  dwPos = i + 1;
223 
224  if (pszText[i] == '\r')
225  adwEolnCount[EOLN_CR]++;
226  else
227  adwEolnCount[EOLN_LF]++;
228  break;
229 
230  case '\0':
231  pszText[i] = ' ';
232  break;
233  }
234  }
235 
236  if (!*ppszText && (pszText == pszAllocText))
237  {
238  /* special case; don't need to reallocate */
239  *ppszText = pszAllocText;
240  *pdwTextLen = dwCharCount;
241  pszAllocText = NULL;
242  }
243  else
244  {
245  /* append last remaining text */
246  if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1))
247  return FALSE;
248  }
249 
250  /* chose which eoln to use */
251  *piEoln = EOLN_CRLF;
252  if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln])
253  *piEoln = EOLN_LF;
254  if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln])
255  *piEoln = EOLN_CR;
256  *pencFile = encFile;
257 
258  bSuccess = TRUE;
259 
260 done:
261  if (pBytes)
262  HeapFree(GetProcessHeap(), 0, pBytes);
263  if (pszAllocText)
264  HeapFree(GetProcessHeap(), 0, pszAllocText);
265 
266  if (!bSuccess && *ppszText)
267  {
268  HeapFree(GetProcessHeap(), 0, *ppszText);
269  *ppszText = NULL;
270  *pdwTextLen = 0;
271  }
272  return bSuccess;
273 }
274 
275 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile)
276 {
277  LPBYTE pBytes = NULL;
278  LPBYTE pAllocBuffer = NULL;
279  DWORD dwPos = 0;
280  DWORD dwByteCount;
281  BYTE buffer[1024];
282  UINT iCodePage = 0;
283  DWORD dwDummy, i;
284  BOOL bSuccess = FALSE;
285  int iBufferSize, iRequiredBytes;
286  BYTE b;
287 
288  while(dwPos < dwTextLen)
289  {
290  switch(encFile)
291  {
292  case ENCODING_UTF16LE:
293  pBytes = (LPBYTE) &pszText[dwPos];
294  dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
295  dwPos = dwTextLen;
296  break;
297 
298  case ENCODING_UTF16BE:
299  dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
300  if (dwByteCount > sizeof(buffer))
301  dwByteCount = sizeof(buffer);
302 
303  memcpy(buffer, &pszText[dwPos], dwByteCount);
304  for (i = 0; i < dwByteCount; i += 2)
305  {
306  b = buffer[i+0];
307  buffer[i+0] = buffer[i+1];
308  buffer[i+1] = b;
309  }
310  pBytes = (LPBYTE) &buffer[dwPos];
311  dwPos += dwByteCount / sizeof(WCHAR);
312  break;
313 
314  case ENCODING_ANSI:
315  case ENCODING_UTF8:
316  case ENCODING_UTF8BOM:
317  if (encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM)
318  iCodePage = CP_UTF8;
319  else
320  iCodePage = CP_ACP;
321 
322  iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL);
323  if (iRequiredBytes <= 0)
324  {
325  goto done;
326  }
327  else if (iRequiredBytes < sizeof(buffer))
328  {
329  pBytes = buffer;
330  iBufferSize = sizeof(buffer);
331  }
332  else
333  {
334  pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes);
335  if (!pAllocBuffer)
336  return FALSE;
337  pBytes = pAllocBuffer;
338  iBufferSize = iRequiredBytes;
339  }
340 
341  dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL);
342  if (!dwByteCount)
343  goto done;
344 
345  dwPos = dwTextLen;
346  break;
347 
348  default:
349  goto done;
350  }
351 
352  if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL))
353  goto done;
354 
355  /* free the buffer, if we have allocated one */
356  if (pAllocBuffer)
357  {
358  HeapFree(GetProcessHeap(), 0, pAllocBuffer);
359  pAllocBuffer = NULL;
360  }
361  }
362  bSuccess = TRUE;
363 
364 done:
365  if (pAllocBuffer)
366  HeapFree(GetProcessHeap(), 0, pAllocBuffer);
367  return bSuccess;
368 }
369 
370 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln)
371 {
372  WCHAR wcBom;
373  LPCWSTR pszLF = L"\n";
374  DWORD dwPos, dwNext;
375 
376  /* Write the proper byte order marks if not ANSI or UTF-8 without BOM */
377  if (encFile != ENCODING_ANSI && encFile != ENCODING_UTF8)
378  {
379  wcBom = 0xFEFF;
380  if (!WriteEncodedText(hFile, &wcBom, 1, encFile))
381  return FALSE;
382  }
383 
384  dwPos = 0;
385 
386  /* pszText eoln are always \r\n */
387 
388  do
389  {
390  /* Find the next eoln */
391  dwNext = dwPos;
392  while(dwNext < dwTextLen)
393  {
394  if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n')
395  break;
396  dwNext++;
397  }
398 
399  if (dwNext != dwTextLen)
400  {
401  switch (iEoln)
402  {
403  case EOLN_LF:
404  /* Write text (without eoln) */
405  if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
406  return FALSE;
407  /* Write eoln */
408  if (!WriteEncodedText(hFile, pszLF, 1, encFile))
409  return FALSE;
410  break;
411  case EOLN_CR:
412  /* Write text (including \r as eoln) */
413  if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile))
414  return FALSE;
415  break;
416  case EOLN_CRLF:
417  /* Write text (including \r\n as eoln) */
418  if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile))
419  return FALSE;
420  break;
421  default:
422  return FALSE;
423  }
424  }
425  else
426  {
427  /* Write text (without eoln, since this is the end of the file) */
428  if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
429  return FALSE;
430  }
431 
432  /* Skip \r\n */
433  dwPos = dwNext + 2;
434  }
435  while (dwPos < dwTextLen);
436 
437  return TRUE;
438 }
#define DEFAULT_UNREACHABLE
BOOL WINAPI WriteFile(IN HANDLE hFile, IN LPCVOID lpBuffer, IN DWORD nNumberOfBytesToWrite OPTIONAL, OUT LPDWORD lpNumberOfBytesWritten, IN LPOVERLAPPED lpOverlapped OPTIONAL)
Definition: rw.c:24
#define MB_ERR_INVALID_CHARS
Definition: unicode.h:41
LPCSTR pText
Definition: txtscale.cpp:79
#define WideCharToMultiByte
Definition: compat.h:111
const WCHAR * LPCWSTR
Definition: xmlstorage.h:185
#define EOLN_CR
Definition: main.h:52
#define TRUE
Definition: types.h:120
#define CP_ACP
Definition: compat.h:109
GLuint buffer
Definition: glext.h:5915
static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen)
Definition: text.c:26
#define INVALID_FILE_SIZE
Definition: winbase.h:548
char * LPSTR
Definition: xmlstorage.h:182
int32_t INT
Definition: typedefs.h:58
static BOOLEAN bSuccess
Definition: drive.cpp:433
static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile)
Definition: text.c:275
#define L(x)
Definition: ntvdm.h:50
unsigned char * LPBYTE
Definition: typedefs.h:53
#define CP_UTF8
Definition: nls.h:20
#define FALSE
Definition: types.h:117
unsigned int BOOL
Definition: ntddk_ex.h:94
#define IS_TEXT_UNICODE_STATISTICS
Definition: winnt_old.h:922
const char * LPCSTR
Definition: xmlstorage.h:183
#define b
Definition: ke_i.h:79
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
BOOL WINAPI IsTextUnicode(IN CONST VOID *lpv, IN INT iSize, IN OUT LPINT lpiResult OPTIONAL)
Definition: unicode.c:27
#define GetProcessHeap()
Definition: compat.h:736
PVOID WINAPI HeapAlloc(HANDLE, DWORD, SIZE_T)
__wchar_t WCHAR
Definition: xmlstorage.h:180
unsigned long DWORD
Definition: ntddk_ex.h:95
#define EOLN_CRLF
Definition: main.h:50
DWORD WINAPI GetFileSize(HANDLE hFile, LPDWORD lpFileSizeHigh)
Definition: fileinfo.c:331
GLbitfield flags
Definition: glext.h:7161
#define EOLN_LF
Definition: main.h:51
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
_In_ HANDLE hFile
Definition: mswsock.h:90
unsigned char BYTE
Definition: xxhash.c:193
#define IS_TEXT_UNICODE_ILLEGAL_CHARS
Definition: winnt_old.h:928
BOOL ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
Definition: text.c:96
ENCODING
Definition: more.c:491
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define ARRAY_SIZE(a)
Definition: main.h:24
#define HeapReAlloc
Definition: compat.h:734
#define ReadFile(a, b, c, d, e)
Definition: compat.h:742
unsigned int UINT
Definition: ndis.h:50
#define NULL
Definition: types.h:112
#define MultiByteToWideChar
Definition: compat.h:110
ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
Definition: text.c:64
#define IS_TEXT_UNICODE_REVERSE_MASK
Definition: winnt_old.h:933
BOOL IsTextNonZeroASCII(const void *pText, DWORD dwSize)
Definition: text.c:51
BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln)
Definition: text.c:370
WCHAR * LPWSTR
Definition: xmlstorage.h:184
#define HeapFree(x, y, z)
Definition: compat.h:735
PSDBQUERYRESULT_VISTA PVOID DWORD * dwSize
Definition: env.c:56