ReactOS  0.4.14-dev-552-g2fad488
text.c
Go to the documentation of this file.
1 /*
2  * Notepad (text.c)
3  *
4  * Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
5  * Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
6  * Copyright 2002 Andriy Palamarchuk
7  * Copyright 2019 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com>
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "notepad.h"
25 
26 static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen)
27 {
28  LPWSTR pszNewText;
29 
30  if (dwAppendLen > 0)
31  {
32  if (*ppszText)
33  {
34  pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR));
35  }
36  else
37  {
38  pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR));
39  }
40 
41  if (!pszNewText)
42  return FALSE;
43 
44  memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR));
45  *ppszText = pszNewText;
46  *pdwTextLen += dwAppendLen;
47  }
48  return TRUE;
49 }
50 
52 {
53  const signed char *pBytes = pText;
54  while (dwSize-- > 0)
55  {
56  if (*pBytes <= 0)
57  return FALSE;
58 
59  ++pBytes;
60  }
61  return TRUE;
62 }
63 
/*
 * Guesses the encoding of a byte buffer.
 *
 * pBytes - start of the buffer to analyse.
 * dwSize - number of bytes in the buffer.
 *
 * Checks, in order:
 *  - buffers of at most one byte are reported as ENCODING_ANSI;
 *  - buffers accepted by IsTextNonZeroASCII are reported as ENCODING_ANSI;
 *  - buffers for which IsTextUnicode succeeds are reported as ENCODING_UTF16LE;
 *  - two further tests select ENCODING_UTF16BE and ENCODING_UTF8;
 *  - anything else is reported as ENCODING_ANSI.
 *
 * NOTE(review): this listing has lost three lines of the function (the
 * embedded numbering skips 66, 81 and 87); each gap is marked below.
 */
64 ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
65 {
/* NOTE(review): line 66 is missing here. 'flags' is used below but never
 * declared in the visible text, so its declaration presumably belongs here
 * -- confirm against the original file. */
67 
68  if (dwSize <= 1)
69  return ENCODING_ANSI;
70 
/* Only non-zero 7-bit bytes: no conversion is needed */
71  if (IsTextNonZeroASCII(pBytes, dwSize))
72  {
73  return ENCODING_ANSI;
74  }
75 
/* IsTextUnicode also receives &flags, which the later test presumably reads */
76  if (IsTextUnicode(pBytes, dwSize, &flags))
77  {
78  return ENCODING_UTF16LE;
79  }
80 
/* NOTE(review): line 81 is missing here; the condition guarding this block is
 * not visible. Presumably it tests 'flags' for byte-reversed Unicode --
 * confirm against the original file. */
82  {
83  return ENCODING_UTF16BE;
84  }
85 
86  /* is it UTF-8? */
/* NOTE(review): line 87 is missing here; the condition guarding this block is
 * not visible. Presumably it is a UTF-8 validity check -- confirm against the
 * original file. */
88  {
89  return ENCODING_UTF8;
90  }
91 
/* Nothing matched: fall back to ANSI */
92  return ENCODING_ANSI;
93 }
94 
95 BOOL
96 ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
97 {
98  DWORD dwSize;
99  LPBYTE pBytes = NULL;
100  LPWSTR pszText;
101  LPWSTR pszAllocText = NULL;
102  DWORD dwPos, i;
103  DWORD dwCharCount;
104  BOOL bSuccess = FALSE;
105  BYTE b = 0;
106  ENCODING encFile = ENCODING_ANSI;
107  int iCodePage = 0;
108  WCHAR szCrlf[2] = {'\r', '\n'};
109  DWORD adwEolnCount[3] = {0, 0, 0};
110 
111  *ppszText = NULL;
112  *pdwTextLen = 0;
113 
115  if (dwSize == INVALID_FILE_SIZE)
116  goto done;
117 
118  pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2);
119  if (!pBytes)
120  goto done;
121 
122  if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL))
123  goto done;
124  dwPos = 0;
125 
126  /* Make sure that there is a NUL character at the end, in any encoding */
127  pBytes[dwSize + 0] = '\0';
128  pBytes[dwSize + 1] = '\0';
129 
130  /* Look for Byte Order Marks */
131  if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
132  {
133  encFile = ENCODING_UTF16LE;
134  dwPos += 2;
135  }
136  else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF))
137  {
138  encFile = ENCODING_UTF16BE;
139  dwPos += 2;
140  }
141  else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF))
142  {
143  encFile = ENCODING_UTF8;
144  dwPos += 3;
145  }
146  else
147  {
148  encFile = AnalyzeEncoding((const char *)pBytes, dwSize);
149  }
150 
151  switch(encFile)
152  {
153  case ENCODING_UTF16BE:
154  for (i = dwPos; i < dwSize-1; i += 2)
155  {
156  b = pBytes[i+0];
157  pBytes[i+0] = pBytes[i+1];
158  pBytes[i+1] = b;
159  }
160  /* fall through */
161 
162  case ENCODING_UTF16LE:
163  pszText = (LPWSTR) &pBytes[dwPos];
164  dwCharCount = (dwSize - dwPos) / sizeof(WCHAR);
165  break;
166 
167  case ENCODING_ANSI:
168  case ENCODING_UTF8:
169  if (encFile == ENCODING_ANSI)
170  iCodePage = CP_ACP;
171  else if (encFile == ENCODING_UTF8)
172  iCodePage = CP_UTF8;
173 
174  if ((dwSize - dwPos) > 0)
175  {
176  dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0);
177  if (dwCharCount == 0)
178  goto done;
179  }
180  else
181  {
182  /* special case for files with no characters (other than BOMs) */
183  dwCharCount = 0;
184  }
185 
186  pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR));
187  if (!pszAllocText)
188  goto done;
189 
190  if ((dwSize - dwPos) > 0)
191  {
192  if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount))
193  goto done;
194  }
195 
196  pszAllocText[dwCharCount] = '\0';
197  pszText = pszAllocText;
198  break;
200  }
201 
202  dwPos = 0;
203  for (i = 0; i < dwCharCount; i++)
204  {
205  switch(pszText[i])
206  {
207  case '\r':
208  if ((i < dwCharCount-1) && (pszText[i+1] == '\n'))
209  {
210  i++;
211  adwEolnCount[EOLN_CRLF]++;
212  break;
213  }
214  /* fall through */
215 
216  case '\n':
217  if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos))
218  return FALSE;
219  if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf)))
220  return FALSE;
221  dwPos = i + 1;
222 
223  if (pszText[i] == '\r')
224  adwEolnCount[EOLN_CR]++;
225  else
226  adwEolnCount[EOLN_LF]++;
227  break;
228 
229  case '\0':
230  pszText[i] = ' ';
231  break;
232  }
233  }
234 
235  if (!*ppszText && (pszText == pszAllocText))
236  {
237  /* special case; don't need to reallocate */
238  *ppszText = pszAllocText;
239  *pdwTextLen = dwCharCount;
240  pszAllocText = NULL;
241  }
242  else
243  {
244  /* append last remaining text */
245  if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1))
246  return FALSE;
247  }
248 
249  /* chose which eoln to use */
250  *piEoln = EOLN_CRLF;
251  if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln])
252  *piEoln = EOLN_LF;
253  if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln])
254  *piEoln = EOLN_CR;
255  *pencFile = encFile;
256 
257  bSuccess = TRUE;
258 
259 done:
260  if (pBytes)
261  HeapFree(GetProcessHeap(), 0, pBytes);
262  if (pszAllocText)
263  HeapFree(GetProcessHeap(), 0, pszAllocText);
264 
265  if (!bSuccess && *ppszText)
266  {
267  HeapFree(GetProcessHeap(), 0, *ppszText);
268  *ppszText = NULL;
269  *pdwTextLen = 0;
270  }
271  return bSuccess;
272 }
273 
274 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile)
275 {
276  LPBYTE pBytes = NULL;
277  LPBYTE pAllocBuffer = NULL;
278  DWORD dwPos = 0;
279  DWORD dwByteCount;
280  BYTE buffer[1024];
281  UINT iCodePage = 0;
282  DWORD dwDummy, i;
283  BOOL bSuccess = FALSE;
284  int iBufferSize, iRequiredBytes;
285  BYTE b;
286 
287  while(dwPos < dwTextLen)
288  {
289  switch(encFile)
290  {
291  case ENCODING_UTF16LE:
292  pBytes = (LPBYTE) &pszText[dwPos];
293  dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
294  dwPos = dwTextLen;
295  break;
296 
297  case ENCODING_UTF16BE:
298  dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
299  if (dwByteCount > sizeof(buffer))
300  dwByteCount = sizeof(buffer);
301 
302  memcpy(buffer, &pszText[dwPos], dwByteCount);
303  for (i = 0; i < dwByteCount; i += 2)
304  {
305  b = buffer[i+0];
306  buffer[i+0] = buffer[i+1];
307  buffer[i+1] = b;
308  }
309  pBytes = (LPBYTE) &buffer[dwPos];
310  dwPos += dwByteCount / sizeof(WCHAR);
311  break;
312 
313  case ENCODING_ANSI:
314  case ENCODING_UTF8:
315  if (encFile == ENCODING_ANSI)
316  iCodePage = CP_ACP;
317  else if (encFile == ENCODING_UTF8)
318  iCodePage = CP_UTF8;
319 
320  iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL);
321  if (iRequiredBytes <= 0)
322  {
323  goto done;
324  }
325  else if (iRequiredBytes < sizeof(buffer))
326  {
327  pBytes = buffer;
328  iBufferSize = sizeof(buffer);
329  }
330  else
331  {
332  pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes);
333  if (!pAllocBuffer)
334  return FALSE;
335  pBytes = pAllocBuffer;
336  iBufferSize = iRequiredBytes;
337  }
338 
339  dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL);
340  if (!dwByteCount)
341  goto done;
342 
343  dwPos = dwTextLen;
344  break;
345 
346  default:
347  goto done;
348  }
349 
350  if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL))
351  goto done;
352 
353  /* free the buffer, if we have allocated one */
354  if (pAllocBuffer)
355  {
356  HeapFree(GetProcessHeap(), 0, pAllocBuffer);
357  pAllocBuffer = NULL;
358  }
359  }
360  bSuccess = TRUE;
361 
362 done:
363  if (pAllocBuffer)
364  HeapFree(GetProcessHeap(), 0, pAllocBuffer);
365  return bSuccess;
366 }
367 
368 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln)
369 {
370  WCHAR wcBom;
371  LPCWSTR pszLF = L"\n";
372  DWORD dwPos, dwNext;
373 
374  /* Write the proper byte order marks if not ANSI */
375  if (encFile != ENCODING_ANSI)
376  {
377  wcBom = 0xFEFF;
378  if (!WriteEncodedText(hFile, &wcBom, 1, encFile))
379  return FALSE;
380  }
381 
382  dwPos = 0;
383 
384  /* pszText eoln are always \r\n */
385 
386  do
387  {
388  /* Find the next eoln */
389  dwNext = dwPos;
390  while(dwNext < dwTextLen)
391  {
392  if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n')
393  break;
394  dwNext++;
395  }
396 
397  if (dwNext != dwTextLen)
398  {
399  switch (iEoln)
400  {
401  case EOLN_LF:
402  /* Write text (without eoln) */
403  if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
404  return FALSE;
405  /* Write eoln */
406  if (!WriteEncodedText(hFile, pszLF, 1, encFile))
407  return FALSE;
408  break;
409  case EOLN_CR:
410  /* Write text (including \r as eoln) */
411  if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile))
412  return FALSE;
413  break;
414  case EOLN_CRLF:
415  /* Write text (including \r\n as eoln) */
416  if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile))
417  return FALSE;
418  break;
419  default:
420  return FALSE;
421  }
422  }
423  else
424  {
425  /* Write text (without eoln, since this is the end of the file) */
426  if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
427  return FALSE;
428  }
429 
430  /* Skip \r\n */
431  dwPos = dwNext + 2;
432  }
433  while (dwPos < dwTextLen);
434 
435  return TRUE;
436 }
#define DEFAULT_UNREACHABLE
BOOL WINAPI WriteFile(IN HANDLE hFile, IN LPCVOID lpBuffer, IN DWORD nNumberOfBytesToWrite OPTIONAL, OUT LPDWORD lpNumberOfBytesWritten, IN LPOVERLAPPED lpOverlapped OPTIONAL)
Definition: rw.c:24
#define TRUE
Definition: types.h:120
#define MB_ERR_INVALID_CHARS
Definition: unicode.h:41
LPCSTR pText
Definition: txtscale.cpp:79
#define WideCharToMultiByte
Definition: compat.h:101
const WCHAR * LPCWSTR
Definition: xmlstorage.h:185
#define EOLN_CR
Definition: main.h:51
#define CP_ACP
Definition: compat.h:99
GLuint buffer
Definition: glext.h:5915
static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen)
Definition: text.c:26
#define INVALID_FILE_SIZE
Definition: winbase.h:529
char * LPSTR
Definition: xmlstorage.h:182
int32_t INT
Definition: typedefs.h:56
static BOOLEAN bSuccess
Definition: drive.cpp:419
static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile)
Definition: text.c:274
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
unsigned char * LPBYTE
Definition: typedefs.h:52
#define CP_UTF8
Definition: nls.h:20
unsigned int BOOL
Definition: ntddk_ex.h:94
#define IS_TEXT_UNICODE_STATISTICS
Definition: winnt_old.h:921
smooth NULL
Definition: ftsmooth.c:416
const char * LPCSTR
Definition: xmlstorage.h:183
#define b
Definition: ke_i.h:79
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
BOOL WINAPI IsTextUnicode(IN CONST VOID *lpv, IN INT iSize, IN OUT LPINT lpiResult OPTIONAL)
Definition: unicode.c:27
#define GetProcessHeap()
Definition: compat.h:403
PVOID WINAPI HeapAlloc(HANDLE, DWORD, SIZE_T)
__wchar_t WCHAR
Definition: xmlstorage.h:180
unsigned long DWORD
Definition: ntddk_ex.h:95
#define EOLN_CRLF
Definition: main.h:49
DWORD WINAPI GetFileSize(HANDLE hFile, LPDWORD lpFileSizeHigh)
Definition: fileinfo.c:481
GLbitfield flags
Definition: glext.h:7161
#define EOLN_LF
Definition: main.h:50
static const WCHAR L[]
Definition: oid.c:1250
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
unsigned char BYTE
Definition: mem.h:68
_In_ HANDLE hFile
Definition: mswsock.h:90
#define IS_TEXT_UNICODE_ILLEGAL_CHARS
Definition: winnt_old.h:927
BOOL ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
Definition: text.c:96
ENCODING
Definition: more.c:155
#define ARRAY_SIZE(a)
Definition: main.h:24
#define HeapReAlloc
Definition: compat.h:401
unsigned int UINT
Definition: ndis.h:50
#define MultiByteToWideChar
Definition: compat.h:100
ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
Definition: text.c:64
#define IS_TEXT_UNICODE_REVERSE_MASK
Definition: winnt_old.h:932
BOOL IsTextNonZeroASCII(const void *pText, DWORD dwSize)
Definition: text.c:51
BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln)
Definition: text.c:368
WCHAR * LPWSTR
Definition: xmlstorage.h:184
BOOL WINAPI ReadFile(IN HANDLE hFile, IN LPVOID lpBuffer, IN DWORD nNumberOfBytesToRead, OUT LPDWORD lpNumberOfBytesRead OPTIONAL, IN LPOVERLAPPED lpOverlapped OPTIONAL)
Definition: rw.c:123
#define HeapFree(x, y, z)
Definition: compat.h:402
PSDBQUERYRESULT_VISTA PVOID DWORD * dwSize
Definition: env.c:54