ReactOS  0.4.14-dev-52-g6116262
text.c
Go to the documentation of this file.
1 /*
2  * Notepad (text.c)
3  *
4  * Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
5  * Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
6  * Copyright 2002 Andriy Palamarchuk
7  * Copyright 2019 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com>
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "notepad.h"
25 
26 static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen)
27 {
28  LPWSTR pszNewText;
29 
30  if (dwAppendLen > 0)
31  {
32  if (*ppszText)
33  {
34  pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR));
35  }
36  else
37  {
38  pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR));
39  }
40 
41  if (!pszNewText)
42  return FALSE;
43 
44  memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR));
45  *ppszText = pszNewText;
46  *pdwTextLen += dwAppendLen;
47  }
48  return TRUE;
49 }
50 
51 ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
52 {
54 
55  if (dwSize <= 1)
56  return ENCODING_ANSI;
57 
58  if (IsTextUnicode(pBytes, dwSize, &flags))
59  {
60  return ENCODING_UTF16LE;
61  }
62 
64  {
65  return ENCODING_UTF16BE;
66  }
67 
68  /* is it UTF-8? */
70  {
71  return ENCODING_UTF8;
72  }
73 
74  return ENCODING_ANSI;
75 }
76 
77 BOOL
78 ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
79 {
80  DWORD dwSize;
81  LPBYTE pBytes = NULL;
82  LPWSTR pszText;
83  LPWSTR pszAllocText = NULL;
84  DWORD dwPos, i;
85  DWORD dwCharCount;
87  BYTE b = 0;
88  ENCODING encFile = ENCODING_ANSI;
89  int iCodePage = 0;
90  WCHAR szCrlf[2] = {'\r', '\n'};
91  DWORD adwEolnCount[3] = {0, 0, 0};
92 
93  *ppszText = NULL;
94  *pdwTextLen = 0;
95 
98  goto done;
99 
100  pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2);
101  if (!pBytes)
102  goto done;
103 
104  if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL))
105  goto done;
106  dwPos = 0;
107 
108  /* Make sure that there is a NUL character at the end, in any encoding */
109  pBytes[dwSize + 0] = '\0';
110  pBytes[dwSize + 1] = '\0';
111 
112  /* Look for Byte Order Marks */
113  if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
114  {
115  encFile = ENCODING_UTF16LE;
116  dwPos += 2;
117  }
118  else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF))
119  {
120  encFile = ENCODING_UTF16BE;
121  dwPos += 2;
122  }
123  else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF))
124  {
125  encFile = ENCODING_UTF8;
126  dwPos += 3;
127  }
128  else
129  {
130  encFile = AnalyzeEncoding((const char *)pBytes, dwSize);
131  }
132 
133  switch(encFile)
134  {
135  case ENCODING_UTF16BE:
136  for (i = dwPos; i < dwSize-1; i += 2)
137  {
138  b = pBytes[i+0];
139  pBytes[i+0] = pBytes[i+1];
140  pBytes[i+1] = b;
141  }
142  /* fall through */
143 
144  case ENCODING_UTF16LE:
145  pszText = (LPWSTR) &pBytes[dwPos];
146  dwCharCount = (dwSize - dwPos) / sizeof(WCHAR);
147  break;
148 
149  case ENCODING_ANSI:
150  case ENCODING_UTF8:
151  if (encFile == ENCODING_ANSI)
152  iCodePage = CP_ACP;
153  else if (encFile == ENCODING_UTF8)
154  iCodePage = CP_UTF8;
155 
156  if ((dwSize - dwPos) > 0)
157  {
158  dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0);
159  if (dwCharCount == 0)
160  goto done;
161  }
162  else
163  {
164  /* special case for files with no characters (other than BOMs) */
165  dwCharCount = 0;
166  }
167 
168  pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR));
169  if (!pszAllocText)
170  goto done;
171 
172  if ((dwSize - dwPos) > 0)
173  {
174  if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount))
175  goto done;
176  }
177 
178  pszAllocText[dwCharCount] = '\0';
179  pszText = pszAllocText;
180  break;
182  }
183 
184  dwPos = 0;
185  for (i = 0; i < dwCharCount; i++)
186  {
187  switch(pszText[i])
188  {
189  case '\r':
190  if ((i < dwCharCount-1) && (pszText[i+1] == '\n'))
191  {
192  i++;
193  adwEolnCount[EOLN_CRLF]++;
194  break;
195  }
196  /* fall through */
197 
198  case '\n':
199  if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos))
200  return FALSE;
201  if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf)))
202  return FALSE;
203  dwPos = i + 1;
204 
205  if (pszText[i] == '\r')
206  adwEolnCount[EOLN_CR]++;
207  else
208  adwEolnCount[EOLN_LF]++;
209  break;
210 
211  case '\0':
212  pszText[i] = ' ';
213  break;
214  }
215  }
216 
217  if (!*ppszText && (pszText == pszAllocText))
218  {
219  /* special case; don't need to reallocate */
220  *ppszText = pszAllocText;
221  *pdwTextLen = dwCharCount;
222  pszAllocText = NULL;
223  }
224  else
225  {
226  /* append last remaining text */
227  if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1))
228  return FALSE;
229  }
230 
231  /* chose which eoln to use */
232  *piEoln = EOLN_CRLF;
233  if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln])
234  *piEoln = EOLN_LF;
235  if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln])
236  *piEoln = EOLN_CR;
237  *pencFile = encFile;
238 
239  bSuccess = TRUE;
240 
241 done:
242  if (pBytes)
243  HeapFree(GetProcessHeap(), 0, pBytes);
244  if (pszAllocText)
245  HeapFree(GetProcessHeap(), 0, pszAllocText);
246 
247  if (!bSuccess && *ppszText)
248  {
249  HeapFree(GetProcessHeap(), 0, *ppszText);
250  *ppszText = NULL;
251  *pdwTextLen = 0;
252  }
253  return bSuccess;
254 }
255 
256 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile)
257 {
258  LPBYTE pBytes = NULL;
259  LPBYTE pAllocBuffer = NULL;
260  DWORD dwPos = 0;
261  DWORD dwByteCount;
262  BYTE buffer[1024];
263  UINT iCodePage = 0;
264  DWORD dwDummy, i;
265  BOOL bSuccess = FALSE;
266  int iBufferSize, iRequiredBytes;
267  BYTE b;
268 
269  while(dwPos < dwTextLen)
270  {
271  switch(encFile)
272  {
273  case ENCODING_UTF16LE:
274  pBytes = (LPBYTE) &pszText[dwPos];
275  dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
276  dwPos = dwTextLen;
277  break;
278 
279  case ENCODING_UTF16BE:
280  dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
281  if (dwByteCount > sizeof(buffer))
282  dwByteCount = sizeof(buffer);
283 
284  memcpy(buffer, &pszText[dwPos], dwByteCount);
285  for (i = 0; i < dwByteCount; i += 2)
286  {
287  b = buffer[i+0];
288  buffer[i+0] = buffer[i+1];
289  buffer[i+1] = b;
290  }
291  pBytes = (LPBYTE) &buffer[dwPos];
292  dwPos += dwByteCount / sizeof(WCHAR);
293  break;
294 
295  case ENCODING_ANSI:
296  case ENCODING_UTF8:
297  if (encFile == ENCODING_ANSI)
298  iCodePage = CP_ACP;
299  else if (encFile == ENCODING_UTF8)
300  iCodePage = CP_UTF8;
301 
302  iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL);
303  if (iRequiredBytes <= 0)
304  {
305  goto done;
306  }
307  else if (iRequiredBytes < sizeof(buffer))
308  {
309  pBytes = buffer;
310  iBufferSize = sizeof(buffer);
311  }
312  else
313  {
314  pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes);
315  if (!pAllocBuffer)
316  return FALSE;
317  pBytes = pAllocBuffer;
318  iBufferSize = iRequiredBytes;
319  }
320 
321  dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL);
322  if (!dwByteCount)
323  goto done;
324 
325  dwPos = dwTextLen;
326  break;
327 
328  default:
329  goto done;
330  }
331 
332  if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL))
333  goto done;
334 
335  /* free the buffer, if we have allocated one */
336  if (pAllocBuffer)
337  {
338  HeapFree(GetProcessHeap(), 0, pAllocBuffer);
339  pAllocBuffer = NULL;
340  }
341  }
342  bSuccess = TRUE;
343 
344 done:
345  if (pAllocBuffer)
346  HeapFree(GetProcessHeap(), 0, pAllocBuffer);
347  return bSuccess;
348 }
349 
350 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln)
351 {
352  WCHAR wcBom;
353  LPCWSTR pszLF = L"\n";
354  DWORD dwPos, dwNext;
355 
356  /* Write the proper byte order marks if not ANSI */
357  if (encFile != ENCODING_ANSI)
358  {
359  wcBom = 0xFEFF;
360  if (!WriteEncodedText(hFile, &wcBom, 1, encFile))
361  return FALSE;
362  }
363 
364  dwPos = 0;
365 
366  /* pszText eoln are always \r\n */
367 
368  do
369  {
370  /* Find the next eoln */
371  dwNext = dwPos;
372  while(dwNext < dwTextLen)
373  {
374  if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n')
375  break;
376  dwNext++;
377  }
378 
379  if (dwNext != dwTextLen)
380  {
381  switch (iEoln)
382  {
383  case EOLN_LF:
384  /* Write text (without eoln) */
385  if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
386  return FALSE;
387  /* Write eoln */
388  if (!WriteEncodedText(hFile, pszLF, 1, encFile))
389  return FALSE;
390  break;
391  case EOLN_CR:
392  /* Write text (including \r as eoln) */
393  if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile))
394  return FALSE;
395  break;
396  case EOLN_CRLF:
397  /* Write text (including \r\n as eoln) */
398  if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile))
399  return FALSE;
400  break;
401  default:
402  return FALSE;
403  }
404  }
405  else
406  {
407  /* Write text (without eoln, since this is the end of the file) */
408  if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
409  return FALSE;
410  }
411 
412  /* Skip \r\n */
413  dwPos = dwNext + 2;
414  }
415  while (dwPos < dwTextLen);
416 
417  return TRUE;
418 }
#define DEFAULT_UNREACHABLE
BOOL WINAPI WriteFile(IN HANDLE hFile, IN LPCVOID lpBuffer, IN DWORD nNumberOfBytesToWrite OPTIONAL, OUT LPDWORD lpNumberOfBytesWritten, IN LPOVERLAPPED lpOverlapped OPTIONAL)
Definition: rw.c:24
#define TRUE
Definition: types.h:120
#define MB_ERR_INVALID_CHARS
Definition: unicode.h:41
#define WideCharToMultiByte
Definition: compat.h:101
const WCHAR * LPCWSTR
Definition: xmlstorage.h:185
#define EOLN_CR
Definition: main.h:51
#define CP_ACP
Definition: compat.h:99
GLuint buffer
Definition: glext.h:5915
static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen)
Definition: text.c:26
#define INVALID_FILE_SIZE
Definition: winbase.h:529
char * LPSTR
Definition: xmlstorage.h:182
int32_t INT
Definition: typedefs.h:56
static BOOLEAN bSuccess
Definition: drive.cpp:417
static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile)
Definition: text.c:256
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
unsigned char * LPBYTE
Definition: typedefs.h:52
#define CP_UTF8
Definition: nls.h:20
unsigned int BOOL
Definition: ntddk_ex.h:94
#define IS_TEXT_UNICODE_STATISTICS
Definition: winnt_old.h:921
smooth NULL
Definition: ftsmooth.c:416
const char * LPCSTR
Definition: xmlstorage.h:183
#define b
Definition: ke_i.h:79
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
BOOL WINAPI IsTextUnicode(IN CONST VOID *lpv, IN INT iSize, IN OUT LPINT lpiResult OPTIONAL)
Definition: unicode.c:27
#define GetProcessHeap()
Definition: compat.h:395
PVOID WINAPI HeapAlloc(HANDLE, DWORD, SIZE_T)
__wchar_t WCHAR
Definition: xmlstorage.h:180
unsigned long DWORD
Definition: ntddk_ex.h:95
#define EOLN_CRLF
Definition: main.h:49
DWORD WINAPI GetFileSize(HANDLE hFile, LPDWORD lpFileSizeHigh)
Definition: fileinfo.c:481
GLbitfield flags
Definition: glext.h:7161
#define EOLN_LF
Definition: main.h:50
static const WCHAR L[]
Definition: oid.c:1250
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
unsigned char BYTE
Definition: mem.h:68
_In_ HANDLE hFile
Definition: mswsock.h:90
#define IS_TEXT_UNICODE_ILLEGAL_CHARS
Definition: winnt_old.h:927
BOOL ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
Definition: text.c:78
ENCODING
Definition: more.c:155
#define ARRAY_SIZE(a)
Definition: main.h:24
#define HeapReAlloc
Definition: compat.h:393
unsigned int UINT
Definition: ndis.h:50
#define MultiByteToWideChar
Definition: compat.h:100
ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
Definition: text.c:51
#define IS_TEXT_UNICODE_REVERSE_MASK
Definition: winnt_old.h:932
BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln)
Definition: text.c:350
WCHAR * LPWSTR
Definition: xmlstorage.h:184
BOOL WINAPI ReadFile(IN HANDLE hFile, IN LPVOID lpBuffer, IN DWORD nNumberOfBytesToRead, OUT LPDWORD lpNumberOfBytesRead OPTIONAL, IN LPOVERLAPPED lpOverlapped OPTIONAL)
Definition: rw.c:123
#define HeapFree(x, y, z)
Definition: compat.h:394
PSDBQUERYRESULT_VISTA PVOID DWORD * dwSize
Definition: env.c:54