ReactOS 0.4.16-dev-297-gc569aee
encoding.c File Reference
#include "libxml.h"
#include <string.h>
#include <limits.h>
#include <ctype.h>
#include <stdlib.h>
#include <libxml/encoding.h>
#include <libxml/xmlmemory.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
#include "buf.h"
#include "enc.h"
Include dependency graph for encoding.c:

Go to the source code of this file.

Classes

struct  _xmlCharEncodingAlias
 

Macros

#define IN_LIBXML
 

Typedefs

typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias
 
typedef xmlCharEncodingAlias * xmlCharEncodingAliasPtr
 

Functions

static void xmlEncodingErrMemory (const char *extra)
 
static void LIBXML_ATTR_FORMAT (2, 0)
 
static int asciiToUTF8 (unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
 
int isolat1ToUTF8 (unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
 
static int UTF8ToUTF8 (unsigned char *out, int *outlen, const unsigned char *inb, int *inlenb)
 
static int UTF16LEToUTF8 (unsigned char *out, int *outlen, const unsigned char *inb, int *inlenb)
 
static int UTF16BEToUTF8 (unsigned char *out, int *outlen, const unsigned char *inb, int *inlenb)
 
xmlCharEncoding xmlDetectCharEncoding (const unsigned char *in, int len)
 
void xmlCleanupEncodingAliases (void)
 
const char * xmlGetEncodingAlias (const char *alias)
 
Registers an alias @alias for the encoding named @name. An existing alias will be overwritten.

Returns 0 in case of success, -1 in case of error.

int xmlAddEncodingAlias (const char *name, const char *alias)
 
int xmlDelEncodingAlias (const char *alias)
 
@name: the encoding name, in UTF-8 format (ASCII actually)

xmlNewCharEncodingHandler:

@input: the xmlCharEncodingInputFunc to read that encoding @output: the xmlCharEncodingOutputFunc to write that encoding

Creates and registers an xmlCharEncodingHandler.

Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).

xmlCharEncodingHandlerPtr xmlNewCharEncodingHandler (const char *name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output)
 
void xmlInitCharEncodingHandlers (void)
 
void xmlCleanupCharEncodingHandlers (void)
 
void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler)
 
xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler (xmlCharEncoding enc)
 
@name: a string describing the char encoding.

xmlFindCharEncodingHandler:

Search in the registered set the handler able to read/write that encoding.

Returns the handler or NULL if not found

xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler (const char *name)
 
static int xmlEncInputChunk (xmlCharEncodingHandler *handler, unsigned char *out, int *outlen, const unsigned char *in, int *inlen, int flush)
 
static int xmlEncOutputChunk (xmlCharEncodingHandler *handler, unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
 
int xmlCharEncFirstLineInt (xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in, int len)
 
int xmlCharEncFirstLine (xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in)
 
int xmlCharEncFirstLineInput (xmlParserInputBufferPtr input, int len)
 
int xmlCharEncInput (xmlParserInputBufferPtr input, int flush)
 
int xmlCharEncInFunc (xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in)
 
int xmlCharEncOutFunc (xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in)
 
int xmlCharEncCloseFunc (xmlCharEncodingHandler *handler)
 
long xmlByteConsumed (xmlParserCtxtPtr ctxt)
 

Variables

static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL
 
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL
 
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL
 
static int xmlCharEncodingAliasesNb = 0
 
static int xmlCharEncodingAliasesMax = 0
 
static int xmlLittleEndian = 1
 

@name: the encoding name as parsed, in UTF-8 format (ASCII actually)

xmlParseCharEncoding:

Compare the string to the encoding schemes already known. Note that the comparison is case insensitive, in accordance with section [XML] 4.3.3 Character Encoding in Entities.

Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE if not recognized.

#define MAX_ENCODING_HANDLERS   50
 
static xmlCharEncodingHandlerPtr * handlers = NULL
 
static int nbCharEncodingHandler = 0
 
static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL
 
xmlCharEncoding xmlParseCharEncoding (const char *name)
 
const char * xmlGetCharEncodingName (xmlCharEncoding enc)
 

Macro Definition Documentation

◆ IN_LIBXML

#define IN_LIBXML

Definition at line 23 of file encoding.c.

◆ MAX_ENCODING_HANDLERS

#define MAX_ENCODING_HANDLERS   50

Definition at line 1316 of file encoding.c.

Typedef Documentation

◆ xmlCharEncodingAlias

Definition at line 63 of file encoding.c.

◆ xmlCharEncodingAliasPtr

Definition at line 64 of file encoding.c.

Function Documentation

◆ asciiToUTF8()

static int asciiToUTF8 ( unsigned char out,
int outlen,
const unsigned char in,
int inlen 
)
static

asciiToUTF8: @out: a pointer to an array of bytes to store the result @outlen: the length of @out @in: a pointer to an array of ASCII chars @inlen: the length of @in

Take a block of ASCII chars in and try to convert it to an UTF-8 block of chars out. Returns the number of bytes written if success, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.

Definition at line 185 of file encoding.c.

186 {
187 unsigned char* outstart = out;
188 const unsigned char* base = in;
189 const unsigned char* processed = in;
190 unsigned char* outend = out + *outlen;
191 const unsigned char* inend;
192 unsigned int c;
193
194 inend = in + (*inlen);
195 while ((in < inend) && (out - outstart + 5 < *outlen)) {
196 c= *in++;
197
198 if (out >= outend)
199 break;
200 if (c < 0x80) {
201 *out++ = c;
202 } else {
203 *outlen = out - outstart;
204 *inlen = processed - base;
205 return(-1);
206 }
207
208 processed = (const unsigned char*) in;
209 }
210 *outlen = out - outstart;
211 *inlen = processed - base;
212 return(*outlen);
213}
const GLubyte * c
Definition: glext.h:8905
GLuint in
Definition: glext.h:9616
#define c
Definition: ke_i.h:80
static FILE * out
Definition: regtests2xml.c:44
static int processed(const type_t *type)
Definition: typegen.c:2254

Referenced by xmlInitCharEncodingHandlers().

◆ isolat1ToUTF8()

int isolat1ToUTF8 ( unsigned char out,
int outlen,
const unsigned char in,
int inlen 
)

isolat1ToUTF8: @out: a pointer to an array of bytes to store the result @outlen: the length of @out @in: a pointer to an array of ISO Latin 1 chars @inlen: the length of @in

Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8 block of chars out. Returns the number of bytes written if success, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.

Definition at line 316 of file encoding.c.

317 {
318 unsigned char* outstart = out;
319 const unsigned char* base = in;
320 unsigned char* outend;
321 const unsigned char* inend;
322 const unsigned char* instop;
323
324 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
325 return(-1);
326
327 outend = out + *outlen;
328 inend = in + (*inlen);
329 instop = inend;
330
331 while ((in < inend) && (out < outend - 1)) {
332 if (*in >= 0x80) {
333 *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
334 *out++ = ((*in) & 0x3F) | 0x80;
335 ++in;
336 }
337 if ((instop - in) > (outend - out)) instop = in + (outend - out);
338 while ((in < instop) && (*in < 0x80)) {
339 *out++ = *in++;
340 }
341 }
342 if ((in < inend) && (out < outend) && (*in < 0x80)) {
343 *out++ = *in++;
344 }
345 *outlen = out - outstart;
346 *inlen = in - base;
347 return(*outlen);
348}
#define NULL
Definition: types.h:112

Referenced by xmlInitCharEncodingHandlers().

◆ LIBXML_ATTR_FORMAT()

static void LIBXML_ATTR_FORMAT ( ,
 
)
static

xmlErrEncoding: @error: the error number @msg: the error message

Handle an encoding error

Definition at line 105 of file encoding.c.

107{
108 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
110 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
111}
#define msg(x)
Definition: auth_time.c:54
GLuint GLfloat * val
Definition: glext.h:7180
#define error(str)
Definition: mkdosfs.c:1605
@ XML_ERR_FATAL
Definition: xmlerror.h:28
@ XML_FROM_I18N
Definition: xmlerror.h:64

◆ UTF16BEToUTF8()

static int UTF16BEToUTF8 ( unsigned char out,
int outlen,
const unsigned char inb,
int inlenb 
)
static

UTF16BEToUTF8: @out: a pointer to an array of bytes to store the result @outlen: the length of @out @inb: a pointer to an array of UTF-16 passed as a byte array @inlenb: the length of @in in UTF-16 chars

Take a block of UTF-16 ushorts in and try to convert it to an UTF-8 block of chars out. This function assumes the endian property is the same between the native type of this machine and the input one.

Returns the number of bytes written, or -1 if lack of space, or -2 if the transcoding fails (if *in is not a valid utf16 string) The value of *inlen after return is the number of octets consumed if the return value is positive, else unpredictable.

Definition at line 751 of file encoding.c.

753{
754 unsigned char* outstart = out;
755 const unsigned char* processed = inb;
756 unsigned char* outend;
757 unsigned short* in = (unsigned short*) inb;
758 unsigned short* inend;
759 unsigned int c, d, inlen;
760 unsigned char *tmp;
761 int bits;
762
763 if (*outlen == 0) {
764 *inlenb = 0;
765 return(0);
766 }
767 outend = out + *outlen;
768 if ((*inlenb % 2) == 1)
769 (*inlenb)--;
770 inlen = *inlenb / 2;
771 inend= in + inlen;
772 while ((in < inend) && (out - outstart + 5 < *outlen)) {
773 if (xmlLittleEndian) {
774 tmp = (unsigned char *) in;
775 c = *tmp++;
776 c = (c << 8) | (unsigned int) *tmp;
777 in++;
778 } else {
779 c= *in++;
780 }
781 if ((c & 0xFC00) == 0xD800) { /* surrogates */
782 if (in >= inend) { /* handle split mutli-byte characters */
783 break;
784 }
785 if (xmlLittleEndian) {
786 tmp = (unsigned char *) in;
787 d = *tmp++;
788 d = (d << 8) | (unsigned int) *tmp;
789 in++;
790 } else {
791 d= *in++;
792 }
793 if ((d & 0xFC00) == 0xDC00) {
794 c &= 0x03FF;
795 c <<= 10;
796 c |= d & 0x03FF;
797 c += 0x10000;
798 }
799 else {
800 *outlen = out - outstart;
801 *inlenb = processed - inb;
802 return(-2);
803 }
804 }
805
806 /* assertion: c is a single UTF-4 value */
807 if (out >= outend)
808 break;
809 if (c < 0x80) { *out++= c; bits= -6; }
810 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
811 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
812 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
813
814 for ( ; bits >= 0; bits-= 6) {
815 if (out >= outend)
816 break;
817 *out++= ((c >> bits) & 0x3F) | 0x80;
818 }
819 processed = (const unsigned char*) in;
820 }
821 *outlen = out - outstart;
822 *inlenb = processed - inb;
823 return(*outlen);
824}
static int xmlLittleEndian
Definition: encoding.c:84
GLenum GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * bits
Definition: glext.h:10929
#define bits
Definition: infblock.c:15
#define d
Definition: ke_i.h:81

Referenced by xmlInitCharEncodingHandlers().

◆ UTF16LEToUTF8()

static int UTF16LEToUTF8 ( unsigned char out,
int outlen,
const unsigned char inb,
int inlenb 
)
static

UTF16LEToUTF8: @out: a pointer to an array of bytes to store the result @outlen: the length of @out @inb: a pointer to an array of UTF-16LE passed as a byte array @inlenb: the length of @in in UTF-16LE chars

Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8 block of chars out. This function assumes the endian property is the same between the native type of this machine and the input one.

Returns the number of bytes written, or -1 if lack of space, or -2 if the transcoding fails (if *in is not a valid utf16 string) The value of *inlen after return is the number of octets consumed if the return value is positive, else unpredictable.

Definition at line 508 of file encoding.c.

510{
511 unsigned char* outstart = out;
512 const unsigned char* processed = inb;
513 unsigned char* outend;
514 unsigned short* in = (unsigned short*) inb;
515 unsigned short* inend;
516 unsigned int c, d, inlen;
517 unsigned char *tmp;
518 int bits;
519
520 if (*outlen == 0) {
521 *inlenb = 0;
522 return(0);
523 }
524 outend = out + *outlen;
525 if ((*inlenb % 2) == 1)
526 (*inlenb)--;
527 inlen = *inlenb / 2;
528 inend = in + inlen;
529 while ((in < inend) && (out - outstart + 5 < *outlen)) {
530 if (xmlLittleEndian) {
531 c= *in++;
532 } else {
533 tmp = (unsigned char *) in;
534 c = *tmp++;
535 c = c | (((unsigned int)*tmp) << 8);
536 in++;
537 }
538 if ((c & 0xFC00) == 0xD800) { /* surrogates */
539 if (in >= inend) { /* handle split mutli-byte characters */
540 break;
541 }
542 if (xmlLittleEndian) {
543 d = *in++;
544 } else {
545 tmp = (unsigned char *) in;
546 d = *tmp++;
547 d = d | (((unsigned int)*tmp) << 8);
548 in++;
549 }
550 if ((d & 0xFC00) == 0xDC00) {
551 c &= 0x03FF;
552 c <<= 10;
553 c |= d & 0x03FF;
554 c += 0x10000;
555 }
556 else {
557 *outlen = out - outstart;
558 *inlenb = processed - inb;
559 return(-2);
560 }
561 }
562
563 /* assertion: c is a single UTF-4 value */
564 if (out >= outend)
565 break;
566 if (c < 0x80) { *out++= c; bits= -6; }
567 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
568 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
569 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
570
571 for ( ; bits >= 0; bits-= 6) {
572 if (out >= outend)
573 break;
574 *out++= ((c >> bits) & 0x3F) | 0x80;
575 }
576 processed = (const unsigned char*) in;
577 }
578 *outlen = out - outstart;
579 *inlenb = processed - inb;
580 return(*outlen);
581}
unsigned int(__cdecl typeof(jpeg_read_scanlines))(struct jpeg_decompress_struct *
Definition: typeof.h:31

Referenced by xmlInitCharEncodingHandlers().

◆ UTF8ToUTF8()

static int UTF8ToUTF8 ( unsigned char out,
int outlen,
const unsigned char inb,
int inlenb 
)
static

UTF8ToUTF8: @out: a pointer to an array of bytes to store the result @outlen: the length of @out @inb: a pointer to an array of UTF-8 chars @inlenb: the length of @in in UTF-8 chars

No op copy operation for UTF8 handling.

Returns the number of bytes written, or -1 if lack of space. The value of *inlen after return is the number of octets consumed if the return value is positive, else unpredictable.

Definition at line 364 of file encoding.c.

366{
367 int len;
368
369 if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
370 return(-1);
371 if (inb == NULL) {
372 /* inb == NULL means output is initialized. */
373 *outlen = 0;
374 *inlenb = 0;
375 return(0);
376 }
377 if (*outlen > *inlenb) {
378 len = *inlenb;
379 } else {
380 len = *outlen;
381 }
382 if (len < 0)
383 return(-1);
384
385 /*
386 * FIXME: Conversion functions must assure valid UTF-8, so we have
387 * to check for UTF-8 validity. Preferably, this converter shouldn't
388 * be used at all.
389 */
390 memcpy(out, inb, len);
391
392 *outlen = len;
393 *inlenb = len;
394 return(*outlen);
395}
GLenum GLsizei len
Definition: glext.h:6722
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878

Referenced by xmlInitCharEncodingHandlers().

◆ xmlAddEncodingAlias()

int xmlAddEncodingAlias ( const char name,
const char alias 
)

Definition at line 1077 of file encoding.c.

1077 {
1078 int i;
1079 char upper[100];
1080
1081 if ((name == NULL) || (alias == NULL))
1082 return(-1);
1083
1084 for (i = 0;i < 99;i++) {
1085 upper[i] = toupper(alias[i]);
1086 if (upper[i] == 0) break;
1087 }
1088 upper[i] = 0;
1089
1096 return(-1);
1102 }
1103 /*
1104 * Walk down the list looking for a definition of the alias
1105 */
1106 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1107 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1108 /*
1109 * Replace the definition.
1110 */
1113 return(0);
1114 }
1115 }
1116 /*
1117 * Add the definition
1118 */
1122 return(0);
1123}
int strcmp(const char *String1, const char *String2)
Definition: utclib.c:469
int toupper(int c)
Definition: utclib.c:881
const WCHAR * alias
Definition: main.c:67
xmlCharEncodingAlias * xmlCharEncodingAliasPtr
Definition: encoding.c:64
static xmlCharEncodingAliasPtr xmlCharEncodingAliases
Definition: encoding.c:70
static int xmlCharEncodingAliasesMax
Definition: encoding.c:72
static int xmlCharEncodingAliasesNb
Definition: encoding.c:71
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
XMLPUBVAR xmlStrdupFunc xmlMemStrdup
Definition: globals.h:252
XMLPUBVAR xmlMallocFunc xmlMalloc
Definition: globals.h:248
XMLPUBVAR xmlFreeFunc xmlFree
Definition: globals.h:251
XMLPUBVAR xmlReallocFunc xmlRealloc
Definition: globals.h:250
const char * name
Definition: encoding.c:66
const char * alias
Definition: encoding.c:67
Definition: name.c:39

Referenced by main().

◆ xmlByteConsumed()

long xmlByteConsumed ( xmlParserCtxtPtr  ctxt)

xmlByteConsumed: @ctxt: an XML parser context

This function provides the current index of the parser relative to the start of the current entity. The index is computed in bytes from the beginning, starting at zero and finishing at the size in bytes of the file if parsing a file. The function is of constant cost if the input is UTF-8 but can be costly if run on non-UTF-8 input.

Returns the index in bytes from the beginning of the entity or -1 in case the index could not be computed.

Definition at line 2882 of file encoding.c.

2882 {
2884
2885 if (ctxt == NULL) return(-1);
2886 in = ctxt->input;
2887 if (in == NULL) return(-1);
2888 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2889 unsigned int unused = 0;
2890 xmlCharEncodingHandler * handler = in->buf->encoder;
2891 /*
2892 * Encoding conversion, compute the number of unused original
2893 * bytes from the input not consumed and subtract that from
2894 * the raw consumed value, this is not a cheap operation
2895 */
2896 if (in->end - in->cur > 0) {
2897 unsigned char convbuf[32000];
2898 const unsigned char *cur = (const unsigned char *)in->cur;
2899 int toconv = in->end - in->cur, written = 32000;
2900
2901 int ret;
2902
2903 do {
2904 toconv = in->end - cur;
2905 written = 32000;
2906 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2907 cur, &toconv);
2908 if (ret < 0) {
2909 if (written > 0)
2910 ret = -2;
2911 else
2912 return(-1);
2913 }
2914 unused += written;
2915 cur += toconv;
2916 } while (ret == -2);
2917 }
2918 if (in->buf->rawconsumed < unused)
2919 return(-1);
2920 return(in->buf->rawconsumed - unused);
2921 }
2922 return(in->consumed + (in->cur - in->base));
2923}
UINT(* handler)(MSIPACKAGE *)
Definition: action.c:7512
static int xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
Definition: encoding.c:2029
FxCollectionEntry * cur
WORD unused[29]
Definition: crypt.c:1155
xmlParserInputPtr input
Definition: parser.h:199
int ret

◆ xmlCharEncCloseFunc()

int xmlCharEncCloseFunc ( xmlCharEncodingHandler handler)

xmlCharEncCloseFunc: @handler: char encoding transformation data structure

Generic front-end for encoding handler close function

Returns 0 if success, or -1 in case of error

Definition at line 2796 of file encoding.c.

2796 {
2797 int ret = 0;
2798 int tofree = 0;
2799 int i, handler_in_list = 0;
2800
2801 /* Avoid unused variable warning if features are disabled. */
2802 (void) handler_in_list;
2803
2804 if (handler == NULL) return(-1);
2805 if (handler->name == NULL) return(-1);
2806 if (handlers != NULL) {
2807 for (i = 0;i < nbCharEncodingHandler; i++) {
2808 if (handler == handlers[i]) {
2809 handler_in_list = 1;
2810 break;
2811 }
2812 }
2813 }
2814#ifdef LIBXML_ICONV_ENABLED
2815 /*
2816 * Iconv handlers can be used only once, free the whole block.
2817 * and the associated icon resources.
2818 */
2819 if ((handler_in_list == 0) &&
2820 ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2821 tofree = 1;
2822 if (handler->iconv_out != NULL) {
2823 if (iconv_close(handler->iconv_out))
2824 ret = -1;
2825 handler->iconv_out = NULL;
2826 }
2827 if (handler->iconv_in != NULL) {
2828 if (iconv_close(handler->iconv_in))
2829 ret = -1;
2830 handler->iconv_in = NULL;
2831 }
2832 }
2833#endif /* LIBXML_ICONV_ENABLED */
2834#ifdef LIBXML_ICU_ENABLED
2835 if ((handler_in_list == 0) &&
2836 ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2837 tofree = 1;
2838 if (handler->uconv_out != NULL) {
2839 closeIcuConverter(handler->uconv_out);
2840 handler->uconv_out = NULL;
2841 }
2842 if (handler->uconv_in != NULL) {
2843 closeIcuConverter(handler->uconv_in);
2844 handler->uconv_in = NULL;
2845 }
2846 }
2847#endif
2848 if (tofree) {
2849 /* free up only dynamic handlers iconv/uconv */
2850 if (handler->name != NULL)
2851 xmlFree(handler->name);
2852 handler->name = NULL;
2854 }
2855#ifdef DEBUG_ENCODING
2856 if (ret)
2858 "failed to close the encoding handler\n");
2859 else
2861 "closed the encoding handler\n");
2862#endif
2863
2864 return(ret);
2865}
static xmlCharEncodingHandlerPtr * handlers
Definition: encoding.c:1317
static int nbCharEncodingHandler
Definition: encoding.c:1318
int iconv_close(iconv_t cd)
Definition: win_iconv.c:756
XMLPUBVAR void * xmlGenericErrorContext
Definition: globals.h:353
XMLPUBVAR xmlGenericErrorFunc xmlGenericError
Definition: globals.h:337

Referenced by xmlSwitchInputEncodingInt().

◆ xmlCharEncFirstLine()

int xmlCharEncFirstLine ( xmlCharEncodingHandler handler,
xmlBufferPtr  out,
xmlBufferPtr  in 
)

xmlCharEncFirstLine: @handler: char encoding transformation data structure @out: an xmlBuffer for the output. @in: an xmlBuffer for the input

Front-end for the encoding handler input function, but handle only the very first line, i.e. limit itself to 45 chars.

Returns the number of bytes written if success, -1 in case of general error, or -2 if the transcoding fails (for *in is not valid utf8 string or the result of transformation can't fit into the encoding we want).

Definition at line 2160 of file encoding.c.

2161 {
2162 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2163}
int xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in, int len)
Definition: encoding.c:2074

◆ xmlCharEncFirstLineInput()

int xmlCharEncFirstLineInput ( xmlParserInputBufferPtr  input,
int  len 
)

xmlCharEncFirstLineInput: @input: a parser input buffer @len: number of bytes to convert for the first line, or -1

Front-end for the encoding handler input function, but handle only the very first line. Point is that this is based on autodetection of the encoding and once that first line is converted we may find out that a different decoder is needed to process the input.

Returns the number of bytes written if success, -1 in case of general error, or -2 if the transcoding fails (for *in is not valid utf8 string or the result of transformation can't fit into the encoding we want).

Definition at line 2181 of file encoding.c.

2182{
2183 int ret;
2184 size_t written;
2185 size_t toconv;
2186 int c_in;
2187 int c_out;
2188 xmlBufPtr in;
2189 xmlBufPtr out;
2190
2191 if ((input == NULL) || (input->encoder == NULL) ||
2192 (input->buffer == NULL) || (input->raw == NULL))
2193 return (-1);
2194 out = input->buffer;
2195 in = input->raw;
2196
2197 toconv = xmlBufUse(in);
2198 if (toconv == 0)
2199 return (0);
2200 written = xmlBufAvail(out);
2201 /*
2202 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2203 * 45 chars should be sufficient to reach the end of the encoding
2204 * declaration without going too far inside the document content.
2205 * on UTF-16 this means 90bytes, on UCS4 this means 180
2206 * The actual value depending on guessed encoding is passed as @len
2207 * if provided
2208 */
2209 if (len >= 0) {
2210 if (toconv > (unsigned int) len)
2211 toconv = len;
2212 } else {
2213 if (toconv > 180)
2214 toconv = 180;
2215 }
2216 if (toconv * 2 >= written) {
2217 xmlBufGrow(out, toconv * 2);
2218 written = xmlBufAvail(out);
2219 }
2220 if (written > 360)
2221 written = 360;
2222
2223 c_in = toconv;
2224 c_out = written;
2225 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2226 xmlBufContent(in), &c_in, 0);
2227 xmlBufShrink(in, c_in);
2228 xmlBufAddLen(out, c_out);
2229 if (ret == -1)
2230 ret = -3;
2231
2232 switch (ret) {
2233 case 0:
2234#ifdef DEBUG_ENCODING
2236 "converted %d bytes to %d bytes of input\n",
2237 c_in, c_out);
2238#endif
2239 break;
2240 case -1:
2241#ifdef DEBUG_ENCODING
2243 "converted %d bytes to %d bytes of input, %d left\n",
2244 c_in, c_out, (int)xmlBufUse(in));
2245#endif
2246 break;
2247 case -3:
2248#ifdef DEBUG_ENCODING
2250 "converted %d bytes to %d bytes of input, %d left\n",
2251 c_in, c_out, (int)xmlBufUse(in));
2252#endif
2253 break;
2254 case -2: {
2255 char buf[50];
2256 const xmlChar *content = xmlBufContent(in);
2257
2258 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2259 content[0], content[1],
2260 content[2], content[3]);
2261 buf[49] = 0;
2262 xmlEncodingErr(XML_I18N_CONV_FAILED,
2263 "input conversion failed due to input error, bytes %s\n",
2264 buf);
2265 }
2266 }
2267 /*
2268 * Ignore when input buffer is not on a boundary
2269 */
2270 if (ret == -3) ret = 0;
2271 if (ret == -1) ret = 0;
2272 return(c_out ? c_out : ret);
2273}
size_t xmlBufAvail(const xmlBufPtr buf)
Definition: buf.c:655
int xmlBufAddLen(xmlBufPtr buf, size_t len)
Definition: buf.c:592
int xmlBufGrow(xmlBufPtr buf, int len)
Definition: buf.c:498
content
Definition: atl_ax.c:994
static int xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, int *outlen, const unsigned char *in, int *inlen, int flush)
Definition: encoding.c:1979
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glext.h:7751
GLenum GLenum GLenum input
Definition: glext.h:9031
XMLPUBFUN xmlChar *XMLCALL xmlBufEnd(xmlBufPtr buf)
Definition: buf.c:571
XMLPUBFUN size_t XMLCALL xmlBufUse(const xmlBufPtr buf)
Definition: buf.c:633
XMLPUBFUN size_t XMLCALL xmlBufShrink(xmlBufPtr buf, size_t len)
Definition: buf.c:381
XMLPUBFUN xmlChar *XMLCALL xmlBufContent(const xmlBuf *buf)
Definition: buf.c:553
Definition: buf.c:43
#define snprintf
Definition: wintirpc.h:48
@ XML_I18N_CONV_FAILED
Definition: xmlerror.h:833
unsigned char xmlChar
Definition: xmlstring.h:28

Referenced by xmlSwitchInputEncodingInt().

◆ xmlCharEncFirstLineInt()

int xmlCharEncFirstLineInt ( xmlCharEncodingHandler handler,
xmlBufferPtr  out,
xmlBufferPtr  in,
int  len 
)

xmlCharEncFirstLineInt: @handler: char encoding transformation data structure @out: an xmlBuffer for the output. @in: an xmlBuffer for the input @len: number of bytes to convert for the first line, or -1

Front-end for the encoding handler input function, but handle only the very first line, i.e. limit itself to 45 chars.

Returns the number of bytes written if success, -1 in case of general error, or -2 if the transcoding fails (for *in is not valid utf8 string or the result of transformation can't fit into the encoding we want).

Definition at line 2074 of file encoding.c.

2075 {
2076 int ret;
2077 int written;
2078 int toconv;
2079
2080 if (handler == NULL) return(-1);
2081 if (out == NULL) return(-1);
2082 if (in == NULL) return(-1);
2083
2084 /* calculate space available */
2085 written = out->size - out->use - 1; /* count '\0' */
2086 toconv = in->use;
2087 /*
2088 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2089 * 45 chars should be sufficient to reach the end of the encoding
2090 * declaration without going too far inside the document content.
2091 * on UTF-16 this means 90bytes, on UCS4 this means 180
2092 * The actual value depending on guessed encoding is passed as @len
2093 * if provided
2094 */
2095 if (len >= 0) {
2096 if (toconv > len)
2097 toconv = len;
2098 } else {
2099 if (toconv > 180)
2100 toconv = 180;
2101 }
2102 if (toconv * 2 >= written) {
2103 xmlBufferGrow(out, toconv * 2);
2104 written = out->size - out->use - 1;
2105 }
2106
2107 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2108 in->content, &toconv, 0);
2109 xmlBufferShrink(in, toconv);
2110 out->use += written;
2111 out->content[out->use] = 0;
2112 if (ret == -1) ret = -3;
2113
2114#ifdef DEBUG_ENCODING
2115 switch (ret) {
2116 case 0:
2118 "converted %d bytes to %d bytes of input\n",
2119 toconv, written);
2120 break;
2121 case -1:
2122 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2123 toconv, written, in->use);
2124 break;
2125 case -2:
2127 "input conversion failed due to input error\n");
2128 break;
2129 case -3:
2130 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2131 toconv, written, in->use);
2132 break;
2133 default:
2134 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2135 }
2136#endif /* DEBUG_ENCODING */
2137 /*
2138 * Ignore when input buffer is not on a boundary
2139 */
2140 if (ret == -3) ret = 0;
2141 if (ret == -1) ret = 0;
2142 return(written ? written : ret);
2143}
XMLPUBFUN int XMLCALL xmlBufferShrink(xmlBufferPtr buf, unsigned int len)
XMLPUBFUN int XMLCALL xmlBufferGrow(xmlBufferPtr buf, unsigned int len)

Referenced by xmlCharEncFirstLine().

◆ xmlCharEncInFunc()

int xmlCharEncInFunc ( xmlCharEncodingHandler * handler,
xmlBufferPtr  out,
xmlBufferPtr  in 
)

xmlCharEncInFunc: @handler: char encoding transformation data structure @out: an xmlBuffer for the output. @in: an xmlBuffer for the input

Generic front-end for the encoding handler input function

Returns the number of bytes written on success, -1 on general error, or -2 if the transcoding fails (because *in is not a valid UTF-8 string or the result of the transformation can't fit into the encoding we want).

Definition at line 2383 of file encoding.c.

2385{
2386 int ret;
2387 int written;
2388 int toconv;
2389
2390 if (handler == NULL)
2391 return (-1);
2392 if (out == NULL)
2393 return (-1);
2394 if (in == NULL)
2395 return (-1);
2396
2397 toconv = in->use;
2398 if (toconv == 0)
2399 return (0);
2400 written = out->size - out->use -1; /* count '\0' */
2401 if (toconv * 2 >= written) {
2402 xmlBufferGrow(out, out->size + toconv * 2);
2403 written = out->size - out->use - 1;
2404 }
2405 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2406 in->content, &toconv, 1);
2407 xmlBufferShrink(in, toconv);
2408 out->use += written;
2409 out->content[out->use] = 0;
2410 if (ret == -1)
2411 ret = -3;
2412
2413 switch (ret) {
2414 case 0:
2415#ifdef DEBUG_ENCODING
2417 "converted %d bytes to %d bytes of input\n",
2418 toconv, written);
2419#endif
2420 break;
2421 case -1:
2422#ifdef DEBUG_ENCODING
2424 "converted %d bytes to %d bytes of input, %d left\n",
2425 toconv, written, in->use);
2426#endif
2427 break;
2428 case -3:
2429#ifdef DEBUG_ENCODING
2431 "converted %d bytes to %d bytes of input, %d left\n",
2432 toconv, written, in->use);
2433#endif
2434 break;
2435 case -2: {
2436 char buf[50];
2437
2438 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2439 in->content[0], in->content[1],
2440 in->content[2], in->content[3]);
2441 buf[49] = 0;
2442 xmlEncodingErr(XML_I18N_CONV_FAILED,
2443 "input conversion failed due to input error, bytes %s\n",
2444 buf);
2445 }
2446 }
2447 /*
2448 * Ignore when input buffer is not on a boundary
2449 */
2450 if (ret == -3)
2451 ret = 0;
2452 return (written? written : ret);
2453}

◆ xmlCharEncInput()

int xmlCharEncInput ( xmlParserInputBufferPtr  input,
int  flush 
)

xmlCharEncInput: @input: a parser input buffer @flush: try to flush all the raw buffer

Generic front-end for the encoding handler on parser input

Returns the number of bytes written on success, -1 on general error, or -2 if the transcoding fails (because *in is not a valid UTF-8 string or the result of the transformation can't fit into the encoding we want).

Definition at line 2288 of file encoding.c.

2289{
2290 int ret;
2291 size_t written;
2292 size_t toconv;
2293 int c_in;
2294 int c_out;
2295 xmlBufPtr in;
2296 xmlBufPtr out;
2297
2298 if ((input == NULL) || (input->encoder == NULL) ||
2299 (input->buffer == NULL) || (input->raw == NULL))
2300 return (-1);
2301 out = input->buffer;
2302 in = input->raw;
2303
2304 toconv = xmlBufUse(in);
2305 if (toconv == 0)
2306 return (0);
2307 if ((toconv > 64 * 1024) && (flush == 0))
2308 toconv = 64 * 1024;
2309 written = xmlBufAvail(out);
2310 if (toconv * 2 >= written) {
2311 xmlBufGrow(out, toconv * 2);
2312 written = xmlBufAvail(out);
2313 }
2314 if ((written > 128 * 1024) && (flush == 0))
2315 written = 128 * 1024;
2316
2317 c_in = toconv;
2318 c_out = written;
2319 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2320 xmlBufContent(in), &c_in, flush);
2321 xmlBufShrink(in, c_in);
2322 xmlBufAddLen(out, c_out);
2323 if (ret == -1)
2324 ret = -3;
2325
2326 switch (ret) {
2327 case 0:
2328#ifdef DEBUG_ENCODING
2330 "converted %d bytes to %d bytes of input\n",
2331 c_in, c_out);
2332#endif
2333 break;
2334 case -1:
2335#ifdef DEBUG_ENCODING
2337 "converted %d bytes to %d bytes of input, %d left\n",
2338 c_in, c_out, (int)xmlBufUse(in));
2339#endif
2340 break;
2341 case -3:
2342#ifdef DEBUG_ENCODING
2344 "converted %d bytes to %d bytes of input, %d left\n",
2345 c_in, c_out, (int)xmlBufUse(in));
2346#endif
2347 break;
2348 case -2: {
2349 char buf[50];
2350 const xmlChar *content = xmlBufContent(in);
2351
2352 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2353 content[0], content[1],
2354 content[2], content[3]);
2355 buf[49] = 0;
2356 xmlEncodingErr(XML_I18N_CONV_FAILED,
2357 "input conversion failed due to input error, bytes %s\n",
2358 buf);
2359 }
2360 }
2361 /*
2362 * Ignore when input buffer is not on a boundary
2363 */
2364 if (ret == -3)
2365 ret = 0;
2366 return (c_out? c_out : ret);
2367}

Referenced by xmlSwitchInputEncodingInt().

◆ xmlCharEncOutFunc()

int xmlCharEncOutFunc ( xmlCharEncodingHandler * handler,
xmlBufferPtr  out,
xmlBufferPtr  in 
)

xmlCharEncOutFunc: @handler: char encoding transformation data structure @out: an xmlBuffer for the output. @in: an xmlBuffer for the input

Generic front-end for the encoding handler output function. A first call with @in == NULL has to be made first to initiate the output in case of non-stateless encodings needing to initiate their state or the output (like the BOM in UTF16). In case of UTF8 sequence conversion errors for the given encoder, the content will be automatically remapped to a CharRef sequence.

Returns the number of bytes written on success, -1 on general error, or -2 if the transcoding fails (because *in is not a valid UTF-8 string or the result of the transformation can't fit into the encoding we want).

Definition at line 2645 of file encoding.c.

2646 {
2647 int ret;
2648 int written;
2649 int writtentot = 0;
2650 int toconv;
2651
2652 if (handler == NULL) return(-1);
2653 if (out == NULL) return(-1);
2654
2655retry:
2656
2657 written = out->size - out->use;
2658
2659 if (written > 0)
2660 written--; /* Gennady: count '/0' */
2661
2662 /*
2663 * First specific handling of in = NULL, i.e. the initialization call
2664 */
2665 if (in == NULL) {
2666 toconv = 0;
2667 /* TODO: Check return value. */
2668 xmlEncOutputChunk(handler, &out->content[out->use], &written,
2669 NULL, &toconv);
2670 out->use += written;
2671 out->content[out->use] = 0;
2672#ifdef DEBUG_ENCODING
2674 "initialized encoder\n");
2675#endif
2676 return(0);
2677 }
2678
2679 /*
2680 * Conversion itself.
2681 */
2682 toconv = in->use;
2683 if (toconv == 0)
2684 return(0);
2685 if (toconv * 4 >= written) {
2686 xmlBufferGrow(out, toconv * 4);
2687 written = out->size - out->use - 1;
2688 }
2689 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2690 in->content, &toconv);
2691 xmlBufferShrink(in, toconv);
2692 out->use += written;
2693 writtentot += written;
2694 out->content[out->use] = 0;
2695 if (ret == -1) {
2696 if (written > 0) {
2697 /* Can be a limitation of iconv or uconv */
2698 goto retry;
2699 }
2700 ret = -3;
2701 }
2702
2703 /*
2704 * Attempt to handle error cases
2705 */
2706 switch (ret) {
2707 case 0:
2708#ifdef DEBUG_ENCODING
2710 "converted %d bytes to %d bytes of output\n",
2711 toconv, written);
2712#endif
2713 break;
2714 case -1:
2715#ifdef DEBUG_ENCODING
2717 "output conversion failed by lack of space\n");
2718#endif
2719 break;
2720 case -3:
2721#ifdef DEBUG_ENCODING
2722 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2723 toconv, written, in->use);
2724#endif
2725 break;
2726 case -4:
2727 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2728 "xmlCharEncOutFunc: no output function !\n", NULL);
2729 ret = -1;
2730 break;
2731 case -2: {
2732 xmlChar charref[20];
2733 int len = in->use;
2734 const xmlChar *utf = (const xmlChar *) in->content;
2735 int cur, charrefLen;
2736
2737 cur = xmlGetUTF8Char(utf, &len);
2738 if (cur <= 0)
2739 break;
2740
2741#ifdef DEBUG_ENCODING
2743 "handling output conversion error\n");
2745 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2746 in->content[0], in->content[1],
2747 in->content[2], in->content[3]);
2748#endif
2749 /*
2750 * Removes the UTF8 sequence, and replace it by a charref
2751 * and continue the transcoding phase, hoping the error
2752 * did not mangle the encoder state.
2753 */
2754 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2755 "&#%d;", cur);
2757 xmlBufferGrow(out, charrefLen * 4);
2758 written = out->size - out->use - 1;
2759 toconv = charrefLen;
2760 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2761 charref, &toconv);
2762
2763 if ((ret < 0) || (toconv != charrefLen)) {
2764 char buf[50];
2765
2766 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2767 in->content[0], in->content[1],
2768 in->content[2], in->content[3]);
2769 buf[49] = 0;
2770 xmlEncodingErr(XML_I18N_CONV_FAILED,
2771 "output conversion failed due to conv error, bytes %s\n",
2772 buf);
2773 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2774 in->content[0] = ' ';
2775 break;
2776 }
2777
2778 out->use += written;
2779 writtentot += written;
2780 out->content[out->use] = 0;
2781 goto retry;
2782 }
2783 }
2784 return(writtentot ? writtentot : ret);
2785}
@ XML_BUFFER_ALLOC_IMMUTABLE
Definition: tree.h:77
@ XML_I18N_NO_OUTPUT
Definition: xmlerror.h:834
XMLPUBFUN int XMLCALL XMLPUBFUN int XMLCALL XMLPUBFUN int XMLCALL xmlGetUTF8Char(const unsigned char *utf, int *len)
Definition: xmlstring.c:708

◆ xmlCleanupCharEncodingHandlers()

void xmlCleanupCharEncodingHandlers ( void  )

xmlCleanupCharEncodingHandlers:

DEPRECATED: This function will be made private. Call xmlCleanupParser to free global state but see the warnings there. xmlCleanupParser should be only called once at program exit. In most cases, you don't have to call cleanup functions at all.

Cleanup the memory allocated for the char encoding support, it unregisters all the encoding handlers and the aliases.

Definition at line 1483 of file encoding.c.

1483 {
1485
1486 if (handlers == NULL) return;
1487
1488 for (;nbCharEncodingHandler > 0;) {
1494 }
1495 }
1497 handlers = NULL;
1500}
void xmlCleanupEncodingAliases(void)
Definition: encoding.c:1012
static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler
Definition: encoding.c:1325

Referenced by xmlCleanupParser().

◆ xmlCleanupEncodingAliases()

void xmlCleanupEncodingAliases ( void  )

xmlCleanupEncodingAliases:

Unregisters all aliases

Definition at line 1012 of file encoding.c.

1012 {
1013 int i;
1014
1016 return;
1017
1018 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1023 }
1028}

Referenced by xmlCleanupCharEncodingHandlers().

◆ xmlDelEncodingAlias()

int xmlDelEncodingAlias ( const char * alias)

xmlDelEncodingAlias: @alias: the alias name as parsed, in UTF-8 format (ASCII actually)

Unregisters an encoding alias @alias

Returns 0 in case of success, -1 in case of error

Definition at line 1134 of file encoding.c.

1134 {
1135 int i;
1136
1137 if (alias == NULL)
1138 return(-1);
1139
1141 return(-1);
1142 /*
1143 * Walk down the list looking for a definition of the alias
1144 */
1145 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1152 return(0);
1153 }
1154 }
1155 return(-1);
1156}
#define memmove(s1, s2, n)
Definition: mkisofs.h:881

◆ xmlDetectCharEncoding()

xmlCharEncoding xmlDetectCharEncoding ( const unsigned char * in,
int  len 
)

xmlDetectCharEncoding: @in: a pointer to the first bytes of the XML entity, must be at least 2 bytes long (at least 4 if encoding is a UCS4 variant). @len: the length of the buffer

Guess the encoding of the entity using the first bytes of the entity content according to the non-normative appendix F of the XML-1.0 recommendation.

Returns one of the XML_CHAR_ENCODING_... values.

Definition at line 952 of file encoding.c.

953{
954 if (in == NULL)
956 if (len >= 4) {
957 if ((in[0] == 0x00) && (in[1] == 0x00) &&
958 (in[2] == 0x00) && (in[3] == 0x3C))
960 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
961 (in[2] == 0x00) && (in[3] == 0x00))
963 if ((in[0] == 0x00) && (in[1] == 0x00) &&
964 (in[2] == 0x3C) && (in[3] == 0x00))
966 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
967 (in[2] == 0x00) && (in[3] == 0x00))
969 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
970 (in[2] == 0xA7) && (in[3] == 0x94))
972 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
973 (in[2] == 0x78) && (in[3] == 0x6D))
975 /*
976 * Although not part of the recommendation, we also
977 * attempt an "auto-recognition" of UTF-16LE and
978 * UTF-16BE encodings.
979 */
980 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
981 (in[2] == 0x3F) && (in[3] == 0x00))
983 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
984 (in[2] == 0x00) && (in[3] == 0x3F))
986 }
987 if (len >= 3) {
988 /*
989 * Errata on XML-1.0 June 20 2001
990 * We now allow an UTF8 encoded BOM
991 */
992 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
993 (in[2] == 0xBF))
995 }
996 /* For UTF-16 we can recognize by the BOM */
997 if (len >= 2) {
998 if ((in[0] == 0xFE) && (in[1] == 0xFF))
1000 if ((in[0] == 0xFF) && (in[1] == 0xFE))
1002 }
1003 return(XML_CHAR_ENCODING_NONE);
1004}
@ XML_CHAR_ENCODING_UTF8
Definition: encoding.h:59
@ XML_CHAR_ENCODING_UTF16BE
Definition: encoding.h:61
@ XML_CHAR_ENCODING_EBCDIC
Definition: encoding.h:64
@ XML_CHAR_ENCODING_UCS4LE
Definition: encoding.h:62
@ XML_CHAR_ENCODING_UCS4_3412
Definition: encoding.h:66
@ XML_CHAR_ENCODING_UCS4BE
Definition: encoding.h:63
@ XML_CHAR_ENCODING_UTF16LE
Definition: encoding.h:60
@ XML_CHAR_ENCODING_NONE
Definition: encoding.h:58
@ XML_CHAR_ENCODING_UCS4_2143
Definition: encoding.h:65

Referenced by xmlCtxtResetPush(), xmlParseDocument(), xmlParseExternalEntityPrivate(), xmlParseExternalSubset(), xmlParseExtParsedEnt(), xmlParsePEReference(), and xmlSAX2ExternalSubset().

◆ xmlEncInputChunk()

static int xmlEncInputChunk ( xmlCharEncodingHandler * handler,
unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen,
int  flush 
)
static

xmlEncInputChunk: @handler: encoding handler @out: a pointer to an array of bytes to store the result @outlen: the length of @out @in: a pointer to an array of input bytes @inlen: the length of @in @flush: flush (ICU-related)

Returns 0 on success, or -1 by lack of space, or -2 if the transcoding fails (because *in is not a valid UTF-8 string or the result of the transformation can't fit into the encoding we want), or -3 if the last byte can't form a single output char.

The value of @inlen after return is the number of octets consumed when the return value is 0, else unpredictable. The value of @outlen after return is the number of octets produced.

Definition at line 1979 of file encoding.c.

1980 { /* Convert one chunk of input with whichever converter the handler provides. */
1981 int ret;
1982 (void)flush; /* flush is only forwarded by the ICU branch below */
1983
1984 if (handler->input != NULL) { /* the handler's own input callback is tried first */
1985 ret = handler->input(out, outlen, in, inlen);
1986 if (ret > 0) /* a positive callback result is reported to the caller as 0 */
1987 ret = 0;
1988 }
1989#ifdef LIBXML_ICONV_ENABLED
1990 else if (handler->iconv_in != NULL) { /* otherwise use the iconv descriptor, if any */
1991 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1992 }
1993#endif /* LIBXML_ICONV_ENABLED */
1994#ifdef LIBXML_ICU_ENABLED
1995 else if (handler->uconv_in != NULL) { /* otherwise use the ICU converter, if any */
1996 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1997 flush);
1998 }
1999#endif /* LIBXML_ICU_ENABLED */
2000 else { /* no input converter at all: report nothing consumed, nothing produced */
2001 *outlen = 0;
2002 *inlen = 0;
2003 ret = -2; /* same code as a transcoding failure */
2004 }
2005
2006 return(ret);
2007}

Referenced by xmlCharEncFirstLineInput(), xmlCharEncFirstLineInt(), xmlCharEncInFunc(), and xmlCharEncInput().

◆ xmlEncodingErrMemory()

static void xmlEncodingErrMemory ( const char * extra)
static

xmlEncodingErrMemory: @extra: extra information

Handle an out of memory condition

Definition at line 93 of file encoding.c.

94{
95 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra); /* raise XML_ERR_NO_MEMORY in the XML_FROM_I18N domain, passing @extra through */
96}
@ extra
Definition: id3.c:95
@ XML_ERR_NO_MEMORY
Definition: xmlerror.h:102

Referenced by xmlInitCharEncodingHandlers(), and xmlNewCharEncodingHandler().

◆ xmlEncOutputChunk()

static int xmlEncOutputChunk ( xmlCharEncodingHandler * handler,
unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen 
)
static

xmlEncOutputChunk: @handler: encoding handler @out: a pointer to an array of bytes to store the result @outlen: the length of @out @in: a pointer to an array of input bytes @inlen: the length of @in

Returns 0 on success, or -1 by lack of space, or -2 if the transcoding fails (because *in is not a valid UTF-8 string or the result of the transformation can't fit into the encoding we want), or -3 if the last byte can't form a single output char, or -4 if no output function was found.

The value of @inlen after return is the number of octets consumed when the return value is 0, else unpredictable. The value of @outlen after return is the number of octets produced.

Definition at line 2029 of file encoding.c.

2030 { /* Convert one chunk of output with whichever converter the handler provides. */
2031 int ret;
2032
2033 if (handler->output != NULL) { /* the handler's own output callback is tried first */
2034 ret = handler->output(out, outlen, in, inlen);
2035 if (ret > 0) /* a positive callback result is reported to the caller as 0 */
2036 ret = 0;
2037 }
2038#ifdef LIBXML_ICONV_ENABLED
2039 else if (handler->iconv_out != NULL) { /* otherwise use the iconv descriptor, if any */
2040 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2041 }
2042#endif /* LIBXML_ICONV_ENABLED */
2043#ifdef LIBXML_ICU_ENABLED
2044 else if (handler->uconv_out != NULL) { /* otherwise use the ICU converter, if any */
2045 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2046 1); /* the last argument (flush) is always 1 on the output side */
2047 }
2048#endif /* LIBXML_ICU_ENABLED */
2049 else { /* no output converter at all: report nothing consumed, nothing produced */
2050 *outlen = 0;
2051 *inlen = 0;
2052 ret = -4; /* distinct code: no output function was found */
2053 }
2054
2055 return(ret);
2056}

Referenced by xmlByteConsumed(), and xmlCharEncOutFunc().

◆ xmlFindCharEncodingHandler()

xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler ( const char * name)

Definition at line 1678 of file encoding.c.

1678 {
1679 const char *nalias;
1680 const char *norig;
1682#ifdef LIBXML_ICONV_ENABLED
1684 iconv_t icv_in, icv_out;
1685#endif /* LIBXML_ICONV_ENABLED */
1686#ifdef LIBXML_ICU_ENABLED
1688 uconv_t *ucv_in, *ucv_out;
1689#endif /* LIBXML_ICU_ENABLED */
1690 char upper[100];
1691 int i;
1692
1694 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1695 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1696
1697 /*
1698 * Do the alias resolution
1699 */
1700 norig = name;
1701 nalias = xmlGetEncodingAlias(name);
1702 if (nalias != NULL)
1703 name = nalias;
1704
1705 /*
1706 * Check first for directly registered encoding names
1707 */
1708 for (i = 0;i < 99;i++) {
1709 upper[i] = toupper(name[i]);
1710 if (upper[i] == 0) break;
1711 }
1712 upper[i] = 0;
1713
1714 if (handlers != NULL) {
1715 for (i = 0;i < nbCharEncodingHandler; i++) {
1716 if (!strcmp(upper, handlers[i]->name)) {
1717#ifdef DEBUG_ENCODING
1719 "Found registered handler for encoding %s\n", name);
1720#endif
1721 return(handlers[i]);
1722 }
1723 }
1724 }
1725
1726#ifdef LIBXML_ICONV_ENABLED
1727 /* check whether iconv can handle this */
1728 icv_in = iconv_open("UTF-8", name);
1729 icv_out = iconv_open(name, "UTF-8");
1730 if (icv_in == (iconv_t) -1) {
1731 icv_in = iconv_open("UTF-8", upper);
1732 }
1733 if (icv_out == (iconv_t) -1) {
1734 icv_out = iconv_open(upper, "UTF-8");
1735 }
1736 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1739 if (enc == NULL) {
1740 iconv_close(icv_in);
1741 iconv_close(icv_out);
1742 return(NULL);
1743 }
1744 memset(enc, 0, sizeof(xmlCharEncodingHandler));
1745 enc->name = xmlMemStrdup(name);
1746 enc->input = NULL;
1747 enc->output = NULL;
1748 enc->iconv_in = icv_in;
1749 enc->iconv_out = icv_out;
1750#ifdef DEBUG_ENCODING
1752 "Found iconv handler for encoding %s\n", name);
1753#endif
1754 return enc;
1755 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1756 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1757 "iconv : problems with filters for '%s'\n", name);
1758 if (icv_in != (iconv_t) -1)
1759 iconv_close(icv_in);
1760 else
1761 iconv_close(icv_out);
1762 }
1763#endif /* LIBXML_ICONV_ENABLED */
1764#ifdef LIBXML_ICU_ENABLED
1765 /* check whether icu can handle this */
1766 ucv_in = openIcuConverter(name, 1);
1767 ucv_out = openIcuConverter(name, 0);
1768 if (ucv_in != NULL && ucv_out != NULL) {
1771 if (encu == NULL) {
1772 closeIcuConverter(ucv_in);
1773 closeIcuConverter(ucv_out);
1774 return(NULL);
1775 }
1776 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1777 encu->name = xmlMemStrdup(name);
1778 encu->input = NULL;
1779 encu->output = NULL;
1780 encu->uconv_in = ucv_in;
1781 encu->uconv_out = ucv_out;
1782#ifdef DEBUG_ENCODING
1784 "Found ICU converter handler for encoding %s\n", name);
1785#endif
1786 return encu;
1787 } else if (ucv_in != NULL || ucv_out != NULL) {
1788 closeIcuConverter(ucv_in);
1789 closeIcuConverter(ucv_out);
1790 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1791 "ICU converter : problems with filters for '%s'\n", name);
1792 }
1793#endif /* LIBXML_ICU_ENABLED */
1794
1795#ifdef DEBUG_ENCODING
1797 "No handler found for encoding %s\n", name);
1798#endif
1799
1800 /*
1801 * Fallback using the canonical names
1802 */
1803 alias = xmlParseCharEncoding(norig);
1805 const char* canon;
1807 if ((canon != NULL) && (strcmp(name, canon))) {
1808 return(xmlFindCharEncodingHandler(canon));
1809 }
1810 }
1811
1812 /* If "none of the above", give up */
1813 return(NULL);
1814}
const char * xmlGetCharEncodingName(xmlCharEncoding enc)
Definition: encoding.c:1254
void xmlInitCharEncodingHandlers(void)
Definition: encoding.c:1420
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name)
Definition: encoding.c:1678
xmlCharEncoding xmlParseCharEncoding(const char *name)
Definition: encoding.c:1170
const char * xmlGetEncodingAlias(const char *alias)
Definition: encoding.c:1039
xmlCharEncoding
Definition: encoding.h:56
@ XML_CHAR_ENCODING_ERROR
Definition: encoding.h:57
xmlCharEncodingHandler * xmlCharEncodingHandlerPtr
Definition: encoding.h:130
iconv_t iconv_open(const char *tocode, const char *fromcode)
Definition: win_iconv.c:730
#define memset(x, y, z)
Definition: compat.h:39
xmlCharEncodingOutputFunc output
Definition: encoding.h:134
xmlCharEncodingInputFunc input
Definition: encoding.h:133
@ XML_ERR_INTERNAL_ERROR
Definition: xmlerror.h:101

Referenced by xmlCtxtResetPush(), xmlDoRead(), xmlFindCharEncodingHandler(), xmlGetCharEncodingHandler(), xmlParseEncodingDecl(), xmlParseInNodeContext(), xsltSaveResultToFd(), xsltSaveResultToFile(), xsltSaveResultToFilename(), and xsltSaveResultToString().

◆ xmlGetCharEncodingHandler()

xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler ( xmlCharEncoding  enc)

xmlGetCharEncodingHandler: @enc: an xmlCharEncoding value.

Search in the registered set the handler able to read/write that encoding.

Returns the handler or NULL if not found

Definition at line 1544 of file encoding.c.

1544 {
1546
1548 switch (enc) {
1550 return(NULL);
1552 return(NULL);
1554 return(NULL);
1556 return(xmlUTF16LEHandler);
1558 return(xmlUTF16BEHandler);
1561 if (handler != NULL) return(handler);
1563 if (handler != NULL) return(handler);
1564 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1565 if (handler != NULL) return(handler);
1567 if (handler != NULL) return(handler);
1568 break;
1570 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1571 if (handler != NULL) return(handler);
1573 if (handler != NULL) return(handler);
1575 if (handler != NULL) return(handler);
1576 break;
1578 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1579 if (handler != NULL) return(handler);
1581 if (handler != NULL) return(handler);
1583 if (handler != NULL) return(handler);
1584 break;
1586 break;
1588 break;
1590 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1591 if (handler != NULL) return(handler);
1593 if (handler != NULL) return(handler);
1595 if (handler != NULL) return(handler);
1596 break;
1597
1598 /*
1599 * We used to keep ISO Latin encodings native in the
1600 * generated data. This led to so many problems that
1601 * this has been removed. One can still change this
1602 * back by registering no-ops encoders for those
1603 */
1605 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1606 if (handler != NULL) return(handler);
1607 break;
1609 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1610 if (handler != NULL) return(handler);
1611 break;
1613 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1614 if (handler != NULL) return(handler);
1615 break;
1617 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1618 if (handler != NULL) return(handler);
1619 break;
1621 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1622 if (handler != NULL) return(handler);
1623 break;
1625 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1626 if (handler != NULL) return(handler);
1627 break;
1629 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1630 if (handler != NULL) return(handler);
1631 break;
1633 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1634 if (handler != NULL) return(handler);
1635 break;
1637 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1638 if (handler != NULL) return(handler);
1639 break;
1640
1641
1643 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1644 if (handler != NULL) return(handler);
1645 break;
1647 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1648 if (handler != NULL) return(handler);
1649 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1650 if (handler != NULL) return(handler);
1651 handler = xmlFindCharEncodingHandler("Shift_JIS");
1652 if (handler != NULL) return(handler);
1653 break;
1656 if (handler != NULL) return(handler);
1657 break;
1658 default:
1659 break;
1660 }
1661
1662#ifdef DEBUG_ENCODING
1664 "No handler found for encoding %d\n", enc);
1665#endif
1666 return(NULL);
1667}
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler
Definition: encoding.c:60
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler
Definition: encoding.c:61
@ XML_CHAR_ENCODING_8859_6
Definition: encoding.h:73
@ XML_CHAR_ENCODING_8859_7
Definition: encoding.h:74
@ XML_CHAR_ENCODING_8859_2
Definition: encoding.h:69
@ XML_CHAR_ENCODING_8859_4
Definition: encoding.h:71
@ XML_CHAR_ENCODING_8859_8
Definition: encoding.h:75
@ XML_CHAR_ENCODING_UCS2
Definition: encoding.h:67
@ XML_CHAR_ENCODING_2022_JP
Definition: encoding.h:77
@ XML_CHAR_ENCODING_8859_3
Definition: encoding.h:70
@ XML_CHAR_ENCODING_8859_1
Definition: encoding.h:68
@ XML_CHAR_ENCODING_8859_9
Definition: encoding.h:76
@ XML_CHAR_ENCODING_8859_5
Definition: encoding.h:72
@ XML_CHAR_ENCODING_SHIFT_JIS
Definition: encoding.h:78
@ XML_CHAR_ENCODING_EUC_JP
Definition: encoding.h:79

Referenced by xmlSwitchEncoding().

◆ xmlGetCharEncodingName()

const char * xmlGetCharEncodingName ( xmlCharEncoding  enc)

xmlGetCharEncodingName: @enc: the encoding

The "canonical" name for XML encoding. C.f. http://www.w3.org/TR/REC-xml#charencoding Section 4.3.3 Character Encoding in Entities

Returns the canonical name for the given encoding

Definition at line 1254 of file encoding.c.

1254 {
1255 switch (enc) {
1257 return(NULL);
1259 return(NULL);
1261 return("UTF-8");
1263 return("UTF-16");
1265 return("UTF-16");
1267 return("EBCDIC");
1269 return("ISO-10646-UCS-4");
1271 return("ISO-10646-UCS-4");
1273 return("ISO-10646-UCS-4");
1275 return("ISO-10646-UCS-4");
1277 return("ISO-10646-UCS-2");
1279 return("ISO-8859-1");
1281 return("ISO-8859-2");
1283 return("ISO-8859-3");
1285 return("ISO-8859-4");
1287 return("ISO-8859-5");
1289 return("ISO-8859-6");
1291 return("ISO-8859-7");
1293 return("ISO-8859-8");
1295 return("ISO-8859-9");
1297 return("ISO-2022-JP");
1299 return("Shift-JIS");
1301 return("EUC-JP");
1303 return(NULL);
1304 }
1305 return(NULL);
1306}
@ XML_CHAR_ENCODING_ASCII
Definition: encoding.h:80

Referenced by xmlFindCharEncodingHandler(), xmlSwitchEncoding(), and xsltSaveResultTo().

◆ xmlGetEncodingAlias()

const char * xmlGetEncodingAlias ( const char * alias)

xmlGetEncodingAlias: @alias: the alias name as parsed, in UTF-8 format (ASCII actually)

Lookup an encoding name for the given alias.

Returns NULL if not found, otherwise the original name

Definition at line 1039 of file encoding.c.

1039 {
1040 int i;
1041 char upper[100];
1042
1043 if (alias == NULL)
1044 return(NULL);
1045
1047 return(NULL);
1048
1049 for (i = 0;i < 99;i++) {
1050 upper[i] = toupper(alias[i]);
1051 if (upper[i] == 0) break;
1052 }
1053 upper[i] = 0;
1054
1055 /*
1056 * Walk down the list looking for a definition of the alias
1057 */
1058 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1059 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1060 return(xmlCharEncodingAliases[i].name);
1061 }
1062 }
1063 return(NULL);
1064}

Referenced by xmlFindCharEncodingHandler(), xmlNewCharEncodingHandler(), and xmlParseCharEncoding().

◆ xmlInitCharEncodingHandlers()

void xmlInitCharEncodingHandlers ( void  )

xmlInitCharEncodingHandlers:

DEPRECATED: This function will be made private. Call xmlInitParser to initialize the library.

Initialize the char encoding support; it registers the default supported encodings. NOTE: while public, this function usually doesn't need to be called in normal processing.

Definition at line 1420 of file encoding.c.

1420 {
1421 unsigned short int tst = 0x1234;
1422 unsigned char *ptr = (unsigned char *) &tst;
1423
1424 if (handlers != NULL) return;
1425
1428
1429 if (*ptr == 0x12) xmlLittleEndian = 0;
1430 else if (*ptr == 0x34) xmlLittleEndian = 1;
1431 else {
1432 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1433 "Odd problem at endianness detection\n", NULL);
1434 }
1435
1436 if (handlers == NULL) {
1437 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1438 return;
1439 }
1441#ifdef LIBXML_OUTPUT_ENABLED
1443 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1445 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1446 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1447 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1448 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1449 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1450#ifdef LIBXML_HTML_ENABLED
1451 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1452#endif
1453#else
1462#endif /* LIBXML_OUTPUT_ENABLED */
1463#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1464#ifdef LIBXML_ISO8859X_ENABLED
1465 xmlRegisterCharEncodingHandlersISO8859x ();
1466#endif
1467#endif
1468
1469}
int isolat1ToUTF8(unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
Definition: encoding.c:316
xmlCharEncodingHandlerPtr xmlNewCharEncodingHandler(const char *name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output)
Definition: encoding.c:1338
static int UTF16LEToUTF8(unsigned char *out, int *outlen, const unsigned char *inb, int *inlenb)
Definition: encoding.c:508
static void xmlEncodingErrMemory(const char *extra)
Definition: encoding.c:93
#define MAX_ENCODING_HANDLERS
Definition: encoding.c:1316
static int UTF8ToUTF8(unsigned char *out, int *outlen, const unsigned char *inb, int *inlenb)
Definition: encoding.c:364
static int UTF16BEToUTF8(unsigned char *out, int *outlen, const unsigned char *inb, int *inlenb)
Definition: encoding.c:751
static int asciiToUTF8(unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
Definition: encoding.c:185
static PVOID ptr
Definition: dispmode.c:27

Referenced by xmlFindCharEncodingHandler(), xmlGetCharEncodingHandler(), xmlInitParser(), and xmlRegisterCharEncodingHandler().

◆ xmlNewCharEncodingHandler()

xmlCharEncodingHandlerPtr xmlNewCharEncodingHandler ( const char * name,
xmlCharEncodingInputFunc  input,
xmlCharEncodingOutputFunc  output 
)

Definition at line 1338 of file encoding.c.

1340 {
1342 const char *alias;
1343 char upper[500];
1344 int i;
1345 char *up = NULL;
1346
1347 /*
1348 * Do the alias resolution
1349 */
1351 if (alias != NULL)
1352 name = alias;
1353
1354 /*
1355 * Keep only the uppercase version of the encoding.
1356 */
1357 if (name == NULL) {
1358 xmlEncodingErr(XML_I18N_NO_NAME,
1359 "xmlNewCharEncodingHandler : no name !\n", NULL);
1360 return(NULL);
1361 }
1362 for (i = 0;i < 499;i++) {
1363 upper[i] = toupper(name[i]);
1364 if (upper[i] == 0) break;
1365 }
1366 upper[i] = 0;
1367 up = xmlMemStrdup(upper);
1368 if (up == NULL) {
1369 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1370 return(NULL);
1371 }
1372
1373 /*
1374 * allocate and fill-up an handler block.
1375 */
1378 if (handler == NULL) {
1379 xmlFree(up);
1380 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1381 return(NULL);
1382 }
1384 handler->input = input;
1385 handler->output = output;
1386 handler->name = up;
1387
1388#ifdef LIBXML_ICONV_ENABLED
1389 handler->iconv_in = NULL;
1390 handler->iconv_out = NULL;
1391#endif
1392#ifdef LIBXML_ICU_ENABLED
1393 handler->uconv_in = NULL;
1394 handler->uconv_out = NULL;
1395#endif
1396
1397 /*
1398 * registers and returns the handler.
1399 */
1401#ifdef DEBUG_ENCODING
1403 "Registered encoding handler for %s\n", name);
1404#endif
1405 return(handler);
1406}
void xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)
Definition: encoding.c:1509
#define up(mutex)
Definition: glue.h:30
@ XML_I18N_NO_NAME
Definition: xmlerror.h:830

Referenced by xmlInitCharEncodingHandlers().

◆ xmlParseCharEncoding()

xmlCharEncoding xmlParseCharEncoding ( const char * name)

Definition at line 1170 of file encoding.c.

1171{
1172 const char *alias;
1173 char upper[500];
1174 int i;
1175
1176 if (name == NULL)
1177 return(XML_CHAR_ENCODING_NONE);
1178
1179 /*
1180 * Do the alias resolution
1181 */
1182 alias = xmlGetEncodingAlias(name);
1183 if (alias != NULL)
1184 name = alias;
1185
1186 for (i = 0;i < 499;i++) {
1187 upper[i] = toupper(name[i]);
1188 if (upper[i] == 0) break;
1189 }
1190 upper[i] = 0;
1191
1192 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1193 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1194 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1195
1196 /*
1197 * NOTE: if we were able to parse this, the endianness of UTF16 is
1198 * already found and in use
1199 */
1200 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1201 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1202
1203 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1204 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1205 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1206
1207 /*
1208 * NOTE: if we were able to parse this, the endianness of UCS4 is
1209 * already found and in use
1210 */
1211 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1212 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1213 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1214
1215
1216 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1217 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1218 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1219
1220 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1221 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1222 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1223
1224 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1225 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1226 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1227 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1228 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1229 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1230 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1231
1232 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1233 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1234 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1235
1236#ifdef DEBUG_ENCODING
1237 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1238#endif
1239 return(XML_CHAR_ENCODING_ERROR);
1240}

Referenced by xmlFindCharEncodingHandler().

◆ xmlRegisterCharEncodingHandler()

void xmlRegisterCharEncodingHandler ( xmlCharEncodingHandlerPtr  handler)

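xmlRegisterCharEncodingHandler:
@handler: the xmlCharEncodingHandlerPtr handler block

Registers the char encoding handler. If the handler is NULL, or the handler table is unavailable or already full, an error is reported and the function takes its cleanup path instead of registering the handler.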

Definition at line 1509 of file encoding.c.

1509 {
1510 if (handlers == NULL) xmlInitCharEncodingHandlers();
1511 if ((handler == NULL) || (handlers == NULL)) {
1512 xmlEncodingErr(XML_I18N_NO_HANDLER,
1513 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1514 goto free_handler;
1515 }
1516
1517 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1518 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1519 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1520 "MAX_ENCODING_HANDLERS");
1521 goto free_handler;
1522 }
1523 handlers[nbCharEncodingHandler++] = handler;
1524 return;
1525
1526free_handler:
1527 if (handler != NULL) {
1528 if (handler->name != NULL) {
1529 xmlFree(handler->name);
1530 }
1531 xmlFree(handler);
1532 }
1533}
@ XML_I18N_NO_HANDLER
Definition: xmlerror.h:831
@ XML_I18N_EXCESS_HANDLER
Definition: xmlerror.h:832

Referenced by xmlNewCharEncodingHandler().

Variable Documentation

◆ handlers

◆ nbCharEncodingHandler

int nbCharEncodingHandler = 0
static

◆ xmlCharEncodingAliases

xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL
static

◆ xmlCharEncodingAliasesMax

int xmlCharEncodingAliasesMax = 0
static

Definition at line 72 of file encoding.c.

Referenced by xmlAddEncodingAlias(), and xmlCleanupEncodingAliases().

◆ xmlCharEncodingAliasesNb

int xmlCharEncodingAliasesNb = 0
static

◆ xmlDefaultCharEncodingHandler

xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL
static

Definition at line 1325 of file encoding.c.

Referenced by xmlCleanupCharEncodingHandlers(), and xmlFindCharEncodingHandler().

◆ xmlLittleEndian

int xmlLittleEndian = 1
static

Definition at line 84 of file encoding.c.

Referenced by UTF16BEToUTF8(), UTF16LEToUTF8(), and xmlInitCharEncodingHandlers().

◆ xmlUTF16BEHandler

xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL
static

Definition at line 61 of file encoding.c.

Referenced by xmlGetCharEncodingHandler(), and xmlInitCharEncodingHandlers().

◆ xmlUTF16LEHandler

xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL
static

Definition at line 60 of file encoding.c.

Referenced by xmlGetCharEncodingHandler(), and xmlInitCharEncodingHandlers().