ReactOS 0.4.16-dev-340-g0540c21
encoding.h File Reference
#include <libxml/xmlversion.h>
#include <libxml/tree.h>
Include dependency graph for encoding.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  _xmlCharEncodingHandler
 

Typedefs

typedef int(* xmlCharEncodingInputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
 
typedef int(* xmlCharEncodingOutputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
 
typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler
 
typedef xmlCharEncodingHandler * xmlCharEncodingHandlerPtr
 

Enumerations

enum  xmlCharEncoding {
  XML_CHAR_ENCODING_ERROR = -1 , XML_CHAR_ENCODING_NONE = 0 , XML_CHAR_ENCODING_UTF8 = 1 , XML_CHAR_ENCODING_UTF16LE = 2 ,
  XML_CHAR_ENCODING_UTF16BE = 3 , XML_CHAR_ENCODING_UCS4LE = 4 , XML_CHAR_ENCODING_UCS4BE = 5 , XML_CHAR_ENCODING_EBCDIC = 6 ,
  XML_CHAR_ENCODING_UCS4_2143 =7 , XML_CHAR_ENCODING_UCS4_3412 =8 , XML_CHAR_ENCODING_UCS2 = 9 , XML_CHAR_ENCODING_8859_1 = 10 ,
  XML_CHAR_ENCODING_8859_2 = 11 , XML_CHAR_ENCODING_8859_3 = 12 , XML_CHAR_ENCODING_8859_4 = 13 , XML_CHAR_ENCODING_8859_5 = 14 ,
  XML_CHAR_ENCODING_8859_6 = 15 , XML_CHAR_ENCODING_8859_7 = 16 , XML_CHAR_ENCODING_8859_8 = 17 , XML_CHAR_ENCODING_8859_9 = 18 ,
  XML_CHAR_ENCODING_2022_JP = 19 , XML_CHAR_ENCODING_SHIFT_JIS =20 , XML_CHAR_ENCODING_EUC_JP = 21 , XML_CHAR_ENCODING_ASCII = 22
}
 

Functions

XML_DEPRECATED XMLPUBFUN void XMLCALL xmlInitCharEncodingHandlers (void)
 
XML_DEPRECATED XMLPUBFUN void XMLCALL xmlCleanupCharEncodingHandlers (void)
 
XMLPUBFUN void XMLCALL xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler)
 
XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL xmlGetCharEncodingHandler (xmlCharEncoding enc)
 
XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL xmlFindCharEncodingHandler (const char *name)
 
XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL xmlNewCharEncodingHandler (const char *name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output)
 
XMLPUBFUN int XMLCALL xmlAddEncodingAlias (const char *name, const char *alias)
 
XMLPUBFUN int XMLCALL xmlDelEncodingAlias (const char *alias)
 
XMLPUBFUN const char *XMLCALL xmlGetEncodingAlias (const char *alias)
 
XMLPUBFUN void XMLCALL xmlCleanupEncodingAliases (void)
 
XMLPUBFUN xmlCharEncoding XMLCALL xmlParseCharEncoding (const char *name)
 
XMLPUBFUN const char *XMLCALL xmlGetCharEncodingName (xmlCharEncoding enc)
 
XMLPUBFUN xmlCharEncoding XMLCALL xmlDetectCharEncoding (const unsigned char *in, int len)
 
XMLPUBFUN int XMLCALL xmlCharEncOutFunc (xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in)
 
XMLPUBFUN int XMLCALL xmlCharEncInFunc (xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in)
 
XMLPUBFUN int XMLCALL xmlCharEncFirstLine (xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in)
 
XMLPUBFUN int XMLCALL xmlCharEncCloseFunc (xmlCharEncodingHandler *handler)
 
XMLPUBFUN int XMLCALL isolat1ToUTF8 (unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
 

Typedef Documentation

◆ xmlCharEncodingHandler

◆ xmlCharEncodingHandlerPtr

Definition at line 130 of file encoding.h.

◆ xmlCharEncodingInputFunc

typedef int(* xmlCharEncodingInputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen)

xmlCharEncodingInputFunc: @out: a pointer to an array of bytes to store the UTF-8 result @outlen: the length of @out @in: a pointer to an array of chars in the original encoding @inlen: the length of @in

Take a block of chars in the original encoding and try to convert it to an UTF-8 block of chars out.

Returns the number of bytes written, -1 if lack of space, or -2 if the transcoding failed. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.

Definition at line 99 of file encoding.h.

◆ xmlCharEncodingOutputFunc

typedef int(* xmlCharEncodingOutputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen)

xmlCharEncodingOutputFunc: @out: a pointer to an array of bytes to store the result @outlen: the length of @out @in: a pointer to an array of UTF-8 chars @inlen: the length of @in

Take a block of UTF-8 chars in and try to convert it to another encoding. Note: a first call designed to produce heading info is called with in = NULL. If stateful this should also initialize the encoder state.

Returns the number of bytes written, -1 if lack of space, or -2 if the transcoding failed. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.

Definition at line 121 of file encoding.h.

Enumeration Type Documentation

◆ xmlCharEncoding

Enumerator
XML_CHAR_ENCODING_ERROR 
XML_CHAR_ENCODING_NONE 
XML_CHAR_ENCODING_UTF8 
XML_CHAR_ENCODING_UTF16LE 
XML_CHAR_ENCODING_UTF16BE 
XML_CHAR_ENCODING_UCS4LE 
XML_CHAR_ENCODING_UCS4BE 
XML_CHAR_ENCODING_EBCDIC 
XML_CHAR_ENCODING_UCS4_2143 
XML_CHAR_ENCODING_UCS4_3412 
XML_CHAR_ENCODING_UCS2 
XML_CHAR_ENCODING_8859_1 
XML_CHAR_ENCODING_8859_2 
XML_CHAR_ENCODING_8859_3 
XML_CHAR_ENCODING_8859_4 
XML_CHAR_ENCODING_8859_5 
XML_CHAR_ENCODING_8859_6 
XML_CHAR_ENCODING_8859_7 
XML_CHAR_ENCODING_8859_8 
XML_CHAR_ENCODING_8859_9 
XML_CHAR_ENCODING_2022_JP 
XML_CHAR_ENCODING_SHIFT_JIS 
XML_CHAR_ENCODING_EUC_JP 
XML_CHAR_ENCODING_ASCII 

Definition at line 56 of file encoding.h.

56 {
57 XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */
58 XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */
59 XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */
60 XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */
61 XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */
62 XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */
63 XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */
64 XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */
65 XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */
66 XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */
67 XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */
68 XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */
69 XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */
70 XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */
71 XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */
72 XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */
73 XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */
74 XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */
75 XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */
76 XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */
77 XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */
78 XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */
79 XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */
80 XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */
xmlCharEncoding
Definition: encoding.h:56
@ XML_CHAR_ENCODING_8859_6
Definition: encoding.h:73
@ XML_CHAR_ENCODING_UTF8
Definition: encoding.h:59
@ XML_CHAR_ENCODING_8859_7
Definition: encoding.h:74
@ XML_CHAR_ENCODING_8859_2
Definition: encoding.h:69
@ XML_CHAR_ENCODING_8859_4
Definition: encoding.h:71
@ XML_CHAR_ENCODING_8859_8
Definition: encoding.h:75
@ XML_CHAR_ENCODING_UTF16BE
Definition: encoding.h:61
@ XML_CHAR_ENCODING_UCS2
Definition: encoding.h:67
@ XML_CHAR_ENCODING_2022_JP
Definition: encoding.h:77
@ XML_CHAR_ENCODING_8859_3
Definition: encoding.h:70
@ XML_CHAR_ENCODING_EBCDIC
Definition: encoding.h:64
@ XML_CHAR_ENCODING_UCS4LE
Definition: encoding.h:62
@ XML_CHAR_ENCODING_ERROR
Definition: encoding.h:57
@ XML_CHAR_ENCODING_UCS4_3412
Definition: encoding.h:66
@ XML_CHAR_ENCODING_UCS4BE
Definition: encoding.h:63
@ XML_CHAR_ENCODING_8859_1
Definition: encoding.h:68
@ XML_CHAR_ENCODING_8859_9
Definition: encoding.h:76
@ XML_CHAR_ENCODING_UTF16LE
Definition: encoding.h:60
@ XML_CHAR_ENCODING_NONE
Definition: encoding.h:58
@ XML_CHAR_ENCODING_8859_5
Definition: encoding.h:72
@ XML_CHAR_ENCODING_ASCII
Definition: encoding.h:80
@ XML_CHAR_ENCODING_SHIFT_JIS
Definition: encoding.h:78
@ XML_CHAR_ENCODING_UCS4_2143
Definition: encoding.h:65
@ XML_CHAR_ENCODING_EUC_JP
Definition: encoding.h:79

Function Documentation

◆ isolat1ToUTF8()

XMLPUBFUN int XMLCALL isolat1ToUTF8 ( unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen 
)

isolat1ToUTF8: @out: a pointer to an array of bytes to store the result @outlen: the length of @out @in: a pointer to an array of ISO Latin 1 chars @inlen: the length of @in

Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8 block of chars out. Returns the number of bytes written if success, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.

Definition at line 316 of file encoding.c.

317 {
318 unsigned char* outstart = out;
319 const unsigned char* base = in;
320 unsigned char* outend;
321 const unsigned char* inend;
322 const unsigned char* instop;
323
324 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
325 return(-1);
326
327 outend = out + *outlen;
328 inend = in + (*inlen);
329 instop = inend;
330
331 while ((in < inend) && (out < outend - 1)) {
332 if (*in >= 0x80) {
333 *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
334 *out++ = ((*in) & 0x3F) | 0x80;
335 ++in;
336 }
337 if ((instop - in) > (outend - out)) instop = in + (outend - out);
338 while ((in < instop) && (*in < 0x80)) {
339 *out++ = *in++;
340 }
341 }
342 if ((in < inend) && (out < outend) && (*in < 0x80)) {
343 *out++ = *in++;
344 }
345 *outlen = out - outstart;
346 *inlen = in - base;
347 return(*outlen);
348}
#define NULL
Definition: types.h:112
GLuint in
Definition: glext.h:9616
static FILE * out
Definition: regtests2xml.c:44

Referenced by xmlInitCharEncodingHandlers().

◆ xmlAddEncodingAlias()

XMLPUBFUN int XMLCALL xmlAddEncodingAlias ( const char * name,
const char * alias 
)

Definition at line 1077 of file encoding.c.

1077 {
1078 int i;
1079 char upper[100];
1080
1081 if ((name == NULL) || (alias == NULL))
1082 return(-1);
1083
1084 for (i = 0;i < 99;i++) {
1085 upper[i] = toupper(alias[i]);
1086 if (upper[i] == 0) break;
1087 }
1088 upper[i] = 0;
1089
1096 return(-1);
1102 }
1103 /*
1104 * Walk down the list looking for a definition of the alias
1105 */
1106 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1107 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1108 /*
1109 * Replace the definition.
1110 */
1113 return(0);
1114 }
1115 }
1116 /*
1117 * Add the definition
1118 */
1122 return(0);
1123}
int strcmp(const char *String1, const char *String2)
Definition: utclib.c:469
int toupper(int c)
Definition: utclib.c:881
const WCHAR * alias
Definition: main.c:67
xmlCharEncodingAlias * xmlCharEncodingAliasPtr
Definition: encoding.c:64
static xmlCharEncodingAliasPtr xmlCharEncodingAliases
Definition: encoding.c:70
static int xmlCharEncodingAliasesMax
Definition: encoding.c:72
static int xmlCharEncodingAliasesNb
Definition: encoding.c:71
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
XMLPUBVAR xmlStrdupFunc xmlMemStrdup
Definition: globals.h:252
XMLPUBVAR xmlMallocFunc xmlMalloc
Definition: globals.h:248
XMLPUBVAR xmlFreeFunc xmlFree
Definition: globals.h:251
XMLPUBVAR xmlReallocFunc xmlRealloc
Definition: globals.h:250
const char * name
Definition: encoding.c:66
const char * alias
Definition: encoding.c:67
Definition: name.c:39

Referenced by main().

◆ xmlCharEncCloseFunc()

XMLPUBFUN int XMLCALL xmlCharEncCloseFunc ( xmlCharEncodingHandler * handler)

xmlCharEncCloseFunc: @handler: char encoding transformation data structure

Generic front-end for encoding handler close function

Returns 0 if success, or -1 in case of error

Definition at line 2796 of file encoding.c.

2796 {
2797 int ret = 0;
2798 int tofree = 0;
2799 int i, handler_in_list = 0;
2800
2801 /* Avoid unused variable warning if features are disabled. */
2802 (void) handler_in_list;
2803
2804 if (handler == NULL) return(-1);
2805 if (handler->name == NULL) return(-1);
2806 if (handlers != NULL) {
2807 for (i = 0;i < nbCharEncodingHandler; i++) {
2808 if (handler == handlers[i]) {
2809 handler_in_list = 1;
2810 break;
2811 }
2812 }
2813 }
2814#ifdef LIBXML_ICONV_ENABLED
2815 /*
2816 * Iconv handlers can be used only once, free the whole block.
2817 * and the associated icon resources.
2818 */
2819 if ((handler_in_list == 0) &&
2820 ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2821 tofree = 1;
2822 if (handler->iconv_out != NULL) {
2823 if (iconv_close(handler->iconv_out))
2824 ret = -1;
2825 handler->iconv_out = NULL;
2826 }
2827 if (handler->iconv_in != NULL) {
2828 if (iconv_close(handler->iconv_in))
2829 ret = -1;
2830 handler->iconv_in = NULL;
2831 }
2832 }
2833#endif /* LIBXML_ICONV_ENABLED */
2834#ifdef LIBXML_ICU_ENABLED
2835 if ((handler_in_list == 0) &&
2836 ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2837 tofree = 1;
2838 if (handler->uconv_out != NULL) {
2839 closeIcuConverter(handler->uconv_out);
2840 handler->uconv_out = NULL;
2841 }
2842 if (handler->uconv_in != NULL) {
2843 closeIcuConverter(handler->uconv_in);
2844 handler->uconv_in = NULL;
2845 }
2846 }
2847#endif
2848 if (tofree) {
2849 /* free up only dynamic handlers iconv/uconv */
2850 if (handler->name != NULL)
2851 xmlFree(handler->name);
2852 handler->name = NULL;
2854 }
2855#ifdef DEBUG_ENCODING
2856 if (ret)
2858 "failed to close the encoding handler\n");
2859 else
2861 "closed the encoding handler\n");
2862#endif
2863
2864 return(ret);
2865}
UINT(* handler)(MSIPACKAGE *)
Definition: action.c:7512
static xmlCharEncodingHandlerPtr * handlers
Definition: encoding.c:1317
static int nbCharEncodingHandler
Definition: encoding.c:1318
int iconv_close(iconv_t cd)
Definition: win_iconv.c:756
XMLPUBVAR void * xmlGenericErrorContext
Definition: globals.h:353
XMLPUBVAR xmlGenericErrorFunc xmlGenericError
Definition: globals.h:337
int ret

Referenced by xmlSwitchInputEncodingInt().

◆ xmlCharEncFirstLine()

XMLPUBFUN int XMLCALL xmlCharEncFirstLine ( xmlCharEncodingHandler * handler,
xmlBufferPtr  out,
xmlBufferPtr  in 
)

xmlCharEncFirstLine: @handler: char encoding transformation data structure @out: an xmlBuffer for the output. @in: an xmlBuffer for the input

Front-end for the encoding handler input function, but handle only the very first line, i.e. limit itself to 45 chars.

Returns the number of bytes written if success, or -1 on general error, or -2 if the transcoding fails (for *in is not a valid UTF-8 string or the result of the transformation can't fit into the encoding we want).

Definition at line 2160 of file encoding.c.

2161 {
2162 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2163}
int xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in, int len)
Definition: encoding.c:2074

◆ xmlCharEncInFunc()

XMLPUBFUN int XMLCALL xmlCharEncInFunc ( xmlCharEncodingHandler * handler,
xmlBufferPtr  out,
xmlBufferPtr  in 
)

xmlCharEncInFunc: @handler: char encoding transformation data structure @out: an xmlBuffer for the output. @in: an xmlBuffer for the input

Generic front-end for the encoding handler input function

Returns the number of bytes written if success, or -1 on general error, or -2 if the transcoding fails (for *in is not a valid UTF-8 string or the result of the transformation can't fit into the encoding we want).

Definition at line 2383 of file encoding.c.

2385{
2386 int ret;
2387 int written;
2388 int toconv;
2389
2390 if (handler == NULL)
2391 return (-1);
2392 if (out == NULL)
2393 return (-1);
2394 if (in == NULL)
2395 return (-1);
2396
2397 toconv = in->use;
2398 if (toconv == 0)
2399 return (0);
2400 written = out->size - out->use -1; /* count '\0' */
2401 if (toconv * 2 >= written) {
2402 xmlBufferGrow(out, out->size + toconv * 2);
2403 written = out->size - out->use - 1;
2404 }
2405 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2406 in->content, &toconv, 1);
2407 xmlBufferShrink(in, toconv);
2408 out->use += written;
2409 out->content[out->use] = 0;
2410 if (ret == -1)
2411 ret = -3;
2412
2413 switch (ret) {
2414 case 0:
2415#ifdef DEBUG_ENCODING
2417 "converted %d bytes to %d bytes of input\n",
2418 toconv, written);
2419#endif
2420 break;
2421 case -1:
2422#ifdef DEBUG_ENCODING
2424 "converted %d bytes to %d bytes of input, %d left\n",
2425 toconv, written, in->use);
2426#endif
2427 break;
2428 case -3:
2429#ifdef DEBUG_ENCODING
2431 "converted %d bytes to %d bytes of input, %d left\n",
2432 toconv, written, in->use);
2433#endif
2434 break;
2435 case -2: {
2436 char buf[50];
2437
2438 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2439 in->content[0], in->content[1],
2440 in->content[2], in->content[3]);
2441 buf[49] = 0;
2442 xmlEncodingErr(XML_I18N_CONV_FAILED,
2443 "input conversion failed due to input error, bytes %s\n",
2444 buf);
2445 }
2446 }
2447 /*
2448 * Ignore when input buffer is not on a boundary
2449 */
2450 if (ret == -3)
2451 ret = 0;
2452 return (written? written : ret);
2453}
static int xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, int *outlen, const unsigned char *in, int *inlen, int flush)
Definition: encoding.c:1979
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glext.h:7751
XMLPUBFUN int XMLCALL xmlBufferShrink(xmlBufferPtr buf, unsigned int len)
XMLPUBFUN int XMLCALL xmlBufferGrow(xmlBufferPtr buf, unsigned int len)
#define snprintf
Definition: wintirpc.h:48
@ XML_I18N_CONV_FAILED
Definition: xmlerror.h:833

◆ xmlCharEncOutFunc()

XMLPUBFUN int XMLCALL xmlCharEncOutFunc ( xmlCharEncodingHandler * handler,
xmlBufferPtr  out,
xmlBufferPtr  in 
)

xmlCharEncOutFunc: @handler: char encoding transformation data structure @out: an xmlBuffer for the output. @in: an xmlBuffer for the input

Generic front-end for the encoding handler output function. A first call with @in == NULL has to be made first to initiate the output in case of non-stateless encoding needing to initiate their state or the output (like the BOM in UTF16). In case of UTF8 sequence conversion errors for the given encoder, the content will be automatically remapped to a CharRef sequence.

Returns the number of bytes written if success, or -1 on general error, or -2 if the transcoding fails (for *in is not a valid UTF-8 string or the result of the transformation can't fit into the encoding we want).

Definition at line 2645 of file encoding.c.

2646 {
2647 int ret;
2648 int written;
2649 int writtentot = 0;
2650 int toconv;
2651
2652 if (handler == NULL) return(-1);
2653 if (out == NULL) return(-1);
2654
2655retry:
2656
2657 written = out->size - out->use;
2658
2659 if (written > 0)
2660 written--; /* Gennady: count '/0' */
2661
2662 /*
2663 * First specific handling of in = NULL, i.e. the initialization call
2664 */
2665 if (in == NULL) {
2666 toconv = 0;
2667 /* TODO: Check return value. */
2668 xmlEncOutputChunk(handler, &out->content[out->use], &written,
2669 NULL, &toconv);
2670 out->use += written;
2671 out->content[out->use] = 0;
2672#ifdef DEBUG_ENCODING
2674 "initialized encoder\n");
2675#endif
2676 return(0);
2677 }
2678
2679 /*
2680 * Conversion itself.
2681 */
2682 toconv = in->use;
2683 if (toconv == 0)
2684 return(0);
2685 if (toconv * 4 >= written) {
2686 xmlBufferGrow(out, toconv * 4);
2687 written = out->size - out->use - 1;
2688 }
2689 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2690 in->content, &toconv);
2691 xmlBufferShrink(in, toconv);
2692 out->use += written;
2693 writtentot += written;
2694 out->content[out->use] = 0;
2695 if (ret == -1) {
2696 if (written > 0) {
2697 /* Can be a limitation of iconv or uconv */
2698 goto retry;
2699 }
2700 ret = -3;
2701 }
2702
2703 /*
2704 * Attempt to handle error cases
2705 */
2706 switch (ret) {
2707 case 0:
2708#ifdef DEBUG_ENCODING
2710 "converted %d bytes to %d bytes of output\n",
2711 toconv, written);
2712#endif
2713 break;
2714 case -1:
2715#ifdef DEBUG_ENCODING
2717 "output conversion failed by lack of space\n");
2718#endif
2719 break;
2720 case -3:
2721#ifdef DEBUG_ENCODING
2722 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2723 toconv, written, in->use);
2724#endif
2725 break;
2726 case -4:
2727 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2728 "xmlCharEncOutFunc: no output function !\n", NULL);
2729 ret = -1;
2730 break;
2731 case -2: {
2732 xmlChar charref[20];
2733 int len = in->use;
2734 const xmlChar *utf = (const xmlChar *) in->content;
2735 int cur, charrefLen;
2736
2737 cur = xmlGetUTF8Char(utf, &len);
2738 if (cur <= 0)
2739 break;
2740
2741#ifdef DEBUG_ENCODING
2743 "handling output conversion error\n");
2745 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2746 in->content[0], in->content[1],
2747 in->content[2], in->content[3]);
2748#endif
2749 /*
2750 * Removes the UTF8 sequence, and replace it by a charref
2751 * and continue the transcoding phase, hoping the error
2752 * did not mangle the encoder state.
2753 */
2754 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2755 "&#%d;", cur);
2757 xmlBufferGrow(out, charrefLen * 4);
2758 written = out->size - out->use - 1;
2759 toconv = charrefLen;
2760 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2761 charref, &toconv);
2762
2763 if ((ret < 0) || (toconv != charrefLen)) {
2764 char buf[50];
2765
2766 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2767 in->content[0], in->content[1],
2768 in->content[2], in->content[3]);
2769 buf[49] = 0;
2770 xmlEncodingErr(XML_I18N_CONV_FAILED,
2771 "output conversion failed due to conv error, bytes %s\n",
2772 buf);
2773 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2774 in->content[0] = ' ';
2775 break;
2776 }
2777
2778 out->use += written;
2779 writtentot += written;
2780 out->content[out->use] = 0;
2781 goto retry;
2782 }
2783 }
2784 return(writtentot ? writtentot : ret);
2785}
static int xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
Definition: encoding.c:2029
FxCollectionEntry * cur
GLenum GLsizei len
Definition: glext.h:6722
@ XML_BUFFER_ALLOC_IMMUTABLE
Definition: tree.h:77
@ XML_I18N_NO_OUTPUT
Definition: xmlerror.h:834
XMLPUBFUN int XMLCALL XMLPUBFUN int XMLCALL XMLPUBFUN int XMLCALL xmlGetUTF8Char(const unsigned char *utf, int *len)
Definition: xmlstring.c:708
unsigned char xmlChar
Definition: xmlstring.h:28

◆ xmlCleanupCharEncodingHandlers()

XML_DEPRECATED XMLPUBFUN void XMLCALL xmlCleanupCharEncodingHandlers ( void  )

xmlCleanupCharEncodingHandlers:

DEPRECATED: This function will be made private. Call xmlCleanupParser to free global state but see the warnings there. xmlCleanupParser should be only called once at program exit. In most cases, you don't have to call cleanup functions at all.

Cleanup the memory allocated for the char encoding support, it unregisters all the encoding handlers and the aliases.

Definition at line 1483 of file encoding.c.

1483 {
1485
1486 if (handlers == NULL) return;
1487
1488 for (;nbCharEncodingHandler > 0;) {
1494 }
1495 }
1497 handlers = NULL;
1500}
void xmlCleanupEncodingAliases(void)
Definition: encoding.c:1012
static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler
Definition: encoding.c:1325

Referenced by xmlCleanupParser().

◆ xmlCleanupEncodingAliases()

XMLPUBFUN void XMLCALL xmlCleanupEncodingAliases ( void  )

xmlCleanupEncodingAliases:

Unregisters all aliases

Definition at line 1012 of file encoding.c.

1012 {
1013 int i;
1014
1016 return;
1017
1018 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1023 }
1028}

Referenced by xmlCleanupCharEncodingHandlers().

◆ xmlDelEncodingAlias()

XMLPUBFUN int XMLCALL xmlDelEncodingAlias ( const char * alias)

xmlDelEncodingAlias: @alias: the alias name as parsed, in UTF-8 format (ASCII actually)

Unregisters an encoding alias @alias

Returns 0 in case of success, -1 in case of error

Definition at line 1134 of file encoding.c.

1134 {
1135 int i;
1136
1137 if (alias == NULL)
1138 return(-1);
1139
1141 return(-1);
1142 /*
1143 * Walk down the list looking for a definition of the alias
1144 */
1145 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1152 return(0);
1153 }
1154 }
1155 return(-1);
1156}
#define memmove(s1, s2, n)
Definition: mkisofs.h:881

◆ xmlDetectCharEncoding()

XMLPUBFUN xmlCharEncoding XMLCALL xmlDetectCharEncoding ( const unsigned char * in,
int  len 
)

xmlDetectCharEncoding: @in: a pointer to the first bytes of the XML entity, must be at least 2 bytes long (at least 4 if encoding is a UCS-4 variant). @len: the length of the buffer

Guess the encoding of the entity using the first bytes of the entity content according to the non-normative appendix F of the XML-1.0 recommendation.

Returns one of the XML_CHAR_ENCODING_... values.

Definition at line 952 of file encoding.c.

953{
954 if (in == NULL)
956 if (len >= 4) {
957 if ((in[0] == 0x00) && (in[1] == 0x00) &&
958 (in[2] == 0x00) && (in[3] == 0x3C))
960 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
961 (in[2] == 0x00) && (in[3] == 0x00))
963 if ((in[0] == 0x00) && (in[1] == 0x00) &&
964 (in[2] == 0x3C) && (in[3] == 0x00))
966 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
967 (in[2] == 0x00) && (in[3] == 0x00))
969 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
970 (in[2] == 0xA7) && (in[3] == 0x94))
972 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
973 (in[2] == 0x78) && (in[3] == 0x6D))
975 /*
976 * Although not part of the recommendation, we also
977 * attempt an "auto-recognition" of UTF-16LE and
978 * UTF-16BE encodings.
979 */
980 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
981 (in[2] == 0x3F) && (in[3] == 0x00))
983 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
984 (in[2] == 0x00) && (in[3] == 0x3F))
986 }
987 if (len >= 3) {
988 /*
989 * Errata on XML-1.0 June 20 2001
990 * We now allow an UTF8 encoded BOM
991 */
992 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
993 (in[2] == 0xBF))
995 }
996 /* For UTF-16 we can recognize by the BOM */
997 if (len >= 2) {
998 if ((in[0] == 0xFE) && (in[1] == 0xFF))
1000 if ((in[0] == 0xFF) && (in[1] == 0xFE))
1002 }
1003 return(XML_CHAR_ENCODING_NONE);
1004}

Referenced by xmlCtxtResetPush(), xmlParseDocument(), xmlParseExternalEntityPrivate(), xmlParseExternalSubset(), xmlParseExtParsedEnt(), xmlParsePEReference(), and xmlSAX2ExternalSubset().

◆ xmlFindCharEncodingHandler()

XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL xmlFindCharEncodingHandler ( const char * name)

Definition at line 1678 of file encoding.c.

1678 {
1679 const char *nalias;
1680 const char *norig;
1682#ifdef LIBXML_ICONV_ENABLED
1684 iconv_t icv_in, icv_out;
1685#endif /* LIBXML_ICONV_ENABLED */
1686#ifdef LIBXML_ICU_ENABLED
1688 uconv_t *ucv_in, *ucv_out;
1689#endif /* LIBXML_ICU_ENABLED */
1690 char upper[100];
1691 int i;
1692
1694 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1695 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1696
1697 /*
1698 * Do the alias resolution
1699 */
1700 norig = name;
1701 nalias = xmlGetEncodingAlias(name);
1702 if (nalias != NULL)
1703 name = nalias;
1704
1705 /*
1706 * Check first for directly registered encoding names
1707 */
1708 for (i = 0;i < 99;i++) {
1709 upper[i] = toupper(name[i]);
1710 if (upper[i] == 0) break;
1711 }
1712 upper[i] = 0;
1713
1714 if (handlers != NULL) {
1715 for (i = 0;i < nbCharEncodingHandler; i++) {
1716 if (!strcmp(upper, handlers[i]->name)) {
1717#ifdef DEBUG_ENCODING
1719 "Found registered handler for encoding %s\n", name);
1720#endif
1721 return(handlers[i]);
1722 }
1723 }
1724 }
1725
1726#ifdef LIBXML_ICONV_ENABLED
1727 /* check whether iconv can handle this */
1728 icv_in = iconv_open("UTF-8", name);
1729 icv_out = iconv_open(name, "UTF-8");
1730 if (icv_in == (iconv_t) -1) {
1731 icv_in = iconv_open("UTF-8", upper);
1732 }
1733 if (icv_out == (iconv_t) -1) {
1734 icv_out = iconv_open(upper, "UTF-8");
1735 }
1736 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1739 if (enc == NULL) {
1740 iconv_close(icv_in);
1741 iconv_close(icv_out);
1742 return(NULL);
1743 }
1744 memset(enc, 0, sizeof(xmlCharEncodingHandler));
1745 enc->name = xmlMemStrdup(name);
1746 enc->input = NULL;
1747 enc->output = NULL;
1748 enc->iconv_in = icv_in;
1749 enc->iconv_out = icv_out;
1750#ifdef DEBUG_ENCODING
1752 "Found iconv handler for encoding %s\n", name);
1753#endif
1754 return enc;
1755 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1756 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1757 "iconv : problems with filters for '%s'\n", name);
1758 if (icv_in != (iconv_t) -1)
1759 iconv_close(icv_in);
1760 else
1761 iconv_close(icv_out);
1762 }
1763#endif /* LIBXML_ICONV_ENABLED */
1764#ifdef LIBXML_ICU_ENABLED
1765 /* check whether icu can handle this */
1766 ucv_in = openIcuConverter(name, 1);
1767 ucv_out = openIcuConverter(name, 0);
1768 if (ucv_in != NULL && ucv_out != NULL) {
1771 if (encu == NULL) {
1772 closeIcuConverter(ucv_in);
1773 closeIcuConverter(ucv_out);
1774 return(NULL);
1775 }
1776 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1777 encu->name = xmlMemStrdup(name);
1778 encu->input = NULL;
1779 encu->output = NULL;
1780 encu->uconv_in = ucv_in;
1781 encu->uconv_out = ucv_out;
1782#ifdef DEBUG_ENCODING
1784 "Found ICU converter handler for encoding %s\n", name);
1785#endif
1786 return encu;
1787 } else if (ucv_in != NULL || ucv_out != NULL) {
1788 closeIcuConverter(ucv_in);
1789 closeIcuConverter(ucv_out);
1790 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1791 "ICU converter : problems with filters for '%s'\n", name);
1792 }
1793#endif /* LIBXML_ICU_ENABLED */
1794
1795#ifdef DEBUG_ENCODING
1797 "No handler found for encoding %s\n", name);
1798#endif
1799
1800 /*
1801 * Fallback using the canonical names
1802 */
1803 alias = xmlParseCharEncoding(norig);
1805 const char* canon;
1807 if ((canon != NULL) && (strcmp(name, canon))) {
1808 return(xmlFindCharEncodingHandler(canon));
1809 }
1810 }
1811
1812 /* If "none of the above", give up */
1813 return(NULL);
1814}
const char * xmlGetCharEncodingName(xmlCharEncoding enc)
Definition: encoding.c:1254
void xmlInitCharEncodingHandlers(void)
Definition: encoding.c:1420
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name)
Definition: encoding.c:1678
xmlCharEncoding xmlParseCharEncoding(const char *name)
Definition: encoding.c:1170
const char * xmlGetEncodingAlias(const char *alias)
Definition: encoding.c:1039
xmlCharEncodingHandler * xmlCharEncodingHandlerPtr
Definition: encoding.h:130
iconv_t iconv_open(const char *tocode, const char *fromcode)
Definition: win_iconv.c:730
#define memset(x, y, z)
Definition: compat.h:39
xmlCharEncodingOutputFunc output
Definition: encoding.h:134
xmlCharEncodingInputFunc input
Definition: encoding.h:133
@ XML_ERR_INTERNAL_ERROR
Definition: xmlerror.h:101

Referenced by xmlCtxtResetPush(), xmlDoRead(), xmlFindCharEncodingHandler(), xmlGetCharEncodingHandler(), xmlParseEncodingDecl(), xmlParseInNodeContext(), xsltSaveResultToFd(), xsltSaveResultToFile(), xsltSaveResultToFilename(), and xsltSaveResultToString().

◆ xmlGetCharEncodingHandler()

XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL xmlGetCharEncodingHandler ( xmlCharEncoding  enc)

xmlGetCharEncodingHandler: @enc: an xmlCharEncoding value.

Search the registered set for the handler able to read/write that encoding.

Returns the handler or NULL if not found

Definition at line 1544 of file encoding.c.

1544 {
1546
1548 switch (enc) {
1550 return(NULL);
1552 return(NULL);
1554 return(NULL);
1556 return(xmlUTF16LEHandler);
1558 return(xmlUTF16BEHandler);
1561 if (handler != NULL) return(handler);
1563 if (handler != NULL) return(handler);
1564 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1565 if (handler != NULL) return(handler);
1567 if (handler != NULL) return(handler);
1568 break;
1570 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1571 if (handler != NULL) return(handler);
1573 if (handler != NULL) return(handler);
1575 if (handler != NULL) return(handler);
1576 break;
1578 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1579 if (handler != NULL) return(handler);
1581 if (handler != NULL) return(handler);
1583 if (handler != NULL) return(handler);
1584 break;
1586 break;
1588 break;
1590 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1591 if (handler != NULL) return(handler);
1593 if (handler != NULL) return(handler);
1595 if (handler != NULL) return(handler);
1596 break;
1597
1598 /*
1599 * We used to keep ISO Latin encodings native in the
1600 * generated data. This led to so many problems that
1601 * this has been removed. One can still change this
1602 * back by registering no-ops encoders for those
1603 */
1605 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1606 if (handler != NULL) return(handler);
1607 break;
1609 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1610 if (handler != NULL) return(handler);
1611 break;
1613 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1614 if (handler != NULL) return(handler);
1615 break;
1617 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1618 if (handler != NULL) return(handler);
1619 break;
1621 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1622 if (handler != NULL) return(handler);
1623 break;
1625 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1626 if (handler != NULL) return(handler);
1627 break;
1629 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1630 if (handler != NULL) return(handler);
1631 break;
1633 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1634 if (handler != NULL) return(handler);
1635 break;
1637 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1638 if (handler != NULL) return(handler);
1639 break;
1640
1641
1643 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1644 if (handler != NULL) return(handler);
1645 break;
1647 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1648 if (handler != NULL) return(handler);
1649 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1650 if (handler != NULL) return(handler);
1651 handler = xmlFindCharEncodingHandler("Shift_JIS");
1652 if (handler != NULL) return(handler);
1653 break;
1656 if (handler != NULL) return(handler);
1657 break;
1658 default:
1659 break;
1660 }
1661
1662#ifdef DEBUG_ENCODING
1664 "No handler found for encoding %d\n", enc);
1665#endif
1666 return(NULL);
1667}
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler
Definition: encoding.c:60
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler
Definition: encoding.c:61

Referenced by xmlSwitchEncoding().

◆ xmlGetCharEncodingName()

XMLPUBFUN const char *XMLCALL xmlGetCharEncodingName ( xmlCharEncoding  enc)

xmlGetCharEncodingName: @enc: the encoding

The "canonical" name for XML encoding. Cf. http://www.w3.org/TR/REC-xml#charencoding, Section 4.3.3 Character Encoding in Entities.

Returns the canonical name for the given encoding

Definition at line 1254 of file encoding.c.

1254 {
1255 switch (enc) {
1257 return(NULL);
1259 return(NULL);
1261 return("UTF-8");
1263 return("UTF-16");
1265 return("UTF-16");
1267 return("EBCDIC");
1269 return("ISO-10646-UCS-4");
1271 return("ISO-10646-UCS-4");
1273 return("ISO-10646-UCS-4");
1275 return("ISO-10646-UCS-4");
1277 return("ISO-10646-UCS-2");
1279 return("ISO-8859-1");
1281 return("ISO-8859-2");
1283 return("ISO-8859-3");
1285 return("ISO-8859-4");
1287 return("ISO-8859-5");
1289 return("ISO-8859-6");
1291 return("ISO-8859-7");
1293 return("ISO-8859-8");
1295 return("ISO-8859-9");
1297 return("ISO-2022-JP");
1299 return("Shift-JIS");
1301 return("EUC-JP");
1303 return(NULL);
1304 }
1305 return(NULL);
1306}

Referenced by xmlFindCharEncodingHandler(), xmlSwitchEncoding(), and xsltSaveResultTo().

◆ xmlGetEncodingAlias()

XMLPUBFUN const char *XMLCALL xmlGetEncodingAlias ( const char * alias)

xmlGetEncodingAlias: @alias: the alias name as parsed, in UTF-8 format (ASCII actually)

Lookup an encoding name for the given alias.

Returns NULL if not found, otherwise the original name

Definition at line 1039 of file encoding.c.

1039 {
1040 int i;
1041 char upper[100];
1042
1043 if (alias == NULL)
1044 return(NULL);
1045
1047 return(NULL);
1048
1049 for (i = 0;i < 99;i++) {
1050 upper[i] = toupper(alias[i]);
1051 if (upper[i] == 0) break;
1052 }
1053 upper[i] = 0;
1054
1055 /*
1056 * Walk down the list looking for a definition of the alias
1057 */
1058 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1059 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1060 return(xmlCharEncodingAliases[i].name);
1061 }
1062 }
1063 return(NULL);
1064}

Referenced by xmlFindCharEncodingHandler(), xmlNewCharEncodingHandler(), and xmlParseCharEncoding().

◆ xmlInitCharEncodingHandlers()

XML_DEPRECATED XMLPUBFUN void XMLCALL xmlInitCharEncodingHandlers ( void  )

xmlInitCharEncodingHandlers:

DEPRECATED: This function will be made private. Call xmlInitParser to initialize the library.

Initialize the char encoding support; it registers the default supported encodings. NOTE: while public, this function usually doesn't need to be called in normal processing.

Definition at line 1420 of file encoding.c.

1420 {
1421 unsigned short int tst = 0x1234;
1422 unsigned char *ptr = (unsigned char *) &tst;
1423
1424 if (handlers != NULL) return;
1425
1428
1429 if (*ptr == 0x12) xmlLittleEndian = 0;
1430 else if (*ptr == 0x34) xmlLittleEndian = 1;
1431 else {
1432 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1433 "Odd problem at endianness detection\n", NULL);
1434 }
1435
1436 if (handlers == NULL) {
1437 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1438 return;
1439 }
1441#ifdef LIBXML_OUTPUT_ENABLED
1443 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1445 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1446 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1447 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1448 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1449 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1450#ifdef LIBXML_HTML_ENABLED
1451 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1452#endif
1453#else
1462#endif /* LIBXML_OUTPUT_ENABLED */
1463#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1464#ifdef LIBXML_ISO8859X_ENABLED
1465 xmlRegisterCharEncodingHandlersISO8859x ();
1466#endif
1467#endif
1468
1469}
int isolat1ToUTF8(unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
Definition: encoding.c:316
xmlCharEncodingHandlerPtr xmlNewCharEncodingHandler(const char *name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output)
Definition: encoding.c:1338
static int UTF16LEToUTF8(unsigned char *out, int *outlen, const unsigned char *inb, int *inlenb)
Definition: encoding.c:508
static void xmlEncodingErrMemory(const char *extra)
Definition: encoding.c:93
#define MAX_ENCODING_HANDLERS
Definition: encoding.c:1316
static int xmlLittleEndian
Definition: encoding.c:84
static int UTF8ToUTF8(unsigned char *out, int *outlen, const unsigned char *inb, int *inlenb)
Definition: encoding.c:364
static int UTF16BEToUTF8(unsigned char *out, int *outlen, const unsigned char *inb, int *inlenb)
Definition: encoding.c:751
static int asciiToUTF8(unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
Definition: encoding.c:185
static PVOID ptr
Definition: dispmode.c:27

Referenced by xmlFindCharEncodingHandler(), xmlGetCharEncodingHandler(), xmlInitParser(), and xmlRegisterCharEncodingHandler().

◆ xmlNewCharEncodingHandler()

XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL xmlNewCharEncodingHandler ( const char * name,
xmlCharEncodingInputFunc  input,
xmlCharEncodingOutputFunc  output 
)

Definition at line 1338 of file encoding.c.

1340 {
1342 const char *alias;
1343 char upper[500];
1344 int i;
1345 char *up = NULL;
1346
1347 /*
1348 * Do the alias resolution
1349 */
1351 if (alias != NULL)
1352 name = alias;
1353
1354 /*
1355 * Keep only the uppercase version of the encoding.
1356 */
1357 if (name == NULL) {
1358 xmlEncodingErr(XML_I18N_NO_NAME,
1359 "xmlNewCharEncodingHandler : no name !\n", NULL);
1360 return(NULL);
1361 }
1362 for (i = 0;i < 499;i++) {
1363 upper[i] = toupper(name[i]);
1364 if (upper[i] == 0) break;
1365 }
1366 upper[i] = 0;
1367 up = xmlMemStrdup(upper);
1368 if (up == NULL) {
1369 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1370 return(NULL);
1371 }
1372
1373 /*
1374 * allocate and fill-up an handler block.
1375 */
1378 if (handler == NULL) {
1379 xmlFree(up);
1380 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1381 return(NULL);
1382 }
1384 handler->input = input;
1385 handler->output = output;
1386 handler->name = up;
1387
1388#ifdef LIBXML_ICONV_ENABLED
1389 handler->iconv_in = NULL;
1390 handler->iconv_out = NULL;
1391#endif
1392#ifdef LIBXML_ICU_ENABLED
1393 handler->uconv_in = NULL;
1394 handler->uconv_out = NULL;
1395#endif
1396
1397 /*
1398 * registers and returns the handler.
1399 */
1401#ifdef DEBUG_ENCODING
1403 "Registered encoding handler for %s\n", name);
1404#endif
1405 return(handler);
1406}
void xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)
Definition: encoding.c:1509
GLenum GLenum GLenum input
Definition: glext.h:9031
#define up(mutex)
Definition: glue.h:30
@ XML_I18N_NO_NAME
Definition: xmlerror.h:830

Referenced by xmlInitCharEncodingHandlers().

◆ xmlParseCharEncoding()

XMLPUBFUN xmlCharEncoding XMLCALL xmlParseCharEncoding ( const char * name)

Definition at line 1170 of file encoding.c.

1171{
1172 const char *alias;
1173 char upper[500];
1174 int i;
1175
1176 if (name == NULL)
1177 return(XML_CHAR_ENCODING_NONE);
1178
1179 /*
1180 * Do the alias resolution
1181 */
1183 if (alias != NULL)
1184 name = alias;
1185
1186 for (i = 0;i < 499;i++) {
1187 upper[i] = toupper(name[i]);
1188 if (upper[i] == 0) break;
1189 }
1190 upper[i] = 0;
1191
1192 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1193 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1194 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1195
1196 /*
1197 * NOTE: if we were able to parse this, the endianness of UTF16 is
1198 * already found and in use
1199 */
1200 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1201 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1202
1203 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1204 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1205 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1206
1207 /*
1208 * NOTE: if we were able to parse this, the endianness of UCS4 is
1209 * already found and in use
1210 */
1211 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1212 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1213 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1214
1215
1216 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1217 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1218 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1219
1220 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1221 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1222 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1223
1224 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1225 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1226 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1227 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1228 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1229 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1230 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1231
1232 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1233 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1234 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1235
1236#ifdef DEBUG_ENCODING
1237 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1238#endif
1240}

Referenced by xmlFindCharEncodingHandler().

◆ xmlRegisterCharEncodingHandler()

XMLPUBFUN void XMLCALL xmlRegisterCharEncodingHandler ( xmlCharEncodingHandlerPtr  handler)

xmlRegisterCharEncodingHandler: @handler: the xmlCharEncodingHandlerPtr handler block

Register the char encoding handler, surprising, isn't it?

Definition at line 1509 of file encoding.c.

1509 {
1511 if ((handler == NULL) || (handlers == NULL)) {
1512 xmlEncodingErr(XML_I18N_NO_HANDLER,
1513 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1514 goto free_handler;
1515 }
1516
1518 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1519 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1520 "MAX_ENCODING_HANDLERS");
1521 goto free_handler;
1522 }
1524 return;
1525
1526free_handler:
1527 if (handler != NULL) {
1528 if (handler->name != NULL) {
1529 xmlFree(handler->name);
1530 }
1532 }
1533}
@ XML_I18N_NO_HANDLER
Definition: xmlerror.h:831
@ XML_I18N_EXCESS_HANDLER
Definition: xmlerror.h:832

Referenced by xmlNewCharEncodingHandler().