ReactOS 0.4.16-dev-2208-g6350669
encoding.h File Reference
Include dependency graph for encoding.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  _xmlCharEncodingHandler
 

Typedefs

typedef int(* xmlCharEncodingInputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
 
typedef int(* xmlCharEncodingOutputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
 
typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler
 
typedef xmlCharEncodingHandler * xmlCharEncodingHandlerPtr
 

Enumerations

enum  xmlCharEncError {
  XML_ENC_ERR_SUCCESS = 0 , XML_ENC_ERR_SPACE = -1 , XML_ENC_ERR_INPUT = -2 , XML_ENC_ERR_PARTIAL = -3 ,
  XML_ENC_ERR_INTERNAL = -4 , XML_ENC_ERR_MEMORY = -5
}
 
enum  xmlCharEncoding {
  XML_CHAR_ENCODING_ERROR = -1 , XML_CHAR_ENCODING_NONE = 0 , XML_CHAR_ENCODING_UTF8 = 1 , XML_CHAR_ENCODING_UTF16LE = 2 ,
  XML_CHAR_ENCODING_UTF16BE = 3 , XML_CHAR_ENCODING_UCS4LE = 4 , XML_CHAR_ENCODING_UCS4BE = 5 , XML_CHAR_ENCODING_EBCDIC = 6 ,
  XML_CHAR_ENCODING_UCS4_2143 =7 , XML_CHAR_ENCODING_UCS4_3412 =8 , XML_CHAR_ENCODING_UCS2 = 9 , XML_CHAR_ENCODING_8859_1 = 10 ,
  XML_CHAR_ENCODING_8859_2 = 11 , XML_CHAR_ENCODING_8859_3 = 12 , XML_CHAR_ENCODING_8859_4 = 13 , XML_CHAR_ENCODING_8859_5 = 14 ,
  XML_CHAR_ENCODING_8859_6 = 15 , XML_CHAR_ENCODING_8859_7 = 16 , XML_CHAR_ENCODING_8859_8 = 17 , XML_CHAR_ENCODING_8859_9 = 18 ,
  XML_CHAR_ENCODING_2022_JP = 19 , XML_CHAR_ENCODING_SHIFT_JIS =20 , XML_CHAR_ENCODING_EUC_JP = 21 , XML_CHAR_ENCODING_ASCII = 22
}
 

Functions

XML_DEPRECATED XMLPUBFUN void xmlInitCharEncodingHandlers (void)
 
XML_DEPRECATED XMLPUBFUN void xmlCleanupCharEncodingHandlers (void)
 
XMLPUBFUN void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler)
 
XMLPUBFUN xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler (xmlCharEncoding enc)
 
XMLPUBFUN xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler (const char *name)
 
XMLPUBFUN xmlCharEncodingHandlerPtr xmlNewCharEncodingHandler (const char *name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output)
 
XMLPUBFUN int xmlAddEncodingAlias (const char *name, const char *alias)
 
XMLPUBFUN int xmlDelEncodingAlias (const char *alias)
 
XMLPUBFUN const char * xmlGetEncodingAlias (const char *alias)
 
XMLPUBFUN void xmlCleanupEncodingAliases (void)
 
XMLPUBFUN xmlCharEncoding xmlParseCharEncoding (const char *name)
 
XMLPUBFUN const char * xmlGetCharEncodingName (xmlCharEncoding enc)
 
XMLPUBFUN xmlCharEncoding xmlDetectCharEncoding (const unsigned char *in, int len)
 
XMLPUBFUN int xmlCharEncOutFunc (xmlCharEncodingHandler *handler, struct _xmlBuffer *out, struct _xmlBuffer *in)
 
XMLPUBFUN int xmlCharEncInFunc (xmlCharEncodingHandler *handler, struct _xmlBuffer *out, struct _xmlBuffer *in)
 
XML_DEPRECATED XMLPUBFUN int xmlCharEncFirstLine (xmlCharEncodingHandler *handler, struct _xmlBuffer *out, struct _xmlBuffer *in)
 
XMLPUBFUN int xmlCharEncCloseFunc (xmlCharEncodingHandler *handler)
 
XMLPUBFUN int isolat1ToUTF8 (unsigned char *out, int *outlen, const unsigned char *in, int *inlen)
 

Typedef Documentation

◆ xmlCharEncodingHandler

◆ xmlCharEncodingHandlerPtr

Definition at line 139 of file encoding.h.

◆ xmlCharEncodingInputFunc

typedef int(* xmlCharEncodingInputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen)

xmlCharEncodingInputFunc: @out: a pointer to an array of bytes to store the UTF-8 result @outlen: the length of @out @in: a pointer to an array of chars in the original encoding @inlen: the length of @in

Take a block of chars in the original encoding and try to convert it to a UTF-8 block of chars out.

Returns the number of bytes written, -1 if lack of space, or -2 if the transcoding failed. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.

Definition at line 108 of file encoding.h.

◆ xmlCharEncodingOutputFunc

typedef int(* xmlCharEncodingOutputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen)

xmlCharEncodingOutputFunc: @out: a pointer to an array of bytes to store the result @outlen: the length of @out @in: a pointer to an array of UTF-8 chars @inlen: the length of @in

Take a block of UTF-8 chars in and try to convert it to another encoding. Note: a first call designed to produce heading info is called with in = NULL. If stateful this should also initialize the encoder state.

Returns the number of bytes written, -1 if lack of space, or -2 if the transcoding failed. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.

Definition at line 130 of file encoding.h.

Enumeration Type Documentation

◆ xmlCharEncError

Enumerator
XML_ENC_ERR_SUCCESS 
XML_ENC_ERR_SPACE 
XML_ENC_ERR_INPUT 
XML_ENC_ERR_PARTIAL 
XML_ENC_ERR_INTERNAL 
XML_ENC_ERR_MEMORY 

Definition at line 35 of file encoding.h.

35 {
xmlCharEncError
Definition: encoding.h:35
@ XML_ENC_ERR_SPACE
Definition: encoding.h:37
@ XML_ENC_ERR_PARTIAL
Definition: encoding.h:39
@ XML_ENC_ERR_MEMORY
Definition: encoding.h:41
@ XML_ENC_ERR_INPUT
Definition: encoding.h:38
@ XML_ENC_ERR_SUCCESS
Definition: encoding.h:36
@ XML_ENC_ERR_INTERNAL
Definition: encoding.h:40

◆ xmlCharEncoding

Enumerator
XML_CHAR_ENCODING_ERROR 
XML_CHAR_ENCODING_NONE 
XML_CHAR_ENCODING_UTF8 
XML_CHAR_ENCODING_UTF16LE 
XML_CHAR_ENCODING_UTF16BE 
XML_CHAR_ENCODING_UCS4LE 
XML_CHAR_ENCODING_UCS4BE 
XML_CHAR_ENCODING_EBCDIC 
XML_CHAR_ENCODING_UCS4_2143 
XML_CHAR_ENCODING_UCS4_3412 
XML_CHAR_ENCODING_UCS2 
XML_CHAR_ENCODING_8859_1 
XML_CHAR_ENCODING_8859_2 
XML_CHAR_ENCODING_8859_3 
XML_CHAR_ENCODING_8859_4 
XML_CHAR_ENCODING_8859_5 
XML_CHAR_ENCODING_8859_6 
XML_CHAR_ENCODING_8859_7 
XML_CHAR_ENCODING_8859_8 
XML_CHAR_ENCODING_8859_9 
XML_CHAR_ENCODING_2022_JP 
XML_CHAR_ENCODING_SHIFT_JIS 
XML_CHAR_ENCODING_EUC_JP 
XML_CHAR_ENCODING_ASCII 

Definition at line 65 of file encoding.h.

65 {
66 XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */
67 XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */
68 XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */
69 XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */
70 XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */
71 XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */
72 XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */
73 XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */
74 XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */
75 XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */
76 XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */
77 XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */
78 XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */
79 XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */
80 XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */
81 XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */
82 XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */
83 XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */
84 XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */
85 XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */
86 XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */
87 XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */
88 XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */
89 XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */
xmlCharEncoding
Definition: encoding.h:65
@ XML_CHAR_ENCODING_8859_6
Definition: encoding.h:82
@ XML_CHAR_ENCODING_UTF8
Definition: encoding.h:68
@ XML_CHAR_ENCODING_8859_7
Definition: encoding.h:83
@ XML_CHAR_ENCODING_8859_2
Definition: encoding.h:78
@ XML_CHAR_ENCODING_8859_4
Definition: encoding.h:80
@ XML_CHAR_ENCODING_8859_8
Definition: encoding.h:84
@ XML_CHAR_ENCODING_UTF16BE
Definition: encoding.h:70
@ XML_CHAR_ENCODING_UCS2
Definition: encoding.h:76
@ XML_CHAR_ENCODING_2022_JP
Definition: encoding.h:86
@ XML_CHAR_ENCODING_8859_3
Definition: encoding.h:79
@ XML_CHAR_ENCODING_EBCDIC
Definition: encoding.h:73
@ XML_CHAR_ENCODING_UCS4LE
Definition: encoding.h:71
@ XML_CHAR_ENCODING_ERROR
Definition: encoding.h:66
@ XML_CHAR_ENCODING_UCS4_3412
Definition: encoding.h:75
@ XML_CHAR_ENCODING_UCS4BE
Definition: encoding.h:72
@ XML_CHAR_ENCODING_8859_1
Definition: encoding.h:77
@ XML_CHAR_ENCODING_8859_9
Definition: encoding.h:85
@ XML_CHAR_ENCODING_UTF16LE
Definition: encoding.h:69
@ XML_CHAR_ENCODING_NONE
Definition: encoding.h:67
@ XML_CHAR_ENCODING_8859_5
Definition: encoding.h:81
@ XML_CHAR_ENCODING_ASCII
Definition: encoding.h:89
@ XML_CHAR_ENCODING_SHIFT_JIS
Definition: encoding.h:87
@ XML_CHAR_ENCODING_UCS4_2143
Definition: encoding.h:74
@ XML_CHAR_ENCODING_EUC_JP
Definition: encoding.h:88

Function Documentation

◆ isolat1ToUTF8()

XMLPUBFUN int isolat1ToUTF8 ( unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen 
)

isolat1ToUTF8: @out: a pointer to an array of bytes to store the result @outlen: the length of @out @in: a pointer to an array of ISO Latin 1 chars @inlen: the length of @in

Take a block of ISO Latin 1 chars in and try to convert it to a UTF-8 block of chars out.

Returns the number of bytes written or an XML_ENC_ERR code.

The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.

Definition at line 283 of file encoding.c.

284 {
285 unsigned char* outstart = out;
286 const unsigned char* base = in;
287 unsigned char* outend;
288 const unsigned char* inend;
289 const unsigned char* instop;
290
291 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
292 return(XML_ENC_ERR_INTERNAL);
293
294 outend = out + *outlen;
295 inend = in + (*inlen);
296 instop = inend;
297
298 while ((in < inend) && (out < outend - 1)) {
299 if (*in >= 0x80) {
300 *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
301 *out++ = ((*in) & 0x3F) | 0x80;
302 ++in;
303 }
304 if ((instop - in) > (outend - out)) instop = in + (outend - out);
305 while ((in < instop) && (*in < 0x80)) {
306 *out++ = *in++;
307 }
308 }
309 if ((in < inend) && (out < outend) && (*in < 0x80)) {
310 *out++ = *in++;
311 }
312 *outlen = out - outstart;
313 *inlen = in - base;
314 return(*outlen);
315}
#define NULL
Definition: types.h:112
GLuint in
Definition: glext.h:9616
wchar_t tm const _CrtWcstime_Writes_and_advances_ptr_ count wchar_t ** out
Definition: wcsftime.cpp:383

◆ xmlAddEncodingAlias()

XMLPUBFUN int xmlAddEncodingAlias ( const char * name,
const char * alias 
)

Definition at line 1041 of file encoding.c.

1041 {
1042 int i;
1043 char upper[100];
1044 char *nameCopy, *aliasCopy;
1045
1046 if ((name == NULL) || (alias == NULL))
1047 return(-1);
1048
1049 for (i = 0;i < 99;i++) {
1050 upper[i] = (char) toupper((unsigned char) alias[i]);
1051 if (upper[i] == 0) break;
1052 }
1053 upper[i] = 0;
1054
1057 size_t newSize = xmlCharEncodingAliasesMax ?
1059 20;
1060
1063 newSize * sizeof(xmlCharEncodingAlias));
1064 if (tmp == NULL)
1065 return(-1);
1067 xmlCharEncodingAliasesMax = newSize;
1068 }
1069
1070 /*
1071 * Walk down the list looking for a definition of the alias
1072 */
1073 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1074 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1075 /*
1076 * Replace the definition.
1077 */
1078 nameCopy = xmlMemStrdup(name);
1079 if (nameCopy == NULL)
1080 return(-1);
1082 xmlCharEncodingAliases[i].name = nameCopy;
1083 return(0);
1084 }
1085 }
1086 /*
1087 * Add the definition
1088 */
1089 nameCopy = xmlMemStrdup(name);
1090 if (nameCopy == NULL)
1091 return(-1);
1092 aliasCopy = xmlMemStrdup(upper);
1093 if (aliasCopy == NULL) {
1094 xmlFree(nameCopy);
1095 return(-1);
1096 }
1100 return(0);
1101}
const WCHAR * alias
Definition: main.c:67
int CDECL toupper(int c)
Definition: ctype.c:514
_ACRTIMP int __cdecl strcmp(const char *, const char *)
Definition: string.c:3319
unsigned char
Definition: typeof.h:29
xmlCharEncodingAlias * xmlCharEncodingAliasPtr
Definition: encoding.c:62
static xmlCharEncodingAliasPtr xmlCharEncodingAliases
Definition: encoding.c:68
static int xmlCharEncodingAliasesMax
Definition: encoding.c:70
static int xmlCharEncodingAliasesNb
Definition: encoding.c:69
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
xmlReallocFunc xmlRealloc
Definition: globals.c:214
xmlFreeFunc xmlFree
Definition: globals.c:184
xmlStrdupFunc xmlMemStrdup
Definition: globals.c:235
const char * name
Definition: encoding.c:64
const char * alias
Definition: encoding.c:65
Definition: name.c:39

◆ xmlCharEncCloseFunc()

◆ xmlCharEncFirstLine()

XML_DEPRECATED XMLPUBFUN int xmlCharEncFirstLine ( xmlCharEncodingHandler * handler,
struct _xmlBuffer *  out,
struct _xmlBuffer *  in 
)

◆ xmlCharEncInFunc()

XMLPUBFUN int xmlCharEncInFunc ( xmlCharEncodingHandler * handler,
struct _xmlBuffer *  out,
struct _xmlBuffer *  in 
)

◆ xmlCharEncOutFunc()

XMLPUBFUN int xmlCharEncOutFunc ( xmlCharEncodingHandler * handler,
struct _xmlBuffer *  out,
struct _xmlBuffer *  in 
)

◆ xmlCleanupCharEncodingHandlers()

XML_DEPRECATED XMLPUBFUN void xmlCleanupCharEncodingHandlers ( void  )

xmlCleanupCharEncodingHandlers:

DEPRECATED: This function will be made private. Call xmlCleanupParser to free global state but see the warnings there. xmlCleanupParser should be only called once at program exit. In most cases, you don't have to call cleanup functions at all.

Cleanup the memory allocated for the char encoding support, it unregisters all the encoding handlers and the aliases.

Definition at line 1490 of file encoding.c.

1490 {
1492
1493 if (handlers == NULL) return;
1494
1495 for (;nbCharEncodingHandler > 0;) {
1501 }
1502 }
1504 handlers = NULL;
1506}
static xmlCharEncodingHandlerPtr * handlers
Definition: encoding.c:1380
void xmlCleanupEncodingAliases(void)
Definition: encoding.c:976
static int nbCharEncodingHandler
Definition: encoding.c:1381

Referenced by xmlCleanupParser().

◆ xmlCleanupEncodingAliases()

XMLPUBFUN void xmlCleanupEncodingAliases ( void  )

xmlCleanupEncodingAliases:

Unregisters all aliases

Definition at line 976 of file encoding.c.

976 {
977 int i;
978
980 return;
981
982 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
987 }
992}

Referenced by xmlCleanupCharEncodingHandlers().

◆ xmlDelEncodingAlias()

XMLPUBFUN int xmlDelEncodingAlias ( const char * alias)

xmlDelEncodingAlias: @alias: the alias name as parsed, in UTF-8 format (ASCII actually)

Unregisters an encoding alias @alias

Returns 0 in case of success, -1 in case of error

Definition at line 1112 of file encoding.c.

1112 {
1113 int i;
1114
1115 if (alias == NULL)
1116 return(-1);
1117
1119 return(-1);
1120 /*
1121 * Walk down the list looking for a definition of the alias
1122 */
1123 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1130 return(0);
1131 }
1132 }
1133 return(-1);
1134}
#define memmove(s1, s2, n)
Definition: mkisofs.h:881

◆ xmlDetectCharEncoding()

XMLPUBFUN xmlCharEncoding xmlDetectCharEncoding ( const unsigned char * in,
int  len 
)

xmlDetectCharEncoding: @in: a pointer to the first bytes of the XML entity, must be at least 2 bytes long (at least 4 if encoding is UCS4 variant). @len: the length of the buffer

Guess the encoding of the entity using the first bytes of the entity content according to the non-normative appendix F of the XML-1.0 recommendation.

Returns one of the XML_CHAR_ENCODING_... values.

Definition at line 916 of file encoding.c.

917{
918 if (in == NULL)
920 if (len >= 4) {
921 if ((in[0] == 0x00) && (in[1] == 0x00) &&
922 (in[2] == 0x00) && (in[3] == 0x3C))
924 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
925 (in[2] == 0x00) && (in[3] == 0x00))
927 if ((in[0] == 0x00) && (in[1] == 0x00) &&
928 (in[2] == 0x3C) && (in[3] == 0x00))
930 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
931 (in[2] == 0x00) && (in[3] == 0x00))
933 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
934 (in[2] == 0xA7) && (in[3] == 0x94))
936 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
937 (in[2] == 0x78) && (in[3] == 0x6D))
939 /*
940 * Although not part of the recommendation, we also
941 * attempt an "auto-recognition" of UTF-16LE and
942 * UTF-16BE encodings.
943 */
944 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
945 (in[2] == 0x3F) && (in[3] == 0x00))
947 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
948 (in[2] == 0x00) && (in[3] == 0x3F))
950 }
951 if (len >= 3) {
952 /*
953 * Errata on XML-1.0 June 20 2001
954 * We now allow an UTF8 encoded BOM
955 */
956 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
957 (in[2] == 0xBF))
959 }
960 /* For UTF-16 we can recognize by the BOM */
961 if (len >= 2) {
962 if ((in[0] == 0xFE) && (in[1] == 0xFF))
964 if ((in[0] == 0xFF) && (in[1] == 0xFE))
966 }
968}
GLenum GLsizei len
Definition: glext.h:6722

Referenced by internal_parseBuffer().

◆ xmlFindCharEncodingHandler()

XMLPUBFUN xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler ( const char * name)

Definition at line 1677 of file encoding.c.

1677 {
1678 const char *nalias;
1679 const char *norig;
1681#ifdef LIBXML_ICONV_ENABLED
1683 iconv_t icv_in, icv_out;
1684#endif /* LIBXML_ICONV_ENABLED */
1685#ifdef LIBXML_ICU_ENABLED
1687 uconv_t *ucv_in, *ucv_out;
1688#endif /* LIBXML_ICU_ENABLED */
1689 char upper[100];
1690 int i;
1691
1692 if (name == NULL) return(NULL);
1693 if (name[0] == 0) return(NULL);
1694
1695 /*
1696 * Do the alias resolution
1697 */
1698 norig = name;
1699 nalias = xmlGetEncodingAlias(name);
1700 if (nalias != NULL)
1701 name = nalias;
1702
1703 /*
1704 * Check first for directly registered encoding names
1705 */
1706 for (i = 0;i < 99;i++) {
1707 upper[i] = (char) toupper((unsigned char) name[i]);
1708 if (upper[i] == 0) break;
1709 }
1710 upper[i] = 0;
1711
1712 for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1713 if (strcmp(upper, defaultHandlers[i].name) == 0)
1715 }
1716
1717 if (handlers != NULL) {
1718 for (i = 0;i < nbCharEncodingHandler; i++) {
1719 if (!strcmp(upper, handlers[i]->name)) {
1720 return(handlers[i]);
1721 }
1722 }
1723 }
1724
1725#ifdef LIBXML_ICONV_ENABLED
1726 /* check whether iconv can handle this */
1727 icv_in = iconv_open("UTF-8", name);
1728 icv_out = iconv_open(name, "UTF-8");
1729 if (icv_in == (iconv_t) -1) {
1730 icv_in = iconv_open("UTF-8", upper);
1731 }
1732 if (icv_out == (iconv_t) -1) {
1733 icv_out = iconv_open(upper, "UTF-8");
1734 }
1735 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1738 if (enc == NULL) {
1739 iconv_close(icv_in);
1740 iconv_close(icv_out);
1741 return(NULL);
1742 }
1743 memset(enc, 0, sizeof(xmlCharEncodingHandler));
1744 enc->name = xmlMemStrdup(name);
1745 if (enc->name == NULL) {
1746 xmlFree(enc);
1747 iconv_close(icv_in);
1748 iconv_close(icv_out);
1749 return(NULL);
1750 }
1751 enc->input = NULL;
1752 enc->output = NULL;
1753 enc->iconv_in = icv_in;
1754 enc->iconv_out = icv_out;
1755 return enc;
1756 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1757 if (icv_in != (iconv_t) -1)
1758 iconv_close(icv_in);
1759 else
1760 iconv_close(icv_out);
1761 }
1762#endif /* LIBXML_ICONV_ENABLED */
1763#ifdef LIBXML_ICU_ENABLED
1764 /* check whether icu can handle this */
1765 ucv_in = openIcuConverter(name, 1);
1766 ucv_out = openIcuConverter(name, 0);
1767 if (ucv_in != NULL && ucv_out != NULL) {
1770 if (encu == NULL) {
1771 closeIcuConverter(ucv_in);
1772 closeIcuConverter(ucv_out);
1773 return(NULL);
1774 }
1775 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1776 encu->name = xmlMemStrdup(name);
1777 if (encu->name == NULL) {
1778 xmlFree(encu);
1779 closeIcuConverter(ucv_in);
1780 closeIcuConverter(ucv_out);
1781 return(NULL);
1782 }
1783 encu->input = NULL;
1784 encu->output = NULL;
1785 encu->uconv_in = ucv_in;
1786 encu->uconv_out = ucv_out;
1787 return encu;
1788 } else if (ucv_in != NULL || ucv_out != NULL) {
1789 closeIcuConverter(ucv_in);
1790 closeIcuConverter(ucv_out);
1791 }
1792#endif /* LIBXML_ICU_ENABLED */
1793
1794 /*
1795 * Fallback using the canonical names
1796 */
1797 alias = xmlParseCharEncoding(norig);
1799 const char* canon;
1801 if ((canon != NULL) && (strcmp(name, canon))) {
1802 return(xmlFindCharEncodingHandler(canon));
1803 }
1804 }
1805
1806 /* If "none of the above", give up */
1807 return(NULL);
1808}
unsigned int(__cdecl typeof(jpeg_read_scanlines))(struct jpeg_decompress_struct *
Definition: typeof.h:31
const char * xmlGetCharEncodingName(xmlCharEncoding enc)
Definition: encoding.c:1229
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name)
Definition: encoding.c:1677
xmlCharEncoding xmlParseCharEncoding(const char *name)
Definition: encoding.c:1148
const char * xmlGetEncodingAlias(const char *alias)
Definition: encoding.c:1003
#define NUM_DEFAULT_HANDLERS
Definition: encoding.c:1372
static const xmlCharEncodingHandler defaultHandlers[]
Definition: encoding.c:1332
xmlCharEncodingHandler * xmlCharEncodingHandlerPtr
Definition: encoding.h:139
int iconv_close(iconv_t cd)
Definition: win_iconv.c:756
iconv_t iconv_open(const char *tocode, const char *fromcode)
Definition: win_iconv.c:730
xmlMallocFunc xmlMalloc
Definition: globals.c:193
#define memset(x, y, z)
Definition: compat.h:39
xmlCharEncodingOutputFunc output
Definition: encoding.h:143
xmlCharEncodingInputFunc input
Definition: encoding.h:142

Referenced by init_char_encoders(), node_transform_write_to_bstr(), node_transform_write_to_stream(), xmlCtxtResetPush(), xmlDetectEBCDIC(), xmlDoRead(), xmlFindCharEncodingHandler(), xmlGetCharEncodingHandler(), xmlParseInNodeContext(), xmlSetDeclaredEncoding(), xsltSaveResultToFd(), xsltSaveResultToFile(), xsltSaveResultToFilename(), and xsltSaveResultToString().

◆ xmlGetCharEncodingHandler()

XMLPUBFUN xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler ( xmlCharEncoding  enc)

xmlGetCharEncodingHandler: @enc: an xmlCharEncoding value.

Search in the registered set the handler able to read/write that encoding.

Returns the handler or NULL if not found

Definition at line 1547 of file encoding.c.

1547 {
1549
1550 switch (enc) {
1552 return(NULL);
1554 return(NULL);
1556 return(NULL);
1563 if (handler != NULL) return(handler);
1565 if (handler != NULL) return(handler);
1566 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1567 if (handler != NULL) return(handler);
1569 if (handler != NULL) return(handler);
1570 break;
1572 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1573 if (handler != NULL) return(handler);
1575 if (handler != NULL) return(handler);
1577 if (handler != NULL) return(handler);
1578 break;
1580 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1581 if (handler != NULL) return(handler);
1583 if (handler != NULL) return(handler);
1585 if (handler != NULL) return(handler);
1586 break;
1588 break;
1590 break;
1592 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1593 if (handler != NULL) return(handler);
1595 if (handler != NULL) return(handler);
1597 if (handler != NULL) return(handler);
1598 break;
1599
1600 /*
1601 * We used to keep ISO Latin encodings native in the
1602 * generated data. This led to so many problems that
1603 * this has been removed. One can still change this
1604 * back by registering no-ops encoders for those
1605 */
1607 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1608 if (handler != NULL) return(handler);
1609 break;
1611 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1612 if (handler != NULL) return(handler);
1613 break;
1615 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1616 if (handler != NULL) return(handler);
1617 break;
1619 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1620 if (handler != NULL) return(handler);
1621 break;
1623 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1624 if (handler != NULL) return(handler);
1625 break;
1627 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1628 if (handler != NULL) return(handler);
1629 break;
1631 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1632 if (handler != NULL) return(handler);
1633 break;
1635 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1636 if (handler != NULL) return(handler);
1637 break;
1639 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1640 if (handler != NULL) return(handler);
1641 break;
1642
1643
1645 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1646 if (handler != NULL) return(handler);
1647 break;
1649 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1650 if (handler != NULL) return(handler);
1651 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1652 if (handler != NULL) return(handler);
1653 handler = xmlFindCharEncodingHandler("Shift_JIS");
1654 if (handler != NULL) return(handler);
1655 break;
1658 if (handler != NULL) return(handler);
1659 break;
1660 default:
1661 break;
1662 }
1663
1664 return(NULL);
1665}
UINT(* handler)(MSIPACKAGE *)
Definition: action.c:7512
static const xmlCharEncodingHandler * xmlUTF16LEHandler
Definition: encoding.c:1375
static const xmlCharEncodingHandler * xmlUTF16BEHandler
Definition: encoding.c:1376

Referenced by xmlDetectEBCDIC(), and xmlSwitchEncoding().

◆ xmlGetCharEncodingName()

XMLPUBFUN const char * xmlGetCharEncodingName ( xmlCharEncoding  enc)

xmlGetCharEncodingName: @enc: the encoding

The "canonical" name for XML encoding. C.f. http://www.w3.org/TR/REC-xml#charencoding Section 4.3.3 Character Encoding in Entities

Returns the canonical name for the given encoding

Definition at line 1229 of file encoding.c.

1229 {
1230 switch (enc) {
1232 return(NULL);
1234 return(NULL);
1236 return("UTF-8");
1238 return("UTF-16");
1240 return("UTF-16");
1242 return("EBCDIC");
1244 return("ISO-10646-UCS-4");
1246 return("ISO-10646-UCS-4");
1248 return("ISO-10646-UCS-4");
1250 return("ISO-10646-UCS-4");
1252 return("ISO-10646-UCS-2");
1254 return("ISO-8859-1");
1256 return("ISO-8859-2");
1258 return("ISO-8859-3");
1260 return("ISO-8859-4");
1262 return("ISO-8859-5");
1264 return("ISO-8859-6");
1266 return("ISO-8859-7");
1268 return("ISO-8859-8");
1270 return("ISO-8859-9");
1272 return("ISO-2022-JP");
1274 return("Shift-JIS");
1276 return("EUC-JP");
1278 return(NULL);
1279 }
1280 return(NULL);
1281}

Referenced by internal_parseBuffer(), xmlFindCharEncodingHandler(), xmlSwitchEncoding(), and xsltSaveResultTo().

◆ xmlGetEncodingAlias()

XMLPUBFUN const char * xmlGetEncodingAlias ( const char * alias)

xmlGetEncodingAlias: @alias: the alias name as parsed, in UTF-8 format (ASCII actually)

Lookup an encoding name for the given alias.

Returns NULL if not found, otherwise the original name

Definition at line 1003 of file encoding.c.

1003 {
1004 int i;
1005 char upper[100];
1006
1007 if (alias == NULL)
1008 return(NULL);
1009
1011 return(NULL);
1012
1013 for (i = 0;i < 99;i++) {
1014 upper[i] = (char) toupper((unsigned char) alias[i]);
1015 if (upper[i] == 0) break;
1016 }
1017 upper[i] = 0;
1018
1019 /*
1020 * Walk down the list looking for a definition of the alias
1021 */
1022 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1023 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1024 return(xmlCharEncodingAliases[i].name);
1025 }
1026 }
1027 return(NULL);
1028}

Referenced by xmlFindCharEncodingHandler(), xmlNewCharEncodingHandler(), and xmlParseCharEncoding().

◆ xmlInitCharEncodingHandlers()

XML_DEPRECATED XMLPUBFUN void xmlInitCharEncodingHandlers ( void  )

xmlInitCharEncodingHandlers:

DEPRECATED: Alias for xmlInitParser.

Definition at line 1460 of file encoding.c.

1460 {
1461 xmlInitParser();
1462}
XML_GLOBALS_PARSER XMLPUBFUN void xmlInitParser(void)
Definition: threads.c:569

◆ xmlNewCharEncodingHandler()

XMLPUBFUN xmlCharEncodingHandlerPtr xmlNewCharEncodingHandler ( const char * name,
xmlCharEncodingInputFunc  input,
xmlCharEncodingOutputFunc  output 
)

Definition at line 1394 of file encoding.c.

1396 {
1398 const char *alias;
1399 char upper[500];
1400 int i;
1401 char *up = NULL;
1402
1403 /*
1404 * Do the alias resolution
1405 */
1407 if (alias != NULL)
1408 name = alias;
1409
1410 /*
1411 * Keep only the uppercase version of the encoding.
1412 */
1413 if (name == NULL)
1414 return(NULL);
1415 for (i = 0;i < 499;i++) {
1416 upper[i] = (char) toupper((unsigned char) name[i]);
1417 if (upper[i] == 0) break;
1418 }
1419 upper[i] = 0;
1420 up = xmlMemStrdup(upper);
1421 if (up == NULL)
1422 return(NULL);
1423
1424 /*
1425 * allocate and fill-up an handler block.
1426 */
1429 if (handler == NULL) {
1430 xmlFree(up);
1431 return(NULL);
1432 }
1434 handler->input = input;
1435 handler->output = output;
1436 handler->name = up;
1437
1438#ifdef LIBXML_ICONV_ENABLED
1439 handler->iconv_in = NULL;
1440 handler->iconv_out = NULL;
1441#endif
1442#ifdef LIBXML_ICU_ENABLED
1443 handler->uconv_in = NULL;
1444 handler->uconv_out = NULL;
1445#endif
1446
1447 /*
1448 * registers and returns the handler.
1449 */
1451 return(handler);
1452}
void xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)
Definition: encoding.c:1515
GLenum GLenum GLenum input
Definition: glext.h:9031
#define up(mutex)
Definition: glue.h:30

Referenced by init_char_encoders().

◆ xmlParseCharEncoding()

XMLPUBFUN xmlCharEncoding xmlParseCharEncoding ( const char * name)

Definition at line 1148 of file encoding.c.

1149{
1150 const char *alias;
1151 char upper[500];
1152 int i;
1153
1154 if (name == NULL)
1155 return(XML_CHAR_ENCODING_NONE);
1156
1157 /*
1158 * Do the alias resolution
1159 */
1161 if (alias != NULL)
1162 name = alias;
1163
1164 for (i = 0;i < 499;i++) {
1165 upper[i] = (char) toupper((unsigned char) name[i]);
1166 if (upper[i] == 0) break;
1167 }
1168 upper[i] = 0;
1169
1170 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1171 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1172 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1173
1174 /*
1175 * NOTE: if we were able to parse this, the endianness of UTF16 is
1176 * already found and in use
1177 */
1178 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1179 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1180
1181 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1182 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1183 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1184
1185 /*
1186 * NOTE: if we were able to parse this, the endianness of UCS4 is
1187 * already found and in use
1188 */
1189 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1190 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1191 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1192
1193
1194 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1195 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1196 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1197
1198 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1199 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1200 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1201
1202 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1203 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1204 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1205 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1206 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1207 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1208 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1209
1210 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1211 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1212 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1213
1215}

Referenced by xmlFindCharEncodingHandler().

◆ xmlRegisterCharEncodingHandler()

XMLPUBFUN void xmlRegisterCharEncodingHandler ( xmlCharEncodingHandlerPtr  handler)

xmlRegisterCharEncodingHandler: @handler: the xmlCharEncodingHandlerPtr handler block

Register the char encoding handler, surprising, isn't it ?

Definition at line 1515 of file encoding.c.

1515 {
1516 if (handler == NULL)
1517 return;
1518 if (handlers == NULL) {
1520 if (handlers == NULL)
1521 goto free_handler;
1522 }
1523
1525 goto free_handler;
1527 return;
1528
1529free_handler:
1530 if (handler != NULL) {
1531 if (handler->name != NULL) {
1532 xmlFree(handler->name);
1533 }
1535 }
1536}
#define MAX_ENCODING_HANDLERS
Definition: encoding.c:1379

Referenced by xmlNewCharEncodingHandler().