xmlGetUTF8Char: utf: a sequence of UTF-8 encoded bytes. len: a pointer to the minimum number of bytes present in the sequence. This is used to ensure the next character is completely contained within the sequence.
Read the first UTF-8 character from utf.
Returns the char value or -1 in case of error, and sets *len to the actual number of bytes consumed (0 in case of error)
Definition at line 702 of file xmlstring.c.
Referenced by test_xmlGetUTF8Char(), and xmlCharEncOutFunc().
{
    /*
     * Decode the first UTF-8 character stored in utf.
     *
     * utf: the bytes to decode; NULL is reported as an error.
     * len: on entry, the number of bytes available in utf; on exit, the
     *      number of bytes consumed (0 on error). NULL is reported as an
     *      error.
     *
     * Returns the decoded code point, or -1 on error.
     *
     * Besides truncated sequences and bad continuation bytes, the
     * following malformed inputs are rejected:
     *   - a continuation byte (10xxxxxx) in the lead position,
     *   - overlong encodings (a value that fits in a shorter sequence),
     *   - UTF-16 surrogates (U+D800..U+DFFF),
     *   - values above U+10FFFF.
     */
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    if (*len < 1)
        goto error;

    c = utf[0];
    if ((c & 0x80) == 0) {
        /* 1-byte sequence: plain ASCII, c already holds the value. */
        *len = 1;
        return(c);
    }

    /* Every multi-byte sequence needs at least one continuation byte. */
    if (*len < 2)
        goto error;
    if ((utf[1] & 0xc0) != 0x80)
        goto error;

    if ((c & 0xe0) != 0xe0) {
        /*
         * 2-byte sequence: the lead byte must be 110xxxxx. With the high
         * bit already known to be set, a clear 0x40 bit means this is a
         * stray continuation byte (10xxxxxx).
         */
        if ((c & 0x40) == 0)
            goto error;
        c = (utf[0] & 0x1f) << 6;
        c |= utf[1] & 0x3f;
        /* Lead bytes 0xC0/0xC1 yield values below 0x80: overlong. */
        if (c < 0x80)
            goto error;
        *len = 2;
        return(c);
    }

    if (*len < 3)
        goto error;
    if ((utf[2] & 0xc0) != 0x80)
        goto error;

    if ((c & 0xf0) != 0xf0) {
        /* 3-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx. */
        c = (utf[0] & 0xf) << 12;
        c |= (utf[1] & 0x3f) << 6;
        c |= utf[2] & 0x3f;
        /* Overlong forms decode below 0x800. */
        if (c < 0x800)
            goto error;
        /* Surrogate code points are not valid in UTF-8. */
        if ((c >= 0xd800) && (c < 0xe000))
            goto error;
        *len = 3;
        return(c);
    }

    /* 4-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. */
    if (*len < 4)
        goto error;
    if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
        goto error;
    c = (utf[0] & 0x7) << 18;
    c |= (utf[1] & 0x3f) << 12;
    c |= (utf[2] & 0x3f) << 6;
    c |= utf[3] & 0x3f;
    /* Overlong forms decode below 0x10000; Unicode ends at 0x10FFFF. */
    if ((c < 0x10000) || (c >= 0x110000))
        goto error;
    *len = 4;
    return(c);

error:
    if (len != NULL)
        *len = 0;
    return(-1);
}