20#ifdef LIBXML_REGEXP_ENABLED
36#define SIZE_MAX ((size_t) -1)
39#define MAX_PUSH 10000000
44#define XML_REGEXP_OK 0
45#define XML_REGEXP_NOT_FOUND (-1)
46#define XML_REGEXP_INTERNAL_ERROR (-4)
47#define XML_REGEXP_OUT_OF_MEMORY (-5)
48#define XML_REGEXP_INTERNAL_LIMIT (-6)
49#define XML_REGEXP_INVALID_UTF8 (-7)
55 ctxt->error = XML_REGEXP_COMPILE_ERROR; \
56 xmlRegexpErrCompile(ctxt, str);
57#define NEXT ctxt->cur++
58#define CUR (*(ctxt->cur))
59#define NXT(index) (ctxt->cur[index])
61#define NEXTL(l) ctxt->cur += l;
62#define XML_REG_STRING_SEPARATOR '|'
67#define PREV (ctxt->cur[-1])
75 xmlGenericError(xmlGenericErrorContext, \
76 "Unimplemented block at %s:%d\n", \
89 XML_REGEXP_EPSILON = 1,
98 XML_REGEXP_NOTINITNAME,
100 XML_REGEXP_NOTNAMECHAR,
102 XML_REGEXP_NOTDECIMAL,
104 XML_REGEXP_NOTREALCHAR,
105 XML_REGEXP_LETTER = 100,
106 XML_REGEXP_LETTER_UPPERCASE,
107 XML_REGEXP_LETTER_LOWERCASE,
108 XML_REGEXP_LETTER_TITLECASE,
109 XML_REGEXP_LETTER_MODIFIER,
110 XML_REGEXP_LETTER_OTHERS,
112 XML_REGEXP_MARK_NONSPACING,
113 XML_REGEXP_MARK_SPACECOMBINING,
114 XML_REGEXP_MARK_ENCLOSING,
116 XML_REGEXP_NUMBER_DECIMAL,
117 XML_REGEXP_NUMBER_LETTER,
118 XML_REGEXP_NUMBER_OTHERS,
120 XML_REGEXP_PUNCT_CONNECTOR,
121 XML_REGEXP_PUNCT_DASH,
122 XML_REGEXP_PUNCT_OPEN,
123 XML_REGEXP_PUNCT_CLOSE,
124 XML_REGEXP_PUNCT_INITQUOTE,
125 XML_REGEXP_PUNCT_FINQUOTE,
126 XML_REGEXP_PUNCT_OTHERS,
128 XML_REGEXP_SEPAR_SPACE,
129 XML_REGEXP_SEPAR_LINE,
130 XML_REGEXP_SEPAR_PARA,
132 XML_REGEXP_SYMBOL_MATH,
133 XML_REGEXP_SYMBOL_CURRENCY,
134 XML_REGEXP_SYMBOL_MODIFIER,
135 XML_REGEXP_SYMBOL_OTHERS,
137 XML_REGEXP_OTHER_CONTROL,
138 XML_REGEXP_OTHER_FORMAT,
139 XML_REGEXP_OTHER_PRIVATE,
141 XML_REGEXP_BLOCK_NAME
145 XML_REGEXP_QUANT_EPSILON = 1,
146 XML_REGEXP_QUANT_ONCE,
147 XML_REGEXP_QUANT_OPT,
148 XML_REGEXP_QUANT_MULT,
149 XML_REGEXP_QUANT_PLUS,
150 XML_REGEXP_QUANT_ONCEONLY,
151 XML_REGEXP_QUANT_ALL,
152 XML_REGEXP_QUANT_RANGE
156 XML_REGEXP_START_STATE = 1,
157 XML_REGEXP_FINAL_STATE,
158 XML_REGEXP_TRANS_STATE,
159 XML_REGEXP_SINK_STATE,
160 XML_REGEXP_UNREACH_STATE
164 XML_REGEXP_MARK_NORMAL = 0,
165 XML_REGEXP_MARK_START,
166 XML_REGEXP_MARK_VISITED
169typedef struct _xmlRegRange xmlRegRange;
170typedef xmlRegRange *xmlRegRangePtr;
180typedef struct _xmlRegAtom xmlRegAtom;
181typedef xmlRegAtom *xmlRegAtomPtr;
183typedef struct _xmlAutomataState xmlRegState;
184typedef xmlRegState *xmlRegStatePtr;
189 xmlRegQuantType quant;
197 xmlRegStatePtr
start;
198 xmlRegStatePtr start0;
202 xmlRegRangePtr *ranges;
206typedef struct _xmlRegCounter xmlRegCounter;
207typedef xmlRegCounter *xmlRegCounterPtr;
209struct _xmlRegCounter {
214typedef struct _xmlRegTrans xmlRegTrans;
215typedef xmlRegTrans *xmlRegTransPtr;
225struct _xmlAutomataState {
226 xmlRegStateType
type;
227 xmlRegMarkedType mark;
228 xmlRegMarkedType markd;
229 xmlRegMarkedType reached;
240typedef struct _xmlAutomata xmlRegParserCtxt;
241typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
243#define AM_AUTOMATA_RNG 1
252 xmlRegStatePtr
start;
254 xmlRegStatePtr
state;
260 xmlRegAtomPtr *atoms;
264 xmlRegStatePtr *states;
280 xmlRegStatePtr *states;
282 xmlRegAtomPtr *atoms;
297typedef struct _xmlRegExecRollback xmlRegExecRollback;
298typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
300struct _xmlRegExecRollback {
301 xmlRegStatePtr
state;
307typedef struct _xmlRegInputToken xmlRegInputToken;
308typedef xmlRegInputToken *xmlRegInputTokenPtr;
310struct _xmlRegInputToken {
315struct _xmlRegExecCtxt {
322 xmlRegStatePtr
state;
331 xmlRegExecRollback *rollbacks;
346 xmlRegInputTokenPtr inputStack;
352 xmlRegStatePtr errState;
358#define REGEXP_ALL_COUNTER 0x123456
359#define REGEXP_ALL_LAX_COUNTER 0x123457
361static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt,
int top);
362static void xmlRegFreeState(xmlRegStatePtr
state);
363static void xmlRegFreeAtom(xmlRegAtomPtr atom);
364static int xmlRegStrEqualWildcard(
const xmlChar *expStr,
const xmlChar *valStr);
365static int xmlRegCheckCharacter(xmlRegAtomPtr atom,
int codepoint);
366static int xmlRegCheckCharacterRange(xmlRegAtomType
type,
int codepoint,
381xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt,
const char *
extra)
383 const char *regexp =
NULL;
385 regexp = (
const char *) ctxt->string;
391 "Memory allocation failed : %s\n",
extra);
401xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt,
const char *
extra)
403 const char *regexp =
NULL;
407 regexp = (
const char *) ctxt->string;
408 idx = ctxt->cur - ctxt->string;
414 "failed to compile: %s\n",
extra);
423static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
436xmlRegCalloc2(
size_t dim1,
size_t dim2,
size_t elemSize) {
441 if ((dim2 == 0) || (elemSize == 0) ||
442 (dim1 >
SIZE_MAX / dim2 / elemSize))
444 totalSize = dim1 * dim2 * elemSize;
460xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
465 xmlRegexpErrMemory(ctxt,
"compiling regexp");
469 ret->string = ctxt->string;
470 ret->nbStates = ctxt->nbStates;
471 ret->states = ctxt->states;
472 ret->nbAtoms = ctxt->nbAtoms;
473 ret->atoms = ctxt->atoms;
474 ret->nbCounters = ctxt->nbCounters;
475 ret->counters = ctxt->counters;
476 ret->determinist = ctxt->determinist;
477 ret->flags = ctxt->flags;
478 if (
ret->determinist == -1) {
479 if (xmlRegexpIsDeterminist(
ret) < 0) {
480 xmlRegexpErrMemory(ctxt,
"checking determinism");
486 if ((
ret->determinist != 0) &&
487 (
ret->nbCounters == 0) &&
491 (
ret->atoms[0]->type == XML_REGEXP_STRING)) {
492 int i,
j, nbstates = 0, nbatoms = 0;
509 if (stateRemap ==
NULL) {
510 xmlRegexpErrMemory(ctxt,
"compiling regexp");
514 for (
i = 0;
i <
ret->nbStates;
i++) {
516 stateRemap[
i] = nbstates;
523 if (stringMap ==
NULL) {
524 xmlRegexpErrMemory(ctxt,
"compiling regexp");
530 if (stringRemap ==
NULL) {
531 xmlRegexpErrMemory(ctxt,
"compiling regexp");
537 for (
i = 0;
i <
ret->nbAtoms;
i++) {
538 if ((
ret->atoms[
i]->type == XML_REGEXP_STRING) &&
539 (
ret->atoms[
i]->quant == XML_REGEXP_QUANT_ONCE)) {
541 for (
j = 0;
j < nbatoms;
j++) {
548 stringRemap[
i] = nbatoms;
550 if (stringMap[nbatoms] ==
NULL) {
551 for (
i = 0;
i < nbatoms;
i++)
564 for (
i = 0;
i < nbatoms;
i++)
571 transitions = (
int *) xmlRegCalloc2(nbstates + 1, nbatoms + 1,
573 if (transitions ==
NULL) {
576 for (
i = 0;
i < nbatoms;
i++)
589 for (
i = 0;
i <
ret->nbStates;
i++) {
590 int stateno, atomno, targetno, prev;
591 xmlRegStatePtr
state;
592 xmlRegTransPtr trans;
594 stateno = stateRemap[
i];
599 transitions[stateno * (nbatoms + 1)] =
state->type;
602 trans = &(
state->trans[
j]);
603 if ((trans->to < 0) || (trans->atom ==
NULL))
605 atomno = stringRemap[trans->atom->no];
606 if ((trans->atom->data !=
NULL) && (transdata ==
NULL)) {
607 transdata = (
void **) xmlRegCalloc2(nbstates, nbatoms,
609 if (transdata ==
NULL) {
610 xmlRegexpErrMemory(ctxt,
"compiling regexp");
614 targetno = stateRemap[trans->to];
620 prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
622 if (prev != targetno + 1) {
623 ret->determinist = 0;
624 if (transdata !=
NULL)
629 for (
i = 0;
i < nbatoms;
i++)
636 printf(
"State %d trans %d: atom %d to %d : %d to %d\n",
637 i,
j, trans->atom->no, trans->to, atomno, targetno);
639 transitions[stateno * (nbatoms + 1) + atomno + 1] =
641 if (transdata !=
NULL)
642 transdata[stateno * nbatoms + atomno] =
647 ret->determinist = 1;
652 for (
i = 0;
i <
ret->nbStates;
i++)
653 xmlRegFreeState(
ret->states[
i]);
659 for (
i = 0;
i <
ret->nbAtoms;
i++)
660 xmlRegFreeAtom(
ret->atoms[
i]);
666 ret->compact = transitions;
667 ret->transdata = transdata;
668 ret->stringMap = stringMap;
669 ret->nbstrings = nbatoms;
670 ret->nbstates = nbstates;
680 ctxt->nbCounters = 0;
681 ctxt->counters =
NULL;
693static xmlRegParserCtxtPtr
694xmlRegNewParserCtxt(
const xmlChar *
string) {
695 xmlRegParserCtxtPtr
ret;
697 ret = (xmlRegParserCtxtPtr)
xmlMalloc(
sizeof(xmlRegParserCtxt));
700 memset(
ret, 0,
sizeof(xmlRegParserCtxt));
707 ret->determinist = -1;
724xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
730 xmlRegexpErrMemory(ctxt,
"allocating range");
747xmlRegFreeRange(xmlRegRangePtr
range) {
765xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr
range) {
778 xmlRegexpErrMemory(ctxt,
"allocating range");
779 xmlRegFreeRange(
ret);
796xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType
type) {
801 xmlRegexpErrMemory(ctxt,
"allocating atom");
806 ret->quant = XML_REGEXP_QUANT_ONCE;
819xmlRegFreeAtom(xmlRegAtomPtr atom) {
825 for (
i = 0;
i < atom->nbRanges;
i++)
826 xmlRegFreeRange(atom->ranges[
i]);
827 if (atom->ranges !=
NULL)
829 if ((atom->type == XML_REGEXP_STRING) && (atom->valuep !=
NULL))
831 if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 !=
NULL))
833 if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep !=
NULL))
848xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
853 xmlRegexpErrMemory(ctxt,
"copying atom");
857 ret->type = atom->type;
858 ret->quant = atom->quant;
859 ret->min = atom->min;
860 ret->max = atom->max;
861 if (atom->nbRanges > 0) {
864 ret->ranges = (xmlRegRangePtr *)
xmlMalloc(
sizeof(xmlRegRangePtr) *
867 xmlRegexpErrMemory(ctxt,
"copying atom");
870 for (
i = 0;
i < atom->nbRanges;
i++) {
871 ret->ranges[
i] = xmlRegCopyRange(ctxt, atom->ranges[
i]);
874 ret->nbRanges =
i + 1;
885xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
890 xmlRegexpErrMemory(ctxt,
"allocating state");
894 ret->type = XML_REGEXP_TRANS_STATE;
895 ret->mark = XML_REGEXP_MARK_NORMAL;
906xmlRegFreeState(xmlRegStatePtr
state) {
924xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
929 if (ctxt->string !=
NULL)
931 if (ctxt->states !=
NULL) {
932 for (
i = 0;
i < ctxt->nbStates;
i++)
933 xmlRegFreeState(ctxt->states[
i]);
936 if (ctxt->atoms !=
NULL) {
937 for (
i = 0;
i < ctxt->nbAtoms;
i++)
938 xmlRegFreeAtom(ctxt->atoms[
i]);
941 if (ctxt->counters !=
NULL)
953xmlRegPrintAtomType(
FILE *output, xmlRegAtomType
type) {
955 case XML_REGEXP_EPSILON:
956 fprintf(output,
"epsilon ");
break;
957 case XML_REGEXP_CHARVAL:
958 fprintf(output,
"charval ");
break;
959 case XML_REGEXP_RANGES:
960 fprintf(output,
"ranges ");
break;
961 case XML_REGEXP_SUBREG:
962 fprintf(output,
"subexpr ");
break;
963 case XML_REGEXP_STRING:
964 fprintf(output,
"string ");
break;
965 case XML_REGEXP_ANYCHAR:
966 fprintf(output,
"anychar ");
break;
967 case XML_REGEXP_ANYSPACE:
968 fprintf(output,
"anyspace ");
break;
969 case XML_REGEXP_NOTSPACE:
970 fprintf(output,
"notspace ");
break;
971 case XML_REGEXP_INITNAME:
972 fprintf(output,
"initname ");
break;
973 case XML_REGEXP_NOTINITNAME:
974 fprintf(output,
"notinitname ");
break;
975 case XML_REGEXP_NAMECHAR:
976 fprintf(output,
"namechar ");
break;
977 case XML_REGEXP_NOTNAMECHAR:
978 fprintf(output,
"notnamechar ");
break;
979 case XML_REGEXP_DECIMAL:
980 fprintf(output,
"decimal ");
break;
981 case XML_REGEXP_NOTDECIMAL:
982 fprintf(output,
"notdecimal ");
break;
983 case XML_REGEXP_REALCHAR:
984 fprintf(output,
"realchar ");
break;
985 case XML_REGEXP_NOTREALCHAR:
986 fprintf(output,
"notrealchar ");
break;
987 case XML_REGEXP_LETTER:
988 fprintf(output,
"LETTER ");
break;
989 case XML_REGEXP_LETTER_UPPERCASE:
990 fprintf(output,
"LETTER_UPPERCASE ");
break;
991 case XML_REGEXP_LETTER_LOWERCASE:
992 fprintf(output,
"LETTER_LOWERCASE ");
break;
993 case XML_REGEXP_LETTER_TITLECASE:
994 fprintf(output,
"LETTER_TITLECASE ");
break;
995 case XML_REGEXP_LETTER_MODIFIER:
996 fprintf(output,
"LETTER_MODIFIER ");
break;
997 case XML_REGEXP_LETTER_OTHERS:
998 fprintf(output,
"LETTER_OTHERS ");
break;
999 case XML_REGEXP_MARK:
1000 fprintf(output,
"MARK ");
break;
1001 case XML_REGEXP_MARK_NONSPACING:
1002 fprintf(output,
"MARK_NONSPACING ");
break;
1003 case XML_REGEXP_MARK_SPACECOMBINING:
1004 fprintf(output,
"MARK_SPACECOMBINING ");
break;
1005 case XML_REGEXP_MARK_ENCLOSING:
1006 fprintf(output,
"MARK_ENCLOSING ");
break;
1007 case XML_REGEXP_NUMBER:
1008 fprintf(output,
"NUMBER ");
break;
1009 case XML_REGEXP_NUMBER_DECIMAL:
1010 fprintf(output,
"NUMBER_DECIMAL ");
break;
1011 case XML_REGEXP_NUMBER_LETTER:
1012 fprintf(output,
"NUMBER_LETTER ");
break;
1013 case XML_REGEXP_NUMBER_OTHERS:
1014 fprintf(output,
"NUMBER_OTHERS ");
break;
1015 case XML_REGEXP_PUNCT:
1016 fprintf(output,
"PUNCT ");
break;
1017 case XML_REGEXP_PUNCT_CONNECTOR:
1018 fprintf(output,
"PUNCT_CONNECTOR ");
break;
1019 case XML_REGEXP_PUNCT_DASH:
1020 fprintf(output,
"PUNCT_DASH ");
break;
1021 case XML_REGEXP_PUNCT_OPEN:
1022 fprintf(output,
"PUNCT_OPEN ");
break;
1023 case XML_REGEXP_PUNCT_CLOSE:
1024 fprintf(output,
"PUNCT_CLOSE ");
break;
1025 case XML_REGEXP_PUNCT_INITQUOTE:
1026 fprintf(output,
"PUNCT_INITQUOTE ");
break;
1027 case XML_REGEXP_PUNCT_FINQUOTE:
1028 fprintf(output,
"PUNCT_FINQUOTE ");
break;
1029 case XML_REGEXP_PUNCT_OTHERS:
1030 fprintf(output,
"PUNCT_OTHERS ");
break;
1031 case XML_REGEXP_SEPAR:
1032 fprintf(output,
"SEPAR ");
break;
1033 case XML_REGEXP_SEPAR_SPACE:
1034 fprintf(output,
"SEPAR_SPACE ");
break;
1035 case XML_REGEXP_SEPAR_LINE:
1036 fprintf(output,
"SEPAR_LINE ");
break;
1037 case XML_REGEXP_SEPAR_PARA:
1038 fprintf(output,
"SEPAR_PARA ");
break;
1039 case XML_REGEXP_SYMBOL:
1040 fprintf(output,
"SYMBOL ");
break;
1041 case XML_REGEXP_SYMBOL_MATH:
1042 fprintf(output,
"SYMBOL_MATH ");
break;
1043 case XML_REGEXP_SYMBOL_CURRENCY:
1044 fprintf(output,
"SYMBOL_CURRENCY ");
break;
1045 case XML_REGEXP_SYMBOL_MODIFIER:
1046 fprintf(output,
"SYMBOL_MODIFIER ");
break;
1047 case XML_REGEXP_SYMBOL_OTHERS:
1048 fprintf(output,
"SYMBOL_OTHERS ");
break;
1049 case XML_REGEXP_OTHER:
1050 fprintf(output,
"OTHER ");
break;
1051 case XML_REGEXP_OTHER_CONTROL:
1052 fprintf(output,
"OTHER_CONTROL ");
break;
1053 case XML_REGEXP_OTHER_FORMAT:
1054 fprintf(output,
"OTHER_FORMAT ");
break;
1055 case XML_REGEXP_OTHER_PRIVATE:
1056 fprintf(output,
"OTHER_PRIVATE ");
break;
1057 case XML_REGEXP_OTHER_NA:
1058 fprintf(output,
"OTHER_NA ");
break;
1059 case XML_REGEXP_BLOCK_NAME:
1060 fprintf(output,
"BLOCK ");
break;
1065xmlRegPrintQuantType(
FILE *output, xmlRegQuantType
type) {
1067 case XML_REGEXP_QUANT_EPSILON:
1068 fprintf(output,
"epsilon ");
break;
1069 case XML_REGEXP_QUANT_ONCE:
1070 fprintf(output,
"once ");
break;
1071 case XML_REGEXP_QUANT_OPT:
1073 case XML_REGEXP_QUANT_MULT:
1075 case XML_REGEXP_QUANT_PLUS:
1077 case XML_REGEXP_QUANT_RANGE:
1078 fprintf(output,
"range ");
break;
1079 case XML_REGEXP_QUANT_ONCEONLY:
1080 fprintf(output,
"onceonly ");
break;
1081 case XML_REGEXP_QUANT_ALL:
1082 fprintf(output,
"all ");
break;
1086xmlRegPrintRange(
FILE *output, xmlRegRangePtr
range) {
1090 xmlRegPrintAtomType(output,
range->type);
1095xmlRegPrintAtom(
FILE *output, xmlRegAtomPtr atom) {
1103 xmlRegPrintAtomType(output, atom->type);
1104 xmlRegPrintQuantType(output, atom->quant);
1105 if (atom->quant == XML_REGEXP_QUANT_RANGE)
1106 fprintf(output,
"%d-%d ", atom->min, atom->max);
1107 if (atom->type == XML_REGEXP_STRING)
1108 fprintf(output,
"'%s' ", (
char *) atom->valuep);
1109 if (atom->type == XML_REGEXP_CHARVAL)
1110 fprintf(output,
"char %c\n", atom->codepoint);
1111 else if (atom->type == XML_REGEXP_RANGES) {
1113 fprintf(output,
"%d entries\n", atom->nbRanges);
1114 for (
i = 0;
i < atom->nbRanges;
i++)
1115 xmlRegPrintRange(output, atom->ranges[
i]);
1116 }
else if (atom->type == XML_REGEXP_SUBREG) {
1117 fprintf(output,
"start %d end %d\n", atom->start->no, atom->stop->no);
1124xmlRegPrintTrans(
FILE *output, xmlRegTransPtr trans) {
1126 if (trans ==
NULL) {
1130 if (trans->to < 0) {
1134 if (trans->nd != 0) {
1136 fprintf(output,
"last not determinist, ");
1138 fprintf(output,
"not determinist, ");
1140 if (trans->counter >= 0) {
1141 fprintf(output,
"counted %d, ", trans->counter);
1143 if (trans->count == REGEXP_ALL_COUNTER) {
1144 fprintf(output,
"all transition, ");
1145 }
else if (trans->count >= 0) {
1146 fprintf(output,
"count based %d, ", trans->count);
1148 if (trans->atom ==
NULL) {
1149 fprintf(output,
"epsilon to %d\n", trans->to);
1152 if (trans->atom->type == XML_REGEXP_CHARVAL)
1153 fprintf(output,
"char %c ", trans->atom->codepoint);
1154 fprintf(output,
"atom %d, to %d\n", trans->atom->no, trans->to);
1158xmlRegPrintState(
FILE *output, xmlRegStatePtr
state) {
1166 if (
state->type == XML_REGEXP_START_STATE)
1168 if (
state->type == XML_REGEXP_FINAL_STATE)
1172 for (
i = 0;
i <
state->nbTrans;
i++) {
1173 xmlRegPrintTrans(output, &(
state->trans[
i]));
1183static xmlRegRangePtr
1184xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
1187 xmlRegRangePtr
range;
1190 ERROR(
"add range: atom is NULL");
1193 if (atom->type != XML_REGEXP_RANGES) {
1194 ERROR(
"add range: atom is not ranges");
1197 if (atom->maxRanges == 0) {
1198 atom->maxRanges = 4;
1199 atom->ranges = (xmlRegRangePtr *)
xmlMalloc(atom->maxRanges *
1200 sizeof(xmlRegRangePtr));
1201 if (atom->ranges ==
NULL) {
1202 xmlRegexpErrMemory(ctxt,
"adding ranges");
1203 atom->maxRanges = 0;
1206 }
else if (atom->nbRanges >= atom->maxRanges) {
1207 xmlRegRangePtr *tmp;
1208 atom->maxRanges *= 2;
1209 tmp = (xmlRegRangePtr *)
xmlRealloc(atom->ranges, atom->maxRanges *
1210 sizeof(xmlRegRangePtr));
1212 xmlRegexpErrMemory(ctxt,
"adding ranges");
1213 atom->maxRanges /= 2;
1221 range->blockName = blockName;
1222 atom->ranges[atom->nbRanges++] =
range;
1228xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1229 if (ctxt->maxCounters == 0) {
1230 ctxt->maxCounters = 4;
1231 ctxt->counters = (xmlRegCounter *)
xmlMalloc(ctxt->maxCounters *
1232 sizeof(xmlRegCounter));
1233 if (ctxt->counters ==
NULL) {
1234 xmlRegexpErrMemory(ctxt,
"allocating counter");
1235 ctxt->maxCounters = 0;
1238 }
else if (ctxt->nbCounters >= ctxt->maxCounters) {
1240 ctxt->maxCounters *= 2;
1241 tmp = (xmlRegCounter *)
xmlRealloc(ctxt->counters, ctxt->maxCounters *
1242 sizeof(xmlRegCounter));
1244 xmlRegexpErrMemory(ctxt,
"allocating counter");
1245 ctxt->maxCounters /= 2;
1248 ctxt->counters = tmp;
1250 ctxt->counters[ctxt->nbCounters].min = -1;
1251 ctxt->counters[ctxt->nbCounters].max = -1;
1252 return(ctxt->nbCounters++);
1256xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1258 ERROR(
"atom push: atom is NULL");
1261 if (ctxt->nbAtoms >= ctxt->maxAtoms) {
1262 size_t newSize = ctxt->maxAtoms ? ctxt->maxAtoms * 2 : 4;
1265 tmp =
xmlRealloc(ctxt->atoms, newSize *
sizeof(xmlRegAtomPtr));
1267 xmlRegexpErrMemory(ctxt,
"allocating counter");
1271 ctxt->maxAtoms = newSize;
1273 atom->no = ctxt->nbAtoms;
1274 ctxt->atoms[ctxt->nbAtoms++] = atom;
1279xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr
target,
1281 if (
target->maxTransTo == 0) {
1286 xmlRegexpErrMemory(ctxt,
"adding transition");
1296 xmlRegexpErrMemory(ctxt,
"adding transition");
1307xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr
state,
1308 xmlRegAtomPtr atom, xmlRegStatePtr
target,
1314 ERROR(
"add state: state is NULL");
1318 ERROR(
"add state: target is NULL");
1327 for (nrtrans =
state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
1328 xmlRegTransPtr trans = &(
state->trans[nrtrans]);
1329 if ((trans->atom == atom) &&
1330 (trans->to ==
target->no) &&
1331 (trans->counter ==
counter) &&
1332 (trans->count ==
count)) {
1337 if (
state->maxTrans == 0) {
1338 state->maxTrans = 8;
1340 sizeof(xmlRegTrans));
1342 xmlRegexpErrMemory(ctxt,
"adding transition");
1343 state->maxTrans = 0;
1346 }
else if (
state->nbTrans >=
state->maxTrans) {
1348 state->maxTrans *= 2;
1350 sizeof(xmlRegTrans));
1352 xmlRegexpErrMemory(ctxt,
"adding transition");
1353 state->maxTrans /= 2;
1368static xmlRegStatePtr
1369xmlRegStatePush(xmlRegParserCtxtPtr ctxt) {
1370 xmlRegStatePtr
state;
1372 if (ctxt->nbStates >= ctxt->maxStates) {
1373 size_t newSize = ctxt->maxStates ? ctxt->maxStates * 2 : 4;
1374 xmlRegStatePtr *tmp;
1376 tmp =
xmlRealloc(ctxt->states, newSize *
sizeof(tmp[0]));
1378 xmlRegexpErrMemory(ctxt,
"adding state");
1382 ctxt->maxStates = newSize;
1385 state = xmlRegNewState(ctxt);
1389 state->no = ctxt->nbStates;
1390 ctxt->states[ctxt->nbStates++] =
state;
1404xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
1405 xmlRegStatePtr
from, xmlRegStatePtr to,
1408 to = xmlRegStatePush(ctxt);
1414 xmlRegStateAddTrans(ctxt,
from,
NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
1416 xmlRegStateAddTrans(ctxt,
from,
NULL, to, -1, REGEXP_ALL_COUNTER);
1428xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1429 xmlRegStatePtr
from, xmlRegStatePtr to) {
1431 to = xmlRegStatePush(ctxt);
1436 xmlRegStateAddTrans(ctxt,
from,
NULL, to, -1, -1);
1449xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1450 xmlRegStatePtr
from, xmlRegStatePtr to,
int counter) {
1452 to = xmlRegStatePush(ctxt);
1470xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1471 xmlRegStatePtr
from, xmlRegStatePtr to,
int counter) {
1473 to = xmlRegStatePush(ctxt);
1492xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr
from,
1493 xmlRegStatePtr to, xmlRegAtomPtr atom) {
1498 ERROR(
"generate transition: atom == NULL");
1501 if (atom->type == XML_REGEXP_SUBREG) {
1506 if ((to !=
NULL) && (atom->stop != to) &&
1507 (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1511 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1513 }
else if ((to ==
NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
1514 (atom->quant != XML_REGEXP_QUANT_ONCE)) {
1515 to = xmlRegStatePush(ctxt); /* xmlRegStatePush() takes only the parser context; its result becomes the new target state */
1519 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1522 switch (atom->quant) {
1523 case XML_REGEXP_QUANT_OPT:
1524 atom->quant = XML_REGEXP_QUANT_ONCE;
1531 xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
1532 xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
1535 xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
1538 case XML_REGEXP_QUANT_MULT:
1539 atom->quant = XML_REGEXP_QUANT_ONCE;
1540 xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1541 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1543 case XML_REGEXP_QUANT_PLUS:
1544 atom->quant = XML_REGEXP_QUANT_ONCE;
1545 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1547 case XML_REGEXP_QUANT_RANGE: {
1549 xmlRegStatePtr inter, newstate;
1557 newstate = xmlRegStatePush(ctxt);
1558 if (newstate ==
NULL)
1568 if ((atom->min == 0) && (atom->start0 ==
NULL)) {
1579 copy = xmlRegCopyAtom(ctxt, atom);
1582 copy->quant = XML_REGEXP_QUANT_ONCE;
1586 if (xmlFAGenerateTransitions(ctxt, atom->start,
NULL,
copy)
1588 xmlRegFreeAtom(
copy);
1591 inter = ctxt->state;
1592 counter = xmlRegGetCounter(ctxt);
1595 ctxt->counters[
counter].min = atom->min - 1;
1596 ctxt->counters[
counter].max = atom->max - 1;
1598 xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
1601 xmlFAGenerateCountedTransition(ctxt, inter,
1604 xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1612 counter = xmlRegGetCounter(ctxt);
1615 ctxt->counters[
counter].min = atom->min - 1;
1616 ctxt->counters[
counter].max = atom->max - 1;
1618 xmlFAGenerateCountedTransition(ctxt, atom->stop,
1621 xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1625 xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
1631 atom->quant = XML_REGEXP_QUANT_ONCE;
1632 ctxt->state = newstate;
1637 if (xmlRegAtomPush(ctxt, atom) < 0)
1641 if ((atom->min == 0) && (atom->max == 0) &&
1642 (atom->quant == XML_REGEXP_QUANT_RANGE)) {
1647 to = xmlRegStatePush(ctxt);
1651 xmlFAGenerateEpsilonTransition(ctxt,
from, to);
1653 xmlRegFreeAtom(atom);
1657 to = xmlRegStatePush(ctxt);
1662 if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
1663 (atom->quant == XML_REGEXP_QUANT_PLUS)) {
1671 tmp = xmlRegStatePush(ctxt);
1674 xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
1677 if ((atom->quant == XML_REGEXP_QUANT_RANGE) &&
1678 (atom->min == 0) && (atom->max > 0)) {
1682 atom->quant = XML_REGEXP_QUANT_OPT;
1684 xmlRegStateAddTrans(ctxt,
from, atom, to, -1, -1);
1686 switch (atom->quant) {
1687 case XML_REGEXP_QUANT_OPT:
1688 atom->quant = XML_REGEXP_QUANT_ONCE;
1689 xmlFAGenerateEpsilonTransition(ctxt,
from, to);
1691 case XML_REGEXP_QUANT_MULT:
1692 atom->quant = XML_REGEXP_QUANT_ONCE;
1693 xmlFAGenerateEpsilonTransition(ctxt,
from, to);
1694 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1696 case XML_REGEXP_QUANT_PLUS:
1697 atom->quant = XML_REGEXP_QUANT_ONCE;
1698 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1700 case XML_REGEXP_QUANT_RANGE:
1702 xmlFAGenerateEpsilonTransition(ctxt,
from, to);
1707 if (xmlRegAtomPush(ctxt, atom) < 0)
1721xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt,
int fromnr,
1724 xmlRegStatePtr
from;
1727 from = ctxt->states[fromnr];
1730 to = ctxt->states[tonr];
1733 if ((to->mark == XML_REGEXP_MARK_START) ||
1734 (to->mark == XML_REGEXP_MARK_VISITED))
1737 to->mark = XML_REGEXP_MARK_VISITED;
1738 if (to->type == XML_REGEXP_FINAL_STATE) {
1739 from->type = XML_REGEXP_FINAL_STATE;
1741 for (transnr = 0;transnr < to->nbTrans;transnr++) {
1742 xmlRegTransPtr t1 = &to->trans[transnr];
1747 if (t1->counter >= 0) {
1749 tcounter = t1->counter;
1753 if (t1->atom ==
NULL) {
1758 if (t1->to != fromnr) {
1759 if (t1->count >= 0) {
1760 xmlRegStateAddTrans(ctxt,
from,
NULL, ctxt->states[t1->to],
1763 xmlFAReduceEpsilonTransitions(ctxt, fromnr, t1->to,
1768 xmlRegStateAddTrans(ctxt,
from, t1->atom,
1769 ctxt->states[t1->to], tcounter, -1);
1783xmlFAFinishReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt,
int tonr) {
1787 to = ctxt->states[tonr];
1790 if ((to->mark == XML_REGEXP_MARK_START) ||
1791 (to->mark == XML_REGEXP_MARK_NORMAL))
1794 to->mark = XML_REGEXP_MARK_NORMAL;
1795 for (transnr = 0;transnr < to->nbTrans;transnr++) {
1796 xmlRegTransPtr t1 = &to->trans[transnr];
1797 if ((t1->to >= 0) && (t1->atom ==
NULL))
1798 xmlFAFinishReduceEpsilonTransitions(ctxt, t1->to);
1824xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1825 int statenr,
i,
j, newto;
1826 xmlRegStatePtr
state, tmp;
1828 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1829 state = ctxt->states[statenr];
1832 if (
state->nbTrans != 1)
1834 if (
state->type == XML_REGEXP_UNREACH_STATE ||
1835 state->type == XML_REGEXP_FINAL_STATE)
1839 (
state->trans[0].to >= 0) &&
1840 (
state->trans[0].to != statenr) &&
1841 (
state->trans[0].counter < 0) &&
1842 (
state->trans[0].count < 0)) {
1843 newto =
state->trans[0].to;
1845 if (
state->type == XML_REGEXP_START_STATE) {
1847 for (
i = 0;
i <
state->nbTransTo;
i++) {
1848 tmp = ctxt->states[
state->transTo[
i]];
1849 for (
j = 0;
j < tmp->nbTrans;
j++) {
1850 if (tmp->trans[
j].to == statenr) {
1851 tmp->trans[
j].to = -1;
1852 xmlRegStateAddTrans(ctxt, tmp, tmp->trans[
j].atom,
1853 ctxt->states[newto],
1854 tmp->trans[
j].counter,
1855 tmp->trans[
j].count);
1859 if (
state->type == XML_REGEXP_FINAL_STATE)
1860 ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
1864 state->type = XML_REGEXP_UNREACH_STATE;
1877xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1878 int statenr, transnr;
1879 xmlRegStatePtr
state;
1882 if (ctxt->states ==
NULL)
return;
1888 xmlFAEliminateSimpleEpsilonTransitions(ctxt);
1889 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1890 state = ctxt->states[statenr];
1891 if ((
state !=
NULL) && (
state->type == XML_REGEXP_UNREACH_STATE)) {
1892 xmlRegFreeState(
state);
1893 ctxt->states[statenr] =
NULL;
1907 for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
1908 state = ctxt->states[statenr];
1911 if ((
state->nbTrans == 0) &&
1912 (
state->type != XML_REGEXP_FINAL_STATE)) {
1913 state->type = XML_REGEXP_SINK_STATE;
1915 for (transnr = 0;transnr <
state->nbTrans;transnr++) {
1916 if ((
state->trans[transnr].atom ==
NULL) &&
1917 (
state->trans[transnr].to >= 0)) {
1918 if (
state->trans[transnr].to == statenr) {
1919 state->trans[transnr].to = -1;
1920 }
else if (
state->trans[transnr].count < 0) {
1921 int newto =
state->trans[transnr].to;
1924 state->trans[transnr].to = -2;
1925 state->mark = XML_REGEXP_MARK_START;
1926 xmlFAReduceEpsilonTransitions(ctxt, statenr,
1927 newto,
state->trans[transnr].counter);
1928 xmlFAFinishReduceEpsilonTransitions(ctxt, newto);
1929 state->mark = XML_REGEXP_MARK_NORMAL;
1938 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1939 state = ctxt->states[statenr];
1942 for (transnr = 0;transnr <
state->nbTrans;transnr++) {
1943 xmlRegTransPtr trans = &(
state->trans[transnr]);
1944 if ((trans->atom ==
NULL) &&
1945 (trans->count < 0) &&
1956 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1957 state = ctxt->states[statenr];
1959 state->reached = XML_REGEXP_MARK_NORMAL;
1961 state = ctxt->states[0];
1963 state->reached = XML_REGEXP_MARK_START;
1966 state->reached = XML_REGEXP_MARK_VISITED;
1970 for (transnr = 0;transnr <
state->nbTrans;transnr++) {
1971 if ((
state->trans[transnr].to >= 0) &&
1972 ((
state->trans[transnr].atom !=
NULL) ||
1973 (
state->trans[transnr].count >= 0))) {
1974 int newto =
state->trans[transnr].to;
1976 if (ctxt->states[newto] ==
NULL)
1978 if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
1979 ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
1980 target = ctxt->states[newto];
1989 for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
1990 state = ctxt->states[statenr];
1992 XML_REGEXP_MARK_START)) {
2000 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2001 state = ctxt->states[statenr];
2002 if ((
state !=
NULL) && (
state->reached == XML_REGEXP_MARK_NORMAL)) {
2003 xmlRegFreeState(
state);
2004 ctxt->states[statenr] =
NULL;
2011xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
2014 if ((range1->type == XML_REGEXP_RANGES) ||
2015 (range2->type == XML_REGEXP_RANGES) ||
2016 (range2->type == XML_REGEXP_SUBREG) ||
2017 (range1->type == XML_REGEXP_SUBREG) ||
2018 (range1->type == XML_REGEXP_STRING) ||
2019 (range2->type == XML_REGEXP_STRING))
2023 if (range1->type > range2->type) {
2030 if ((range1->type == XML_REGEXP_ANYCHAR) ||
2031 (range2->type == XML_REGEXP_ANYCHAR)) {
2033 }
else if ((range1->type == XML_REGEXP_EPSILON) ||
2034 (range2->type == XML_REGEXP_EPSILON)) {
2036 }
else if (range1->type == range2->type) {
2037 if (range1->type != XML_REGEXP_CHARVAL)
2039 else if ((range1->end < range2->start) ||
2040 (range2->end < range1->start))
2044 }
else if (range1->type == XML_REGEXP_CHARVAL) {
2054 if (((range1->neg == 0) && (range2->neg != 0)) ||
2055 ((range1->neg != 0) && (range2->neg == 0)))
2058 for (codepoint = range1->start;codepoint <= range1->
end ;codepoint++) {
2059 ret = xmlRegCheckCharacterRange(range2->type, codepoint,
2060 0, range2->start, range2->end,
2064 if (((neg == 1) && (
ret == 0)) ||
2065 ((neg == 0) && (
ret == 1)))
2069 }
else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
2070 (range2->type == XML_REGEXP_BLOCK_NAME)) {
2071 if (range1->type == range2->type) {
2082 }
else if ((range1->type < XML_REGEXP_LETTER) ||
2083 (range2->type < XML_REGEXP_LETTER)) {
2084 if ((range1->type == XML_REGEXP_ANYSPACE) &&
2085 (range2->type == XML_REGEXP_NOTSPACE))
2087 else if ((range1->type == XML_REGEXP_INITNAME) &&
2088 (range2->type == XML_REGEXP_NOTINITNAME))
2090 else if ((range1->type == XML_REGEXP_NAMECHAR) &&
2091 (range2->type == XML_REGEXP_NOTNAMECHAR))
2093 else if ((range1->type == XML_REGEXP_DECIMAL) &&
2094 (range2->type == XML_REGEXP_NOTDECIMAL))
2096 else if ((range1->type == XML_REGEXP_REALCHAR) &&
2097 (range2->type == XML_REGEXP_NOTREALCHAR))
2106 switch (range1->type) {
2107 case XML_REGEXP_LETTER:
2109 if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
2110 (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
2111 (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
2112 (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
2113 (range2->type == XML_REGEXP_LETTER_OTHERS))
2116 case XML_REGEXP_MARK:
2117 if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
2118 (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
2119 (range2->type == XML_REGEXP_MARK_ENCLOSING))
2122 case XML_REGEXP_NUMBER:
2123 if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
2124 (range2->type == XML_REGEXP_NUMBER_LETTER) ||
2125 (range2->type == XML_REGEXP_NUMBER_OTHERS))
2128 case XML_REGEXP_PUNCT:
2129 if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
2130 (range2->type == XML_REGEXP_PUNCT_DASH) ||
2131 (range2->type == XML_REGEXP_PUNCT_OPEN) ||
2132 (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
2133 (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
2134 (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
2135 (range2->type == XML_REGEXP_PUNCT_OTHERS))
2138 case XML_REGEXP_SEPAR:
2139 if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
2140 (range2->type == XML_REGEXP_SEPAR_LINE) ||
2141 (range2->type == XML_REGEXP_SEPAR_PARA))
2144 case XML_REGEXP_SYMBOL:
2145 if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
2146 (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
2147 (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
2148 (range2->type == XML_REGEXP_SYMBOL_OTHERS))
2151 case XML_REGEXP_OTHER:
2152 if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
2153 (range2->type == XML_REGEXP_OTHER_FORMAT) ||
2154 (range2->type == XML_REGEXP_OTHER_PRIVATE))
2158 if ((range2->type >= XML_REGEXP_LETTER) &&
2159 (range2->type < XML_REGEXP_BLOCK_NAME))
2167 if (((range1->neg == 0) && (range2->neg != 0)) ||
2168 ((range1->neg != 0) && (range2->neg == 0)))
/*
 * xmlFACompareAtomTypes: compare two atom type codes.
 *
 * type1, type2: the xmlRegAtomType values to compare.
 *
 * Visible logic: each argument is first tested against the non-class
 * types (EPSILON, CHARVAL, RANGES, SUBREG, STRING, ANYCHAR); identical
 * types yield 1; afterwards a run of case labels covers the character
 * class types and tests type2 against sub-ranges of the category enum.
 *
 * NOTE(review): the embedded line numbers skip, so the statements run by
 * most branches (everything except the return(1) for equal types) are not
 * visible in this listing - confirm against the complete file.
 */
2184xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
2185 if ((type1 == XML_REGEXP_EPSILON) ||
2186 (type1 == XML_REGEXP_CHARVAL) ||
2187 (type1 == XML_REGEXP_RANGES) ||
2188 (type1 == XML_REGEXP_SUBREG) ||
2189 (type1 == XML_REGEXP_STRING) ||
2190 (type1 == XML_REGEXP_ANYCHAR))
2192 if ((type2 == XML_REGEXP_EPSILON) ||
2193 (type2 == XML_REGEXP_CHARVAL) ||
2194 (type2 == XML_REGEXP_RANGES) ||
2195 (type2 == XML_REGEXP_SUBREG) ||
2196 (type2 == XML_REGEXP_STRING) ||
2197 (type2 == XML_REGEXP_ANYCHAR))
2200 if (type1 == type2)
return(1);
/* tmp keeps the old type1; presumably the pair is swapped so that
 * type1 <= type2 - the rest of the swap is not visible here. */
2203 if (type1 > type2) {
2204 xmlRegAtomType tmp = type1;
2209 case XML_REGEXP_ANYSPACE:
2211 if ((type2 == XML_REGEXP_NOTSPACE) ||
2212 ((type2 >= XML_REGEXP_LETTER) &&
2213 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2214 ((type2 >= XML_REGEXP_NUMBER) &&
2215 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2216 ((type2 >= XML_REGEXP_MARK) &&
2217 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2218 ((type2 >= XML_REGEXP_PUNCT) &&
2219 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2220 ((type2 >= XML_REGEXP_SYMBOL) &&
2221 (type2 <= XML_REGEXP_SYMBOL_OTHERS))
2224 case XML_REGEXP_NOTSPACE:
2226 case XML_REGEXP_INITNAME:
2228 if ((type2 == XML_REGEXP_NOTINITNAME) ||
2229 ((type2 >= XML_REGEXP_NUMBER) &&
2230 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2231 ((type2 >= XML_REGEXP_MARK) &&
2232 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2233 ((type2 >= XML_REGEXP_SEPAR) &&
2234 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2235 ((type2 >= XML_REGEXP_PUNCT) &&
2236 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2237 ((type2 >= XML_REGEXP_SYMBOL) &&
2238 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2239 ((type2 >= XML_REGEXP_OTHER) &&
2240 (type2 <= XML_REGEXP_OTHER_NA))
2243 case XML_REGEXP_NOTINITNAME:
2245 case XML_REGEXP_NAMECHAR:
2247 if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
2248 ((type2 >= XML_REGEXP_MARK) &&
2249 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2250 ((type2 >= XML_REGEXP_PUNCT) &&
2251 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2252 ((type2 >= XML_REGEXP_SEPAR) &&
2253 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2254 ((type2 >= XML_REGEXP_SYMBOL) &&
2255 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2256 ((type2 >= XML_REGEXP_OTHER) &&
2257 (type2 <= XML_REGEXP_OTHER_NA))
2260 case XML_REGEXP_NOTNAMECHAR:
2262 case XML_REGEXP_DECIMAL:
2264 if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2265 (type2 == XML_REGEXP_REALCHAR) ||
2266 ((type2 >= XML_REGEXP_LETTER) &&
2267 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2268 ((type2 >= XML_REGEXP_MARK) &&
2269 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2270 ((type2 >= XML_REGEXP_PUNCT) &&
2271 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2272 ((type2 >= XML_REGEXP_SEPAR) &&
2273 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2274 ((type2 >= XML_REGEXP_SYMBOL) &&
2275 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2276 ((type2 >= XML_REGEXP_OTHER) &&
2277 (type2 <= XML_REGEXP_OTHER_NA))
2280 case XML_REGEXP_NOTDECIMAL:
2282 case XML_REGEXP_REALCHAR:
/* NOTE(review): this REALCHAR test compares against NOTDECIMAL, not
 * NOTREALCHAR - confirm that is intended. */
2284 if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2285 ((type2 >= XML_REGEXP_MARK) &&
2286 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2287 ((type2 >= XML_REGEXP_PUNCT) &&
2288 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2289 ((type2 >= XML_REGEXP_SEPAR) &&
2290 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2291 ((type2 >= XML_REGEXP_SYMBOL) &&
2292 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2293 ((type2 >= XML_REGEXP_OTHER) &&
2294 (type2 <= XML_REGEXP_OTHER_NA))
2297 case XML_REGEXP_NOTREALCHAR:
/* From here on each top-level category only checks the upper bound of
 * its own sub-category range. */
2304 case XML_REGEXP_LETTER:
2305 if (type2 <= XML_REGEXP_LETTER_OTHERS)
2308 case XML_REGEXP_LETTER_UPPERCASE:
2309 case XML_REGEXP_LETTER_LOWERCASE:
2310 case XML_REGEXP_LETTER_TITLECASE:
2311 case XML_REGEXP_LETTER_MODIFIER:
2312 case XML_REGEXP_LETTER_OTHERS:
2314 case XML_REGEXP_MARK:
2315 if (type2 <= XML_REGEXP_MARK_ENCLOSING)
2318 case XML_REGEXP_MARK_NONSPACING:
2319 case XML_REGEXP_MARK_SPACECOMBINING:
2320 case XML_REGEXP_MARK_ENCLOSING:
2322 case XML_REGEXP_NUMBER:
2323 if (type2 <= XML_REGEXP_NUMBER_OTHERS)
2326 case XML_REGEXP_NUMBER_DECIMAL:
2327 case XML_REGEXP_NUMBER_LETTER:
2328 case XML_REGEXP_NUMBER_OTHERS:
2330 case XML_REGEXP_PUNCT:
2331 if (type2 <= XML_REGEXP_PUNCT_OTHERS)
2334 case XML_REGEXP_PUNCT_CONNECTOR:
2335 case XML_REGEXP_PUNCT_DASH:
2336 case XML_REGEXP_PUNCT_OPEN:
2337 case XML_REGEXP_PUNCT_CLOSE:
2338 case XML_REGEXP_PUNCT_INITQUOTE:
2339 case XML_REGEXP_PUNCT_FINQUOTE:
2340 case XML_REGEXP_PUNCT_OTHERS:
2342 case XML_REGEXP_SEPAR:
2343 if (type2 <= XML_REGEXP_SEPAR_PARA)
2346 case XML_REGEXP_SEPAR_SPACE:
2347 case XML_REGEXP_SEPAR_LINE:
2348 case XML_REGEXP_SEPAR_PARA:
2350 case XML_REGEXP_SYMBOL:
2351 if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
2354 case XML_REGEXP_SYMBOL_MATH:
2355 case XML_REGEXP_SYMBOL_CURRENCY:
2356 case XML_REGEXP_SYMBOL_MODIFIER:
2357 case XML_REGEXP_SYMBOL_OTHERS:
2359 case XML_REGEXP_OTHER:
2360 if (type2 <= XML_REGEXP_OTHER_NA)
2363 case XML_REGEXP_OTHER_CONTROL:
2364 case XML_REGEXP_OTHER_FORMAT:
2365 case XML_REGEXP_OTHER_PRIVATE:
2366 case XML_REGEXP_OTHER_NA:
/*
 * xmlFAEqualAtoms: test whether two atoms are the same atom definition.
 *
 * atom1, atom2: the atoms to compare; a NULL argument is special-cased
 *               first (the value returned in that case is not visible).
 * deep:         flag received from the caller; its use is not visible in
 *               this listing.
 *
 * Visible logic: atoms of different types take the early branch; for
 * STRING atoms the valuep pointers are compared (pointer identity), for
 * CHARVAL atoms the codepoint fields are compared.
 *
 * NOTE(review): the EPSILON and RANGES handling and the final return are
 * on lines that this listing elides.
 */
2386xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2,
int deep) {
2391 if ((atom1 ==
NULL) || (atom2 ==
NULL))
2394 if (atom1->type != atom2->type)
2396 switch (atom1->type) {
2397 case XML_REGEXP_EPSILON:
2400 case XML_REGEXP_STRING:
2402 ret = (atom1->valuep == atom2->valuep);
2407 case XML_REGEXP_CHARVAL:
2408 ret = (atom1->codepoint == atom2->codepoint);
2410 case XML_REGEXP_RANGES:
/*
 * xmlFACompareAtoms: compare two atoms for the determinism analysis.
 *
 * atom1, atom2: the atoms to compare; NULL arguments and ANYCHAR atoms
 *               are special-cased before anything else.
 * deep:         flag received from the caller; its use is not visible in
 *               this listing.
 *
 * Visible logic: when the types differ the decision is first delegated to
 * xmlFACompareAtomTypes(); the switch then handles STRING atoms (valuep
 * pointer test, then xmlRegStrEqualWildcard on val1/val2), CHARVAL atoms
 * (codepoint equality or xmlRegCheckCharacter against atom2) and RANGES
 * atoms (every range of atom1 against every range of atom2 through
 * xmlFACompareRanges). EPSILON jumps to the not_determinist label.
 *
 * NOTE(review): return statements, the label itself and how the neg flags
 * alter the result are on elided lines - confirm in the complete file.
 */
2431xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2,
int deep) {
2436 if ((atom1 ==
NULL) || (atom2 ==
NULL))
2439 if ((atom1->type == XML_REGEXP_ANYCHAR) ||
2440 (atom2->type == XML_REGEXP_ANYCHAR))
/* presumably orders the pair by type before the switch on atom1->type -
 * the body of this branch is not visible */
2443 if (atom1->type > atom2->type) {
2449 if (atom1->type != atom2->type) {
2450 ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
2455 switch (atom1->type) {
2456 case XML_REGEXP_STRING:
2458 ret = (atom1->valuep != atom2->valuep);
2466 if (compound1 != compound2)
2469 ret = xmlRegStrEqualWildcard(val1, val2);
2472 case XML_REGEXP_EPSILON:
2473 goto not_determinist;
2474 case XML_REGEXP_CHARVAL:
2475 if (atom2->type == XML_REGEXP_CHARVAL) {
2476 ret = (atom1->codepoint == atom2->codepoint);
2478 ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
2483 case XML_REGEXP_RANGES:
2484 if (atom2->type == XML_REGEXP_RANGES) {
2486 xmlRegRangePtr
r1,
r2;
/* pairwise comparison: nbRanges(atom1) x nbRanges(atom2) calls */
2491 for (
i = 0;
i < atom1->nbRanges;
i++) {
2492 for (
j = 0;
j < atom2->nbRanges;
j++) {
2493 r1 = atom1->ranges[
i];
2494 r2 = atom2->ranges[
j];
2495 res = xmlFACompareRanges(
r1,
r2);
2506 goto not_determinist;
2509 if (atom1->neg != atom2->neg) {
/*
 * xmlFARecurseDeterminism: follow atom-less transitions out of a state
 * and compare the transitions found there with a reference atom.
 *
 * ctxt:   parser context; supplies the state table and the flags word.
 * state:  state whose transitions are scanned.
 * fromnr: state number; transitions leading back to it get the same
 *         early branch as transitions with a negative target.
 * tonr:   state number the reference transition leads to.
 * atom:   the reference atom.
 *
 * Visible logic: a state already marked XML_REGEXP_MARK_VISITED takes an
 * early branch. A transition without an atom marks the state as visited
 * and recurses into its target. A transition whose atom compares positive
 * through xmlFACompareAtoms() is then checked for a different target or a
 * non-equal atom.
 *
 * NOTE(review): what is returned, and how the AM_AUTOMATA_RNG flag changes
 * the comparison depth, is on elided lines.
 */
2527xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr
state,
2528 int fromnr,
int tonr, xmlRegAtomPtr atom) {
2531 int transnr, nbTrans;
2537 if (
state->markd == XML_REGEXP_MARK_VISITED)
2540 if (ctxt->flags & AM_AUTOMATA_RNG)
/* the transition count is sampled once, before the loop */
2547 nbTrans =
state->nbTrans;
2548 for (transnr = 0;transnr < nbTrans;transnr++) {
2549 t1 = &(
state->trans[transnr]);
2553 if ((t1->to < 0) || (t1->to == fromnr))
2555 if (t1->atom ==
NULL) {
2556 state->markd = XML_REGEXP_MARK_VISITED;
2557 res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2558 fromnr, tonr, atom);
2565 if (xmlFACompareAtoms(t1->atom, atom, deep)) {
2567 if ((t1->to != tonr) ||
2568 (!xmlFAEqualAtoms(t1->atom, atom, deep)))
/*
 * xmlFAFinishRecurseDeterminism: walk the same atom-less transitions that
 * xmlFARecurseDeterminism() follows, starting from a visited state.
 *
 * ctxt:  parser context; supplies the state table.
 * state: state to start from; a state whose markd is not
 *        XML_REGEXP_MARK_VISITED takes the early branch.
 *
 * Visible logic: for every transition that has no atom and a non-negative
 * target, the function recurses into the target state.
 *
 * NOTE(review): presumably the visited mark is reset on an elided line
 * before the loop - confirm.
 */
2584xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr
state) {
2585 int transnr, nbTrans;
2589 if (
state->markd != XML_REGEXP_MARK_VISITED)
2593 nbTrans =
state->nbTrans;
2594 for (transnr = 0; transnr < nbTrans; transnr++) {
2595 xmlRegTransPtr t1 = &
state->trans[transnr];
2596 if ((t1->atom ==
NULL) && (t1->to >= 0))
2597 xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
/*
 * xmlFAComputesDeterminism: decide whether the automaton is deterministic
 * and cache the answer in ctxt->determinist.
 *
 * ctxt: parser context holding the states of the automaton.
 *
 * Returns the cached ctxt->determinist when it is not -1; otherwise the
 * value computed below is stored into ctxt->determinist (the final return
 * is on an elided line).
 *
 * Visible logic, two passes over all states, each looking only at states
 * with at least two transitions and comparing every transition t1 with
 * the transitions before it (index below transnr):
 *  - pass 1 looks for pairs with the same target whose atoms, counter and
 *    count are all equal;
 *  - pass 2 looks for pairs whose atoms compare positive through
 *    xmlFACompareAtoms(); such pairs are checked for differing targets,
 *    counters or atoms, and atom-less t2 transitions are explored with
 *    xmlFARecurseDeterminism() followed by xmlFAFinishRecurseDeterminism().
 *
 * NOTE(review): what is done with the pairs found in either pass is on
 * elided lines - confirm in the complete file.
 */
2610xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
2611 int statenr, transnr;
2612 xmlRegStatePtr
state;
2613 xmlRegTransPtr t1, t2,
last;
2618 if (ctxt->determinist != -1)
2619 return(ctxt->determinist);
2621 if (ctxt->flags & AM_AUTOMATA_RNG)
/* pass 1: identical transitions out of the same state */
2627 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2628 state = ctxt->states[statenr];
2631 if (
state->nbTrans < 2)
2633 for (transnr = 0;transnr <
state->nbTrans;transnr++) {
2634 t1 = &(
state->trans[transnr]);
2639 if (t1->atom ==
NULL) {
2645 for (
i = 0;
i < transnr;
i++) {
2649 if (t2->atom !=
NULL) {
2650 if (t1->to == t2->to) {
2655 if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
2656 (t1->counter == t2->counter) &&
2657 (t1->count == t2->count))
/* pass 2: overlapping atoms out of the same state */
2669 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2670 state = ctxt->states[statenr];
2673 if (
state->nbTrans < 2)
2676 for (transnr = 0;transnr <
state->nbTrans;transnr++) {
2677 t1 = &(
state->trans[transnr]);
2682 if (t1->atom ==
NULL) {
2687 for (
i = 0;
i < transnr;
i++) {
2691 if (t2->atom !=
NULL) {
2696 if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
2701 if ((t1->to != t2->to) ||
2702 (t1->counter == t2->counter) ||
2703 (!xmlFAEqualAtoms(t1->atom, t2->atom, deep)))
2717 res = xmlFARecurseDeterminism(ctxt, ctxt->states[t2->to],
2718 statenr, t1->to, t1->atom);
2719 xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t2->to]);
2754 ctxt->determinist =
ret;
/*
 * xmlRegCheckCharacterRange: test one code point against one character
 * class, explicit range or named block.
 *
 * type:      the class to test against (xmlRegAtomType).
 * codepoint: the character to test.
 * neg:       negation flag; its use is on lines this listing elides.
 *
 * The body also reads start, end and blockName; their declarations are on
 * elided parameter lines.
 *
 * Visible logic: CHARVAL tests start <= codepoint <= end; ANYCHAR accepts
 * everything except newline and carriage return; ANYSPACE accepts newline,
 * carriage return, tab and space; each category type calls the matching
 * xmlUCSIsCat*() predicate; BLOCK_NAME calls xmlUCSIsBlock() with
 * blockName.
 *
 * NOTE(review): the handling of the NOT* classes, of the STRING / SUBREG /
 * RANGES / EPSILON group and the final return is not visible here.
 */
2765xmlRegCheckCharacterRange(xmlRegAtomType
type,
int codepoint,
int neg,
2770 case XML_REGEXP_STRING:
2771 case XML_REGEXP_SUBREG:
2772 case XML_REGEXP_RANGES:
2773 case XML_REGEXP_EPSILON:
2775 case XML_REGEXP_ANYCHAR:
2776 ret = ((codepoint !=
'\n') && (codepoint !=
'\r'));
2778 case XML_REGEXP_CHARVAL:
2779 ret = ((codepoint >=
start) && (codepoint <=
end));
2781 case XML_REGEXP_NOTSPACE:
2784 case XML_REGEXP_ANYSPACE:
2785 ret = ((codepoint ==
'\n') || (codepoint ==
'\r') ||
2786 (codepoint ==
'\t') || (codepoint ==
' '));
2788 case XML_REGEXP_NOTINITNAME:
2791 case XML_REGEXP_INITNAME:
2793 (codepoint ==
'_') || (codepoint ==
':'));
2795 case XML_REGEXP_NOTNAMECHAR:
2798 case XML_REGEXP_NAMECHAR:
2800 (codepoint ==
'.') || (codepoint ==
'-') ||
2801 (codepoint ==
'_') || (codepoint ==
':') ||
2804 case XML_REGEXP_NOTDECIMAL:
/* DECIMAL uses the same predicate as NUMBER_DECIMAL further down */
2807 case XML_REGEXP_DECIMAL:
2808 ret = xmlUCSIsCatNd(codepoint);
2810 case XML_REGEXP_REALCHAR:
/* three successive category probes (P, Z, C); the conditions linking
 * them are on elided lines */
2813 case XML_REGEXP_NOTREALCHAR:
2814 ret = xmlUCSIsCatP(codepoint);
2816 ret = xmlUCSIsCatZ(codepoint);
2818 ret = xmlUCSIsCatC(codepoint);
2820 case XML_REGEXP_LETTER:
2821 ret = xmlUCSIsCatL(codepoint);
2823 case XML_REGEXP_LETTER_UPPERCASE:
2824 ret = xmlUCSIsCatLu(codepoint);
2826 case XML_REGEXP_LETTER_LOWERCASE:
2827 ret = xmlUCSIsCatLl(codepoint);
2829 case XML_REGEXP_LETTER_TITLECASE:
2830 ret = xmlUCSIsCatLt(codepoint);
2832 case XML_REGEXP_LETTER_MODIFIER:
2833 ret = xmlUCSIsCatLm(codepoint);
2835 case XML_REGEXP_LETTER_OTHERS:
2836 ret = xmlUCSIsCatLo(codepoint);
2838 case XML_REGEXP_MARK:
2839 ret = xmlUCSIsCatM(codepoint);
2841 case XML_REGEXP_MARK_NONSPACING:
2842 ret = xmlUCSIsCatMn(codepoint);
2844 case XML_REGEXP_MARK_SPACECOMBINING:
2845 ret = xmlUCSIsCatMc(codepoint);
2847 case XML_REGEXP_MARK_ENCLOSING:
2848 ret = xmlUCSIsCatMe(codepoint);
2850 case XML_REGEXP_NUMBER:
2851 ret = xmlUCSIsCatN(codepoint);
2853 case XML_REGEXP_NUMBER_DECIMAL:
2854 ret = xmlUCSIsCatNd(codepoint);
2856 case XML_REGEXP_NUMBER_LETTER:
2857 ret = xmlUCSIsCatNl(codepoint);
2859 case XML_REGEXP_NUMBER_OTHERS:
2860 ret = xmlUCSIsCatNo(codepoint);
2862 case XML_REGEXP_PUNCT:
2863 ret = xmlUCSIsCatP(codepoint);
2865 case XML_REGEXP_PUNCT_CONNECTOR:
2866 ret = xmlUCSIsCatPc(codepoint);
2868 case XML_REGEXP_PUNCT_DASH:
2869 ret = xmlUCSIsCatPd(codepoint);
2871 case XML_REGEXP_PUNCT_OPEN:
2872 ret = xmlUCSIsCatPs(codepoint);
2874 case XML_REGEXP_PUNCT_CLOSE:
2875 ret = xmlUCSIsCatPe(codepoint);
2877 case XML_REGEXP_PUNCT_INITQUOTE:
2878 ret = xmlUCSIsCatPi(codepoint);
2880 case XML_REGEXP_PUNCT_FINQUOTE:
2881 ret = xmlUCSIsCatPf(codepoint);
2883 case XML_REGEXP_PUNCT_OTHERS:
2884 ret = xmlUCSIsCatPo(codepoint);
2886 case XML_REGEXP_SEPAR:
2887 ret = xmlUCSIsCatZ(codepoint);
2889 case XML_REGEXP_SEPAR_SPACE:
2890 ret = xmlUCSIsCatZs(codepoint);
2892 case XML_REGEXP_SEPAR_LINE:
2893 ret = xmlUCSIsCatZl(codepoint);
2895 case XML_REGEXP_SEPAR_PARA:
2896 ret = xmlUCSIsCatZp(codepoint);
2898 case XML_REGEXP_SYMBOL:
2899 ret = xmlUCSIsCatS(codepoint);
2901 case XML_REGEXP_SYMBOL_MATH:
2902 ret = xmlUCSIsCatSm(codepoint);
2904 case XML_REGEXP_SYMBOL_CURRENCY:
2905 ret = xmlUCSIsCatSc(codepoint);
2907 case XML_REGEXP_SYMBOL_MODIFIER:
2908 ret = xmlUCSIsCatSk(codepoint);
2910 case XML_REGEXP_SYMBOL_OTHERS:
2911 ret = xmlUCSIsCatSo(codepoint);
2913 case XML_REGEXP_OTHER:
2914 ret = xmlUCSIsCatC(codepoint);
2916 case XML_REGEXP_OTHER_CONTROL:
2917 ret = xmlUCSIsCatCc(codepoint);
2919 case XML_REGEXP_OTHER_FORMAT:
2920 ret = xmlUCSIsCatCf(codepoint);
2922 case XML_REGEXP_OTHER_PRIVATE:
2923 ret = xmlUCSIsCatCo(codepoint);
2925 case XML_REGEXP_OTHER_NA:
2930 case XML_REGEXP_BLOCK_NAME:
2931 ret = xmlUCSIsBlock(codepoint, (
const char *) blockName);
/*
 * xmlRegCheckCharacter: test one code point against an atom.
 *
 * atom:      the atom to test against.
 * codepoint: the character to test.
 *
 * Visible logic by atom type:
 *  - CHARVAL returns whether codepoint equals atom->codepoint;
 *  - RANGES walks atom->ranges and calls xmlRegCheckCharacterRange() for
 *    each one, with three separate call sites selected by range->neg
 *    (equal to 2, otherwise non-zero, otherwise zero);
 *  - STRING is not implemented: it only prints a TODO message to stdout;
 *  - every character class type and BLOCK_NAME delegate to
 *    xmlRegCheckCharacterRange() with neg, start and end all 0 and
 *    atom->valuep as the block name.
 *
 * NOTE(review): the result for SUBREG / EPSILON, how the per-range results
 * are combined and the final return are on elided lines.
 */
2940xmlRegCheckCharacter(xmlRegAtomPtr atom,
int codepoint) {
2942 xmlRegRangePtr
range;
2947 switch (atom->type) {
2948 case XML_REGEXP_SUBREG:
2949 case XML_REGEXP_EPSILON:
2951 case XML_REGEXP_CHARVAL:
2952 return(codepoint == atom->codepoint);
2953 case XML_REGEXP_RANGES: {
2956 for (
i = 0;
i < atom->nbRanges;
i++) {
2958 if (
range->neg == 2) {
2959 ret = xmlRegCheckCharacterRange(
range->type, codepoint,
2964 }
else if (
range->neg) {
2965 ret = xmlRegCheckCharacterRange(
range->type, codepoint,
2973 ret = xmlRegCheckCharacterRange(
range->type, codepoint,
2982 case XML_REGEXP_STRING:
2983 printf(
"TODO: XML_REGEXP_STRING\n");
2985 case XML_REGEXP_ANYCHAR:
2986 case XML_REGEXP_ANYSPACE:
2987 case XML_REGEXP_NOTSPACE:
2988 case XML_REGEXP_INITNAME:
2989 case XML_REGEXP_NOTINITNAME:
2990 case XML_REGEXP_NAMECHAR:
2991 case XML_REGEXP_NOTNAMECHAR:
2992 case XML_REGEXP_DECIMAL:
2993 case XML_REGEXP_NOTDECIMAL:
2994 case XML_REGEXP_REALCHAR:
2995 case XML_REGEXP_NOTREALCHAR:
2996 case XML_REGEXP_LETTER:
2997 case XML_REGEXP_LETTER_UPPERCASE:
2998 case XML_REGEXP_LETTER_LOWERCASE:
2999 case XML_REGEXP_LETTER_TITLECASE:
3000 case XML_REGEXP_LETTER_MODIFIER:
3001 case XML_REGEXP_LETTER_OTHERS:
3002 case XML_REGEXP_MARK:
3003 case XML_REGEXP_MARK_NONSPACING:
3004 case XML_REGEXP_MARK_SPACECOMBINING:
3005 case XML_REGEXP_MARK_ENCLOSING:
3006 case XML_REGEXP_NUMBER:
3007 case XML_REGEXP_NUMBER_DECIMAL:
3008 case XML_REGEXP_NUMBER_LETTER:
3009 case XML_REGEXP_NUMBER_OTHERS:
3010 case XML_REGEXP_PUNCT:
3011 case XML_REGEXP_PUNCT_CONNECTOR:
3012 case XML_REGEXP_PUNCT_DASH:
3013 case XML_REGEXP_PUNCT_OPEN:
3014 case XML_REGEXP_PUNCT_CLOSE:
3015 case XML_REGEXP_PUNCT_INITQUOTE:
3016 case XML_REGEXP_PUNCT_FINQUOTE:
3017 case XML_REGEXP_PUNCT_OTHERS:
3018 case XML_REGEXP_SEPAR:
3019 case XML_REGEXP_SEPAR_SPACE:
3020 case XML_REGEXP_SEPAR_LINE:
3021 case XML_REGEXP_SEPAR_PARA:
3022 case XML_REGEXP_SYMBOL:
3023 case XML_REGEXP_SYMBOL_MATH:
3024 case XML_REGEXP_SYMBOL_CURRENCY:
3025 case XML_REGEXP_SYMBOL_MODIFIER:
3026 case XML_REGEXP_SYMBOL_OTHERS:
3027 case XML_REGEXP_OTHER:
3028 case XML_REGEXP_OTHER_CONTROL:
3029 case XML_REGEXP_OTHER_FORMAT:
3030 case XML_REGEXP_OTHER_PRIVATE:
3031 case XML_REGEXP_OTHER_NA:
3032 case XML_REGEXP_BLOCK_NAME:
3033 ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
3034 (
const xmlChar *)atom->valuep);
/*
 * xmlFARegExecSave: push the current execution position onto the
 * rollback stack so that matching can later resume from the next branch.
 *
 * exec: the execution context; on failure exec->status is set to
 *       XML_REGEXP_INTERNAL_LIMIT (nbPush above MAX_PUSH) or
 *       XML_REGEXP_OUT_OF_MEMORY (allocation failure).
 *
 * Visible logic: the stack starts with 4 entries and doubles when full;
 * newly obtained entries are zeroed so that their counts pointer starts
 * out NULL. The entry records state, index and transno + 1 as the next
 * branch. When the regexp has counters, a per-entry counts array is
 * allocated on first use of that entry and the live counters are copied
 * into it.
 *
 * NOTE(review): the early returns after each error and the nbPush
 * increment are on lines this listing elides.
 */
3049xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
3051 if (exec->nbPush > MAX_PUSH) {
3052 exec->status = XML_REGEXP_INTERNAL_LIMIT;
3058 if (exec->maxRollbacks == 0) {
3059 exec->maxRollbacks = 4;
3060 exec->rollbacks = (xmlRegExecRollback *)
xmlMalloc(exec->maxRollbacks *
3061 sizeof(xmlRegExecRollback));
3062 if (exec->rollbacks ==
NULL) {
3063 xmlRegexpErrMemory(
NULL,
"saving regexp");
3064 exec->maxRollbacks = 0;
3065 exec->status = XML_REGEXP_OUT_OF_MEMORY;
3068 memset(exec->rollbacks, 0,
3069 exec->maxRollbacks *
sizeof(xmlRegExecRollback));
3070 }
else if (exec->nbRollbacks >= exec->maxRollbacks) {
3071 xmlRegExecRollback *tmp;
/* len remembers the old capacity so only the new half is zeroed */
3072 int len = exec->maxRollbacks;
3074 exec->maxRollbacks *= 2;
3075 tmp = (xmlRegExecRollback *)
xmlRealloc(exec->rollbacks,
3076 exec->maxRollbacks *
sizeof(xmlRegExecRollback));
/* presumably the realloc-failure path: the capacity is halved back */
3078 xmlRegexpErrMemory(
NULL,
"saving regexp");
3079 exec->maxRollbacks /= 2;
3080 exec->status = XML_REGEXP_OUT_OF_MEMORY;
3083 exec->rollbacks = tmp;
3084 tmp = &exec->rollbacks[
len];
3085 memset(tmp, 0, (exec->maxRollbacks -
len) *
sizeof(xmlRegExecRollback));
3087 exec->rollbacks[exec->nbRollbacks].state = exec->state;
3088 exec->rollbacks[exec->nbRollbacks].index = exec->index;
3089 exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
3090 if (exec->comp->nbCounters > 0) {
3091 if (exec->rollbacks[exec->nbRollbacks].counts ==
NULL) {
3092 exec->rollbacks[exec->nbRollbacks].counts = (
int *)
3093 xmlMalloc(exec->comp->nbCounters *
sizeof(
int));
3094 if (exec->rollbacks[exec->nbRollbacks].counts ==
NULL) {
3095 xmlRegexpErrMemory(
NULL,
"saving regexp");
3096 exec->status = XML_REGEXP_OUT_OF_MEMORY;
3100 memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
3101 exec->comp->nbCounters *
sizeof(
int));
3103 exec->nbRollbacks++;
/*
 * xmlFARegExecRollBack: pop the most recent rollback entry and restore
 * the execution position from it.
 *
 * exec: the execution context. With an empty rollback stack
 *       exec->status becomes XML_REGEXP_NOT_FOUND; a saved entry without
 *       a counts array, while the regexp has counters, yields
 *       XML_REGEXP_INTERNAL_ERROR.
 *
 * Visible logic: state, index and transno (from the saved nextbranch) are
 * restored, then the saved counter values are copied back into
 * exec->counts.
 *
 * NOTE(review): a non-OK status takes an early branch whose body is not
 * visible, presumably a return.
 */
3107xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
3108 if (exec->status != XML_REGEXP_OK)
3110 if (exec->nbRollbacks <= 0) {
3111 exec->status = XML_REGEXP_NOT_FOUND;
3114 exec->nbRollbacks--;
3115 exec->state = exec->rollbacks[exec->nbRollbacks].state;
3116 exec->index = exec->rollbacks[exec->nbRollbacks].index;
3117 exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
3118 if (exec->comp->nbCounters > 0) {
3119 if (exec->rollbacks[exec->nbRollbacks].counts ==
NULL) {
3121 exec->status = XML_REGEXP_INTERNAL_ERROR;
3125 memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
3126 exec->comp->nbCounters *
sizeof(
int));
/*
 * NOTE(review): the line carrying this function's name and parameters is
 * not visible in this listing. The body reads a variable named comp and
 * walks exec->inputString, so presumably this is the routine that runs a
 * compiled regexp over a string - confirm against the complete file.
 *
 * Visible flow:
 *  - an execution context on the stack (execval) is initialised, starting
 *    at comp->states[0]; a zeroed counter array is allocated when the
 *    regexp has counters, returning XML_REGEXP_OUT_OF_MEMORY on failure;
 *  - the main loop runs while the status is OK and either input remains
 *    or the current state is not a final state;
 *  - for each transition the code checks counter bounds, tests the next
 *    code point with xmlRegCheckCharacter(), saves alternatives through
 *    xmlFARegExecSave() and moves to comp->states[trans->to];
 *  - when no transition applies, determinist is cleared and
 *    xmlFARegExecRollBack() restores an earlier alternative;
 *  - a negative code point sets XML_REGEXP_INVALID_UTF8;
 *  - on exit the per-entry counts arrays of the rollback stack are freed;
 *    a NULL state returns XML_REGEXP_INTERNAL_ERROR, and any status other
 *    than OK / NOT_FOUND is returned as is.
 *
 * The values returned for the OK and NOT_FOUND cases are on elided lines.
 */
3139 xmlRegExecCtxt execval;
3140 xmlRegExecCtxtPtr exec = &execval;
3141 int ret, codepoint = 0,
len, deter;
3146 exec->determinist = 1;
3147 exec->maxRollbacks = 0;
3148 exec->nbRollbacks = 0;
3149 exec->rollbacks =
NULL;
3150 exec->status = XML_REGEXP_OK;
3152 exec->state = comp->states[0];
3154 exec->transcount = 0;
3155 exec->inputStack =
NULL;
3156 exec->inputStackMax = 0;
3157 if (comp->nbCounters > 0) {
3158 exec->counts = (
int *)
xmlMalloc(comp->nbCounters *
sizeof(
int));
3159 if (exec->counts ==
NULL) {
3160 xmlRegexpErrMemory(
NULL,
"running regexp");
3161 return(XML_REGEXP_OUT_OF_MEMORY);
3163 memset(exec->counts, 0, comp->nbCounters *
sizeof(
int));
3165 exec->counts =
NULL;
/* main matching loop */
3166 while ((exec->status == XML_REGEXP_OK) && (exec->state !=
NULL) &&
3167 ((exec->inputString[exec->index] != 0) ||
3168 ((exec->state !=
NULL) &&
3169 (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3170 xmlRegTransPtr trans;
3181 if ((exec->inputString[exec->index] == 0) && (exec->counts ==
NULL)) {
3186 if (exec->transno < exec->state->nbTrans) {
3187 trans = &exec->state->trans[exec->transno];
3188 if (trans->to >=0) {
3190 if (!((atom->min == 0) && (atom->max > 0)))
3197 exec->transcount = 0;
3198 for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3199 trans = &exec->state->trans[exec->transno];
3205 if (trans->count >= 0) {
3209 if (exec->counts ==
NULL) {
3210 exec->
status = XML_REGEXP_INTERNAL_ERROR;
3217 count = exec->counts[trans->count];
3218 counter = &exec->comp->counters[trans->count];
3222 }
else if (atom ==
NULL) {
3224 exec->status = XML_REGEXP_INTERNAL_ERROR;
3226 }
else if (exec->inputString[exec->index] != 0) {
3230 if (codepoint < 0) {
3231 exec->status = XML_REGEXP_INVALID_UTF8;
3234 ret = xmlRegCheckCharacter(atom, codepoint);
3235 if ((
ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
3236 xmlRegStatePtr to = comp->states[trans->to];
3244 if (trans->counter >= 0) {
3247 if ((exec->counts ==
NULL) ||
3248 (exec->comp ==
NULL) ||
3249 (exec->comp->counters ==
NULL)) {
3250 exec->
status = XML_REGEXP_INTERNAL_ERROR;
3253 counter = &exec->comp->counters[trans->counter];
3254 if (exec->counts[trans->counter] >=
counter->max)
/* other transitions remain from this state: keep a rollback point */
3258 if (exec->state->nbTrans > exec->transno + 1) {
3259 xmlFARegExecSave(exec);
3260 if (exec->status != XML_REGEXP_OK)
3263 if (trans->counter >= 0) {
3264 exec->counts[trans->counter]++;
3266 exec->transcount = 1;
3271 if (exec->transcount == atom->max) {
3278 if (exec->inputString[exec->index] == 0) {
3282 if (exec->transcount >= atom->min) {
/* transno and state are kept in locals around the save and written
 * back afterwards */
3283 int transno = exec->transno;
3284 xmlRegStatePtr
state = exec->state;
3291 xmlFARegExecSave(exec);
3292 if (exec->status != XML_REGEXP_OK)
3294 exec->transno = transno;
3295 exec->state =
state;
3299 &exec->inputString[exec->index], &
len);
3300 if (codepoint < 0) {
3301 exec->status = XML_REGEXP_INVALID_UTF8;
3304 ret = xmlRegCheckCharacter(atom, codepoint);
3307 if (exec->transcount < atom->min)
3319 if (trans->counter >= 0) {
3320 if (exec->counts ==
NULL) {
3321 exec->status = XML_REGEXP_INTERNAL_ERROR;
3324 exec->counts[trans->counter]--;
3326 }
else if ((
ret == 0) && (atom->min == 0) && (atom->max > 0)) {
3332 exec->transcount = 1;
3336 }
else if ((atom->min == 0) && (atom->max > 0)) {
3338 exec->transcount = 1;
3343 if ((trans->nd == 1) ||
3344 ((trans->count >= 0) && (deter == 0) &&
3345 (exec->state->nbTrans > exec->transno + 1))) {
3346 xmlFARegExecSave(exec);
3347 if (exec->status != XML_REGEXP_OK)
3350 if (trans->counter >= 0) {
3354 if ((exec->counts ==
NULL) ||
3355 (exec->comp ==
NULL) ||
3356 (exec->comp->counters ==
NULL)) {
3357 exec->
status = XML_REGEXP_INTERNAL_ERROR;
3360 counter = &exec->comp->counters[trans->counter];
3361 if (exec->counts[trans->counter] >=
counter->max)
3363 exec->counts[trans->counter]++;
3365 if ((trans->count >= 0) &&
3366 (trans->count < REGEXP_ALL_COUNTER)) {
3367 if (exec->counts ==
NULL) {
3368 exec->
status = XML_REGEXP_INTERNAL_ERROR;
3371 exec->counts[trans->count] = 0;
3373 exec->state = comp->states[trans->to];
3375 if (trans->atom !=
NULL) {
3379 }
else if (
ret < 0) {
3380 exec->status = XML_REGEXP_INTERNAL_ERROR;
3384 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3389 exec->determinist = 0;
3390 xmlFARegExecRollBack(exec);
/* cleanup: every allocated rollback slot may own a counts array, hence
 * the loop bound is maxRollbacks rather than nbRollbacks */
3396 if (exec->rollbacks !=
NULL) {
3397 if (exec->counts !=
NULL) {
3400 for (
i = 0;
i < exec->maxRollbacks;
i++)
3401 if (exec->rollbacks[
i].counts !=
NULL)
3402 xmlFree(exec->rollbacks[
i].counts);
3406 if (exec->state ==
NULL)
3407 return(XML_REGEXP_INTERNAL_ERROR);
3408 if (exec->counts !=
NULL)
3410 if (exec->status == XML_REGEXP_OK)
3412 if (exec->status == XML_REGEXP_NOT_FOUND)
3414 return(exec->status);
/*
 * xmlRegNewExecCtxt: allocate and initialise an execution context for a
 * compiled regexp.
 *
 * comp:     the compiled regexp; one with neither a compact table nor a
 *           states array takes an early branch (presumably a failure
 *           return - not visible here).
 * callback: user callback; its storage into the context is on an elided
 *           line.
 * data:     user data for the callback; likewise not visible here.
 *
 * Visible logic: the context is zero-filled, status is set to
 * XML_REGEXP_OK, and the start state is comp->states[0] when there is no
 * compact table. When the regexp has counters, the counts buffer is
 * cleared over 2 * nbCounters ints and errCounts points at its second
 * half; otherwise both pointers are NULL. errStateNo starts at -1.
 *
 * NOTE(review): the return statements and the release of exec on the
 * counter-allocation failure path are on elided lines.
 */
3435xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks
callback,
void *
data) {
3436 xmlRegExecCtxtPtr exec;
3440 if ((comp->compact ==
NULL) && (comp->states ==
NULL))
3442 exec = (xmlRegExecCtxtPtr)
xmlMalloc(
sizeof(xmlRegExecCtxt));
3444 xmlRegexpErrMemory(
NULL,
"creating execution context");
3447 memset(exec, 0,
sizeof(xmlRegExecCtxt));
3448 exec->inputString =
NULL;
3450 exec->determinist = 1;
3451 exec->maxRollbacks = 0;
3452 exec->nbRollbacks = 0;
3453 exec->rollbacks =
NULL;
3454 exec->status = XML_REGEXP_OK;
3456 if (comp->compact ==
NULL)
3457 exec->state = comp->states[0];
3459 exec->transcount = 0;
3462 if (comp->nbCounters > 0) {
/* the allocation size expression continues on an elided line; the
 * memset below covers nbCounters * sizeof(int) * 2 bytes */
3467 exec->counts = (
int *)
xmlMalloc(comp->nbCounters *
sizeof(
int)
3469 if (exec->counts ==
NULL) {
3470 xmlRegexpErrMemory(
NULL,
"creating execution context");
3474 memset(exec->counts, 0, comp->nbCounters *
sizeof(
int) * 2);
3475 exec->errCounts = &exec->counts[comp->nbCounters];
3477 exec->counts =
NULL;
3478 exec->errCounts =
NULL;
3480 exec->inputStackMax = 0;
3481 exec->inputStackNr = 0;
3482 exec->inputStack =
NULL;
3483 exec->errStateNo = -1;
3484 exec->errString =
NULL;
/*
 * xmlRegFreeExecCtxt: release an execution context and what it owns.
 *
 * exec: the context to free.
 *
 * Visible logic: when a rollback stack and a counts buffer both exist,
 * the counts array of every allocated rollback slot (all maxRollbacks of
 * them) is freed; the value string of every pushed input token (the first
 * inputStackNr entries) is freed; counts and errString are tested for
 * NULL before their release.
 *
 * NOTE(review): the xmlFree calls for the rollbacks array, counts,
 * inputStack, errString and exec itself are on elided lines.
 */
3496xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
3500 if (exec->rollbacks !=
NULL) {
3501 if (exec->counts !=
NULL) {
3504 for (
i = 0;
i < exec->maxRollbacks;
i++)
3505 if (exec->rollbacks[
i].counts !=
NULL)
3506 xmlFree(exec->rollbacks[
i].counts);
3510 if (exec->counts !=
NULL)
3512 if (exec->inputStack !=
NULL) {
3515 for (
i = 0;
i < exec->inputStackNr;
i++) {
3516 if (exec->inputStack[
i].value !=
NULL)
3517 xmlFree(exec->inputStack[
i].value);
3521 if (exec->errString !=
NULL)
/*
 * xmlFARegExecSaveInputString: append one pushed token to the input
 * stack of the execution context.
 *
 * exec:  the execution context.
 * value: the token string; how it is stored (copied or referenced) is on
 *        an elided line.
 * data:  user data stored next to the token (remaining parameter line
 *        not visible).
 *
 * Visible logic: the stack starts with 4 entries and doubles when
 * inputStackNr + 1 reaches the capacity, which leaves room for the entry
 * written after the increment: that trailing entry gets value and data
 * set to NULL.
 *
 * NOTE(review): the early returns on allocation failure are not visible;
 * the realloc-failure path visibly halves inputStackMax back.
 */
3527xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec,
const xmlChar *
value,
3529 if (exec->inputStackMax == 0) {
3530 exec->inputStackMax = 4;
3531 exec->inputStack = (xmlRegInputTokenPtr)
3532 xmlMalloc(exec->inputStackMax *
sizeof(xmlRegInputToken));
3533 if (exec->inputStack ==
NULL) {
3534 xmlRegexpErrMemory(
NULL,
"pushing input string");
3535 exec->inputStackMax = 0;
3538 }
else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
3539 xmlRegInputTokenPtr tmp;
3541 exec->inputStackMax *= 2;
3542 tmp = (xmlRegInputTokenPtr)
xmlRealloc(exec->inputStack,
3543 exec->inputStackMax *
sizeof(xmlRegInputToken));
3545 xmlRegexpErrMemory(
NULL,
"pushing input string");
3546 exec->inputStackMax /= 2;
3549 exec->inputStack = tmp;
3552 exec->inputStack[exec->inputStackNr].data =
data;
3553 exec->inputStackNr++;
/* terminating entry after the last pushed token */
3554 exec->inputStack[exec->inputStackNr].value =
NULL;
3555 exec->inputStack[exec->inputStackNr].data =
NULL;
/*
 * xmlRegStrEqualWildcard: compare an expected string with a value string
 * where a star character receives special treatment.
 *
 * expStr: the expected string.
 * valStr: the value string.
 *
 * Returns 1 when both pointers are identical (including both NULL) and 0
 * when exactly one of them is NULL. The results of the character-wise
 * comparison are on elided lines.
 *
 * Visible logic: on a mismatching character the code tests for a star in
 * valStr and then for a star in expStr (advancing expStr in that test);
 * a nested loop scans valStr and checks for XML_REG_STRING_SEPARATOR.
 * Both loops end at the terminating zero of valStr.
 *
 * NOTE(review): presumably a star matches one separator-delimited
 * segment - confirm against the complete file.
 */
3572xmlRegStrEqualWildcard(
const xmlChar *expStr,
const xmlChar *valStr) {
3573 if (expStr == valStr)
return(1);
3574 if (expStr ==
NULL)
return(0);
3575 if (valStr ==
NULL)
return(0);
3580 if (*expStr != *valStr) {
3582 if (*valStr ==
'*') {
3589 if ((*valStr != 0) && (*expStr != 0) && (*expStr++ ==
'*')) {
3591 if (*valStr == XML_REG_STRING_SEPARATOR)
3594 }
while (*valStr != 0);
3601 }
while (*valStr != 0);
/*
 * xmlRegCompactPushString: push one token into a regexp that is stored
 * in compact table form.
 *
 * exec: the execution context; exec->index is used as the current state
 *       number. The other parameter lines are elided; the body reads
 *       comp, value and data.
 *
 * Table layout as used below: each state owns a row of nbstrings + 1
 * cells in comp->compact; cell 0 of the row is compared against state
 * type constants (FINAL / SINK) and cell i + 1 is read as the target for
 * string i. comp->transdata is indexed with rows of nbstrings cells.
 *
 * Visible logic: each entry of comp->stringMap is compared with value
 * through xmlRegStrEqualWildcard(); on a match the user callback is
 * invoked (when both callback and transdata exist) and the type of the
 * target row is inspected. When the scan ends without an accepted match,
 * errStateNo records the state, status becomes XML_REGEXP_NOT_FOUND and
 * that code is returned.
 *
 * NOTE(review): the validity test on target, the state update and the
 * other return values are on elided lines.
 */
3621xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
3625 int state = exec->index;
3628 if ((comp ==
NULL) || (comp->compact ==
NULL) || (comp->stringMap ==
NULL))
3635 if (comp->compact[
state * (comp->nbstrings + 1)] ==
3636 XML_REGEXP_FINAL_STATE)
3644 for (
i = 0;
i < comp->nbstrings;
i++) {
3645 target = comp->compact[
state * (comp->nbstrings + 1) +
i + 1];
3648 if (xmlRegStrEqualWildcard(comp->stringMap[
i],
value)) {
3650 if ((exec->callback !=
NULL) && (comp->transdata !=
NULL)) {
3651 exec->callback(exec->data,
value,
3652 comp->transdata[
state * comp->nbstrings +
i],
data);
3654 if (comp->compact[
target * (comp->nbstrings + 1)] ==
3655 XML_REGEXP_SINK_STATE)
3658 if (comp->compact[
target * (comp->nbstrings + 1)] ==
3659 XML_REGEXP_FINAL_STATE)
3670 if (exec->errString !=
NULL)
3673 exec->errStateNo =
state;
3674 exec->status = XML_REGEXP_NOT_FOUND;
3675 return(XML_REGEXP_NOT_FOUND);
/*
 * xmlRegExecPushStringInternal: push one token into the automaton and
 * advance the execution context.
 *
 * exec:     the execution context.
 * value:    the token pushed.
 * data:     user data handed to the callback.
 * compound: flag set by the two-string entry point; its use is on lines
 *           this listing elides.
 *
 * Returns exec->status when the context is already in a non-OK state, the
 * result of xmlRegCompactPushString() for compact regexps, and at the end
 * either whether the current state is final (status OK) or exec->status.
 *
 * Visible logic:
 *  - when tokens are already queued, the new one is appended and
 *    processing continues from inputStack[exec->index];
 *  - transitions are tried in order; the REGEXP_ALL_LAX_COUNTER and
 *    REGEXP_ALL_COUNTER pseudo counters inspect the counters of the
 *    sibling transitions, ordinary counters are bounds-checked, and atoms
 *    are matched with xmlRegStrEqualWildcard();
 *  - before taking a transition while others remain, the token is queued
 *    (if the queue is empty) and a rollback point is saved;
 *  - moving into a sink state or failing to advance records errState and
 *    a copy of the counters in errCounts;
 *  - on failure determinist is cleared, xmlFARegExecRollBack() is called
 *    and the token at the restored index is reloaded.
 *
 * NOTE(review): many branch bodies, including the callback arguments and
 * most early returns, are not visible in this listing.
 */
3691xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec,
const xmlChar *
value,
3692 void *
data,
int compound) {
3693 xmlRegTransPtr trans;
3701 if (exec->comp ==
NULL)
3703 if (exec->status != XML_REGEXP_OK)
3704 return(exec->status);
3706 if (exec->comp->compact !=
NULL)
3707 return(xmlRegCompactPushString(exec, exec->comp,
value,
data));
3710 if (exec->state->type == XML_REGEXP_FINAL_STATE)
/* tokens are pending: queue the new one and resume with the token at
 * exec->index */
3719 if ((
value !=
NULL) && (exec->inputStackNr > 0)) {
3720 xmlFARegExecSaveInputString(exec,
value,
data);
3721 value = exec->inputStack[exec->index].value;
3722 data = exec->inputStack[exec->index].data;
3725 while ((exec->status == XML_REGEXP_OK) &&
3728 (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3738 exec->transcount = 0;
3739 for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3740 trans = &exec->state->trans[exec->transno];
3745 if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3759 for (
i = 0;
i < exec->state->nbTrans;
i++) {
3760 t = &exec->state->trans[
i];
3761 if ((
t->counter < 0) || (
t == trans))
3763 counter = &exec->comp->counters[
t->counter];
3764 count = exec->counts[
t->counter];
3765 if ((count < counter->
max) &&
3766 (
t->atom !=
NULL) &&
3772 (count < counter->
max) &&
3773 (
t->atom !=
NULL) &&
3780 }
else if (trans->count == REGEXP_ALL_COUNTER) {
3791 for (
i = 0;
i < exec->state->nbTrans;
i++) {
3792 t = &exec->state->trans[
i];
3793 if ((
t->counter < 0) || (
t == trans))
3795 counter = &exec->comp->counters[
t->counter];
3796 count = exec->counts[
t->counter];
3802 }
else if (trans->count >= 0) {
3810 count = exec->counts[trans->count];
3811 counter = &exec->comp->counters[trans->count];
3813 }
else if (atom ==
NULL) {
3815 exec->status = XML_REGEXP_INTERNAL_ERROR;
3818 ret = xmlRegStrEqualWildcard(atom->valuep,
value);
3824 if ((
ret == 1) && (trans->counter >= 0)) {
3828 count = exec->counts[trans->counter];
3829 counter = &exec->comp->counters[trans->counter];
3834 if ((
ret == 1) && (atom->min > 0) && (atom->max > 0)) {
3835 xmlRegStatePtr to = exec->comp->states[trans->to];
3840 if (exec->state->nbTrans > exec->transno + 1) {
3841 if (exec->inputStackNr <= 0) {
3842 xmlFARegExecSaveInputString(exec,
value,
data);
3844 xmlFARegExecSave(exec);
3846 exec->transcount = 1;
3851 if (exec->transcount == atom->max) {
3855 value = exec->inputStack[exec->index].value;
3856 data = exec->inputStack[exec->index].data;
3865 if (exec->transcount >= atom->min) {
3866 int transno = exec->transno;
3867 xmlRegStatePtr
state = exec->state;
3874 if (exec->inputStackNr <= 0) {
3875 xmlFARegExecSaveInputString(exec,
value,
data);
3877 xmlFARegExecSave(exec);
3878 exec->transno = transno;
3879 exec->state =
state;
3884 if (exec->transcount < atom->min)
3899 if ((exec->callback !=
NULL) && (atom !=
NULL) &&
3901 exec->callback(exec->data, atom->valuep,
3904 if (exec->state->nbTrans > exec->transno + 1) {
3905 if (exec->inputStackNr <= 0) {
3906 xmlFARegExecSaveInputString(exec,
value,
data);
3908 xmlFARegExecSave(exec);
3910 if (trans->counter >= 0) {
3911 exec->counts[trans->counter]++;
3913 if ((trans->count >= 0) &&
3914 (trans->count < REGEXP_ALL_COUNTER)) {
3915 exec->counts[trans->count] = 0;
/* entering a sink state: remember where the error happened */
3917 if ((exec->comp->states[trans->to] !=
NULL) &&
3918 (exec->comp->states[trans->to]->type ==
3919 XML_REGEXP_SINK_STATE)) {
3924 if (exec->errString !=
NULL)
3927 exec->errState = exec->state;
3928 memcpy(exec->errCounts, exec->counts,
3929 exec->comp->nbCounters *
sizeof(
int));
3931 exec->state = exec->comp->states[trans->to];
3933 if (trans->atom !=
NULL) {
3934 if (exec->inputStack !=
NULL) {
3936 if (exec->index < exec->inputStackNr) {
3937 value = exec->inputStack[exec->index].value;
3938 data = exec->inputStack[exec->index].data;
3949 }
else if (
ret < 0) {
3950 exec->status = XML_REGEXP_INTERNAL_ERROR;
3954 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3961 (exec->state->type != XML_REGEXP_SINK_STATE)) {
3963 if (exec->errString !=
NULL)
3966 exec->errState = exec->state;
3967 if (exec->comp->nbCounters)
3968 memcpy(exec->errCounts, exec->counts,
3969 exec->comp->nbCounters *
sizeof(
int));
3975 exec->determinist = 0;
3976 xmlFARegExecRollBack(exec);
3977 if ((exec->inputStack !=
NULL ) &&
3978 (exec->status == XML_REGEXP_OK)) {
3979 value = exec->inputStack[exec->index].value;
3980 data = exec->inputStack[exec->index].data;
3988 if (exec->status == XML_REGEXP_OK) {
3989 return(exec->state->type == XML_REGEXP_FINAL_STATE);
3991 return(exec->status);
/*
 * xmlRegExecPushString: push one token into the automaton.
 *
 * exec:  the execution context.
 * value: the token to push.
 *
 * The remaining parameter line is elided; the body forwards a variable
 * named data.
 *
 * Returns whatever xmlRegExecPushStringInternal() returns when called
 * with a compound flag of 0.
 */
4006xmlRegExecPushString(xmlRegExecCtxtPtr exec,
const xmlChar *
value,
4008 return(xmlRegExecPushStringInternal(exec,
value,
data, 0));
/*
 * xmlRegExecPushString2: push a token made of two strings joined by
 * XML_REG_STRING_SEPARATOR.
 *
 * exec:  the execution context.
 * value: the first string. The remaining parameter lines are elided; the
 *        body reads value2 and data.
 *
 * Returns exec->status when the context is already in a non-OK state.
 * One visible branch forwards to xmlRegExecPushString() with value alone
 * (its condition is not visible). Otherwise the joined string is pushed
 * through xmlRegCompactPushString() for compact regexps or through
 * xmlRegExecPushStringInternal() with a compound flag of 1.
 *
 * Visible logic: the joined string holds lenp bytes, the separator at
 * index lenp, lenn more bytes and a terminating zero at index
 * lenn + lenp + 1. When lenn + lenp + 2 exceeds 150 a different buffer
 * path is taken, which may end in XML_REGEXP_OUT_OF_MEMORY.
 *
 * NOTE(review): the buffer declaration, the copy calls, the release of a
 * heap buffer and the final return are on elided lines.
 */
4024xmlRegExecPushString2(xmlRegExecCtxtPtr exec,
const xmlChar *
value,
4027 int lenn, lenp,
ret;
4032 if (exec->comp ==
NULL)
4034 if (exec->status != XML_REGEXP_OK)
4035 return(exec->status);
4038 return(xmlRegExecPushString(exec,
value,
data));
4040 lenn =
strlen((
char *) value2);
4043 if (150 < lenn + lenp + 2) {
4046 exec->status = XML_REGEXP_OUT_OF_MEMORY;
4053 str[lenp] = XML_REG_STRING_SEPARATOR;
4055 str[lenn + lenp + 1] = 0;
4057 if (exec->comp->compact !=
NULL)
4058 ret = xmlRegCompactPushString(exec, exec->comp,
str,
data);
4060 ret = xmlRegExecPushStringInternal(exec,
str,
data, 1);
4082xmlRegExecGetValues(xmlRegExecCtxtPtr exec,
int err,
4083 int *nbval,
int *nbneg,
4088 if ((exec ==
NULL) || (nbval ==
NULL) || (nbneg ==
NULL) ||
4095 if ((exec->comp !=
NULL) && (exec->comp->compact !=
NULL)) {
4102 if (exec->errStateNo == -1)
return(-1);
4103 state = exec->errStateNo;
4105 state = exec->index;
4107 if (terminal !=
NULL) {
4108 if (comp->compact[
state * (comp->nbstrings + 1)] ==
4109 XML_REGEXP_FINAL_STATE)
4114 for (
i = 0;(
i < comp->nbstrings) && (nb < maxval);
i++) {
4115 target = comp->compact[
state * (comp->nbstrings + 1) +
i + 1];
4117 (comp->compact[(
target - 1) * (comp->nbstrings + 1)] !=
4118 XML_REGEXP_SINK_STATE)) {
4119 values[nb++] = comp->stringMap[
i];
4123 for (
i = 0;(
i < comp->nbstrings) && (nb < maxval);
i++) {
4124 target = comp->compact[
state * (comp->nbstrings + 1) +
i + 1];
4126 (comp->compact[(
target - 1) * (comp->nbstrings + 1)] ==
4127 XML_REGEXP_SINK_STATE)) {
4128 values[nb++] = comp->stringMap[
i];
4134 xmlRegTransPtr trans;
4136 xmlRegStatePtr
state;
4138 if (terminal !=
NULL) {
4139 if (exec->state->type == XML_REGEXP_FINAL_STATE)
4146 if (exec->errState ==
NULL)
return(-1);
4147 state = exec->errState;
4149 if (exec->state ==
NULL)
return(-1);
4150 state = exec->state;
4153 (transno <
state->nbTrans) && (nb < maxval);
4155 trans = &
state->trans[transno];
4159 if ((atom ==
NULL) || (atom->valuep ==
NULL))
4161 if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4164 }
else if (trans->count == REGEXP_ALL_COUNTER) {
4167 }
else if (trans->counter >= 0) {
4172 count = exec->errCounts[trans->counter];
4174 count = exec->counts[trans->counter];
4175 if (exec->comp !=
NULL)
4176 counter = &exec->comp->counters[trans->counter];
4185 if ((exec->comp !=
NULL) && (exec->comp->states[trans->to] !=
NULL) &&
4186 (exec->comp->states[trans->to]->type !=
4187 XML_REGEXP_SINK_STATE)) {
4197 (transno <
state->nbTrans) && (nb < maxval);
4199 trans = &
state->trans[transno];
4203 if ((atom ==
NULL) || (atom->valuep ==
NULL))
4205 if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4207 }
else if (trans->count == REGEXP_ALL_COUNTER) {
4209 }
else if (trans->counter >= 0) {
4212 if ((exec->comp->states[trans->to] !=
NULL) &&
4213 (exec->comp->states[trans->to]->type ==
4214 XML_REGEXP_SINK_STATE)) {
4245xmlRegExecNextValues(xmlRegExecCtxtPtr exec,
int *nbval,
int *nbneg,
4247 return(xmlRegExecGetValues(exec, 0, nbval, nbneg,
values, terminal));
4270xmlRegExecErrInfo(xmlRegExecCtxtPtr exec,
const xmlChar **
string,
4274 if (
string !=
NULL) {
4275 if (exec->status != XML_REGEXP_OK)
4276 *
string = exec->errString;
4280 return(xmlRegExecGetValues(exec, 1, nbval, nbneg,
values, terminal));
4285xmlRegExecPushChar(xmlRegExecCtxtPtr exec,
int UCS) {
4286 xmlRegTransPtr trans;
4293 if (exec->status != XML_REGEXP_OK)
4294 return(exec->status);
4296 while ((exec->status == XML_REGEXP_OK) &&
4297 ((exec->inputString[exec->index] != 0) ||
4298 (exec->state->type != XML_REGEXP_FINAL_STATE))) {
4305 if ((exec->inputString[exec->index] == 0) && (exec->counts ==
NULL))
4308 exec->transcount = 0;
4309 for (;exec->transno < exec->state->nbTrans;exec->transno++) {
4310 trans = &exec->state->trans[exec->transno];
4315 if (trans->count >= 0) {
4323 count = exec->counts[trans->count];
4324 counter = &exec->comp->counters[trans->count];
4326 }
else if (atom ==
NULL) {
4328 exec->status = XML_REGEXP_INTERNAL_ERROR;
4330 }
else if (exec->inputString[exec->index] != 0) {
4331 codepoint =
CUR_SCHAR(&(exec->inputString[exec->index]),
len);
4332 ret = xmlRegCheckCharacter(atom, codepoint);
4333 if ((
ret == 1) && (atom->min > 0) && (atom->max > 0)) {
4334 xmlRegStatePtr to = exec->comp->states[trans->to];
4339 if (exec->state->nbTrans > exec->transno + 1) {
4340 xmlFARegExecSave(exec);
4342 exec->transcount = 1;
4347 if (exec->transcount == atom->max) {
4354 if (exec->inputString[exec->index] == 0) {
4358 if (exec->transcount >= atom->min) {
4359 int transno = exec->transno;
4360 xmlRegStatePtr
state = exec->state;
4367 xmlFARegExecSave(exec);
4368 exec->transno = transno;
4369 exec->state =
state;
4371 codepoint =
CUR_SCHAR(&(exec->inputString[exec->index]),
4373 ret = xmlRegCheckCharacter(atom, codepoint);
4376 if (exec->transcount < atom->min)
4391 if (exec->state->nbTrans > exec->transno + 1) {
4392 xmlFARegExecSave(exec);
4397 if (trans->count >= 0) {
4398 exec->counts[trans->count] = 0;
4400 if (trans->counter >= 0) {
4401 exec->counts[trans->counter]++;
4403 exec->state = exec->comp->states[trans->to];
4405 if (trans->atom !=
NULL) {
4409 }
else if (
ret < 0) {
4410 exec->status = XML_REGEXP_INTERNAL_ERROR;
4414 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4419 exec->determinist = 0;
4420 xmlFARegExecRollBack(exec);
4441xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
4448 ERROR(
"Invalid UTF-8");
4451 if ((
cur ==
'.') || (
cur ==
'\\') || (
cur ==
'?') ||
4452 (
cur ==
'*') || (
cur ==
'+') || (
cur ==
'(') ||
4453 (
cur ==
')') || (
cur ==
'|') || (
cur == 0x5B) ||
4454 (
cur == 0x5D) || (
cur == 0))
4476xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
4478 xmlRegAtomType
type = (xmlRegAtomType) 0;
4487 type = XML_REGEXP_LETTER_UPPERCASE;
4488 }
else if (
cur ==
'l') {
4490 type = XML_REGEXP_LETTER_LOWERCASE;
4491 }
else if (
cur ==
't') {
4493 type = XML_REGEXP_LETTER_TITLECASE;
4494 }
else if (
cur ==
'm') {
4496 type = XML_REGEXP_LETTER_MODIFIER;
4497 }
else if (
cur ==
'o') {
4499 type = XML_REGEXP_LETTER_OTHERS;
4501 type = XML_REGEXP_LETTER;
4503 }
else if (
cur ==
'M') {
4509 type = XML_REGEXP_MARK_NONSPACING;
4510 }
else if (
cur ==
'c') {
4513 type = XML_REGEXP_MARK_SPACECOMBINING;
4514 }
else if (
cur ==
'e') {
4517 type = XML_REGEXP_MARK_ENCLOSING;
4520 type = XML_REGEXP_MARK;
4522 }
else if (
cur ==
'N') {
4528 type = XML_REGEXP_NUMBER_DECIMAL;
4529 }
else if (
cur ==
'l') {
4532 type = XML_REGEXP_NUMBER_LETTER;
4533 }
else if (
cur ==
'o') {
4536 type = XML_REGEXP_NUMBER_OTHERS;
4539 type = XML_REGEXP_NUMBER;
4541 }
else if (
cur ==
'P') {
4547 type = XML_REGEXP_PUNCT_CONNECTOR;
4548 }
else if (
cur ==
'd') {
4551 type = XML_REGEXP_PUNCT_DASH;
4552 }
else if (
cur ==
's') {
4555 type = XML_REGEXP_PUNCT_OPEN;
4556 }
else if (
cur ==
'e') {
4559 type = XML_REGEXP_PUNCT_CLOSE;
4560 }
else if (
cur ==
'i') {
4563 type = XML_REGEXP_PUNCT_INITQUOTE;
4564 }
else if (
cur ==
'f') {
4567 type = XML_REGEXP_PUNCT_FINQUOTE;
4568 }
else if (
cur ==
'o') {
4571 type = XML_REGEXP_PUNCT_OTHERS;
4574 type = XML_REGEXP_PUNCT;
4576 }
else if (
cur ==
'Z') {
4582 type = XML_REGEXP_SEPAR_SPACE;
4583 }
else if (
cur ==
'l') {
4586 type = XML_REGEXP_SEPAR_LINE;
4587 }
else if (
cur ==
'p') {
4590 type = XML_REGEXP_SEPAR_PARA;
4593 type = XML_REGEXP_SEPAR;
4595 }
else if (
cur ==
'S') {
4600 type = XML_REGEXP_SYMBOL_MATH;
4602 }
else if (
cur ==
'c') {
4604 type = XML_REGEXP_SYMBOL_CURRENCY;
4606 }
else if (
cur ==
'k') {
4608 type = XML_REGEXP_SYMBOL_MODIFIER;
4610 }
else if (
cur ==
'o') {
4612 type = XML_REGEXP_SYMBOL_OTHERS;
4616 type = XML_REGEXP_SYMBOL;
4618 }
else if (
cur ==
'C') {
4624 type = XML_REGEXP_OTHER_CONTROL;
4625 }
else if (
cur ==
'f') {
4628 type = XML_REGEXP_OTHER_FORMAT;
4629 }
else if (
cur ==
'o') {
4632 type = XML_REGEXP_OTHER_PRIVATE;
4633 }
else if (
cur ==
'n') {
4636 type = XML_REGEXP_OTHER_NA;
4639 type = XML_REGEXP_OTHER;
4641 }
else if (
cur ==
'I') {
4646 ERROR(
"IsXXXX expected");
4652 if (((
cur >=
'a') && (
cur <=
'z')) ||
4653 ((
cur >=
'A') && (
cur <=
'Z')) ||
4654 ((
cur >=
'0') && (
cur <=
'9')) ||
4658 while (((
cur >=
'a') && (
cur <=
'z')) ||
4659 ((
cur >=
'A') && (
cur <=
'Z')) ||
4660 ((
cur >=
'0') && (
cur <=
'9')) ||
4666 type = XML_REGEXP_BLOCK_NAME;
4669 ERROR(
"Unknown char property");
4672 if (ctxt->atom ==
NULL) {
4673 ctxt->atom = xmlRegNewAtom(ctxt,
type);
4674 if (ctxt->atom ==
NULL) {
4678 ctxt->atom->valuep = blockName;
4679 }
else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4680 if (xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4687static int parse_escaped_codeunit(xmlRegParserCtxtPtr ctxt)
4690 for (
i = 0;
i < 4;
i++) {
4694 if (
cur >=
'0' &&
cur <=
'9') {
4696 }
else if (
cur >=
'A' &&
cur <=
'F') {
4698 }
else if (
cur >=
'a' &&
cur <=
'f') {
4701 ERROR(
"Expecting hex digit");
4708static int parse_escaped_codepoint(xmlRegParserCtxtPtr ctxt)
4710 int val = parse_escaped_codeunit(ctxt);
4711 if (0xD800 <=
val &&
val <= 0xDBFF) {
4716 int low = parse_escaped_codeunit(ctxt);
4717 if (0xDC00 <= low && low <= 0xDFFF) {
4718 return (
val - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
4722 ERROR(
"Invalid low surrogate pair code unit");
4739xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
4743 if (ctxt->atom ==
NULL) {
4744 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
4745 }
else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4746 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4747 XML_REGEXP_ANYCHAR, 0, 0,
NULL);
4753 ERROR(
"Escaped sequence: expecting \\");
4761 ERROR(
"Expecting '{'");
4765 xmlFAParseCharProp(ctxt);
4767 ERROR(
"Expecting '}'");
4771 }
else if (
cur ==
'P') {
4774 ERROR(
"Expecting '{'");
4778 xmlFAParseCharProp(ctxt);
4779 if (ctxt->atom !=
NULL)
4780 ctxt->atom->neg = 1;
4782 ERROR(
"Expecting '}'");
4786 }
else if ((
cur ==
'n') || (
cur ==
'r') || (
cur ==
't') || (
cur ==
'\\') ||
4787 (
cur ==
'|') || (
cur ==
'.') || (
cur ==
'?') || (
cur ==
'*') ||
4788 (
cur ==
'+') || (
cur ==
'(') || (
cur ==
')') || (
cur ==
'{') ||
4789 (
cur ==
'}') || (
cur == 0x2D) || (
cur == 0x5B) || (
cur == 0x5D) ||
4809 if (ctxt->atom ==
NULL) {
4810 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
4811 if (ctxt->atom !=
NULL) {
4814 ctxt->atom->codepoint =
'\n';
4817 ctxt->atom->codepoint =
'\r';
4820 ctxt->atom->codepoint =
'\t';
4823 cur = parse_escaped_codepoint(ctxt);
4827 ctxt->atom->codepoint =
cur;
4830 ctxt->atom->codepoint =
cur;
4833 }
else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4845 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4849 }
else if ((
cur ==
's') || (
cur ==
'S') || (
cur ==
'i') || (
cur ==
'I') ||
4850 (
cur ==
'c') || (
cur ==
'C') || (
cur ==
'd') || (
cur ==
'D') ||
4851 (
cur ==
'w') || (
cur ==
'W')) {
4852 xmlRegAtomType
type = XML_REGEXP_ANYSPACE;
4856 type = XML_REGEXP_ANYSPACE;
4859 type = XML_REGEXP_NOTSPACE;
4862 type = XML_REGEXP_INITNAME;
4865 type = XML_REGEXP_NOTINITNAME;
4868 type = XML_REGEXP_NAMECHAR;
4871 type = XML_REGEXP_NOTNAMECHAR;
4874 type = XML_REGEXP_DECIMAL;
4877 type = XML_REGEXP_NOTDECIMAL;
4880 type = XML_REGEXP_REALCHAR;
4883 type = XML_REGEXP_NOTREALCHAR;
4887 if (ctxt->atom ==
NULL) {
4888 ctxt->atom = xmlRegNewAtom(ctxt,
type);
4889 }
else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4890 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4894 ERROR(
"Wrong escape sequence, misuse of character '\\'");
4909xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
4915 ERROR(
"Expecting ']'");
4924 case 'n':
start = 0xA;
break;
4925 case 'r':
start = 0xD;
break;
4926 case 't':
start = 0x9;
break;
4927 case '\\':
case '|':
case '.':
case '-':
case '^':
case '?':
4928 case '*':
case '+':
case '{':
case '}':
case '(':
case ')':
4932 ERROR(
"Invalid escape value");
4937 }
else if ((
cur != 0x5B) && (
cur != 0x5D)) {
4941 ERROR(
"Invalid UTF-8");
4945 ERROR(
"Expecting a char range");
4952 if ((
start ==
'-') && (
NXT(1) !=
']') && (PREV !=
'[') && (PREV !=
'^')) {
4958 if ((
cur !=
'-') || (
NXT(1) ==
'[') || (
NXT(1) ==
']')) {
4959 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4969 case 'n':
end = 0xA;
break;
4970 case 'r':
end = 0xD;
break;
4971 case 't':
end = 0x9;
break;
4972 case '\\':
case '|':
case '.':
case '-':
case '^':
case '?':
4973 case '*':
case '+':
case '{':
case '}':
case '(':
case ')':
4977 ERROR(
"Invalid escape value");
4981 }
else if ((
cur !=
'\0') && (
cur != 0x5B) && (
cur != 0x5D)) {
4985 ERROR(
"Invalid UTF-8");
4989 ERROR(
"Expecting the end of a char range");
4995 ERROR(
"End of range is before start of range");
4998 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5011xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
5014 xmlFAParseCharClassEsc(ctxt);
5016 xmlFAParseCharRange(ctxt);
5018 }
while ((
CUR !=
']') && (
CUR !=
'-') &&
5019 (
CUR != 0) && (ctxt->error == 0));
5032xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
5033 int neg = ctxt->neg;
5037 ctxt->neg = !ctxt->neg;
5038 xmlFAParsePosCharGroup(ctxt);
5041 while ((
CUR !=
']') && (ctxt->error == 0)) {
5042 if ((
CUR ==
'-') && (
NXT(1) ==
'[')) {
5046 xmlFAParseCharGroup(ctxt);
5051 ERROR(
"charClassExpr: ']' expected");
5055 xmlFAParsePosCharGroup(ctxt);
5068xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
5071 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
5072 if (ctxt->atom ==
NULL)
5074 xmlFAParseCharGroup(ctxt);
5078 ERROR(
"xmlFAParseCharClass: ']' expected");
5081 xmlFAParseCharClassEsc(ctxt);
5094xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
5099 while ((
CUR >=
'0') && (
CUR <=
'9')) {
5114 if ((
ok != 1) || (overflow == 1)) {
5131xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
5135 if ((
cur ==
'?') || (
cur ==
'*') || (
cur ==
'+')) {
5136 if (ctxt->atom !=
NULL) {
5138 ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
5139 else if (
cur ==
'*')
5140 ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
5141 else if (
cur ==
'+')
5142 ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
5151 cur = xmlFAParseQuantExact(ctxt);
5155 ERROR(
"Improper quantifier");
5162 cur = xmlFAParseQuantExact(ctxt);
5166 ERROR(
"Improper quantifier");
5173 ERROR(
"Unterminated quantifier");
5177 if (ctxt->atom !=
NULL) {
5178 ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
5179 ctxt->atom->min =
min;
5180 ctxt->atom->max =
max;
5194xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
5197 codepoint = xmlFAIsChar(ctxt);
5198 if (codepoint > 0) {
5199 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
5200 if (ctxt->atom ==
NULL)
5204 if (codepoint < 0) {
5205 ERROR(
"Invalid UTF-8");
5208 ctxt->atom->codepoint = codepoint;
5211 }
else if (
CUR ==
'|') {
5213 }
else if (
CUR == 0) {
5215 }
else if (
CUR ==
')') {
5217 }
else if (
CUR ==
'(') {
5218 xmlRegStatePtr
start, oldend, start0;
5221 if (ctxt->depth >= 50) {
5222 ERROR(
"xmlFAParseAtom: maximum nesting depth exceeded");
5229 xmlFAGenerateEpsilonTransition(ctxt, ctxt->state,
NULL);
5230 start0 = ctxt->state;
5231 xmlFAGenerateEpsilonTransition(ctxt, ctxt->state,
NULL);
5232 start = ctxt->state;
5237 xmlFAParseRegExp(ctxt, 0);
5242 ERROR(
"xmlFAParseAtom: expecting ')'");
5244 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
5245 if (ctxt->atom ==
NULL)
5247 ctxt->atom->start =
start;
5248 ctxt->atom->start0 = start0;
5249 ctxt->atom->stop = ctxt->state;
5252 }
else if ((
CUR ==
'[') || (
CUR ==
'\\') || (
CUR ==
'.')) {
5253 xmlFAParseCharClass(ctxt);
5266xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
5270 ret = xmlFAParseAtom(ctxt);
5273 if (ctxt->atom ==
NULL) {
5274 ERROR(
"internal: no atom generated");
5276 xmlFAParseQuantifier(ctxt);
5291xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
5292 xmlRegStatePtr previous;
5295 previous = ctxt->state;
5296 ret = xmlFAParsePiece(ctxt);
5299 xmlFAGenerateEpsilonTransition(ctxt, previous, to);
5301 if (xmlFAGenerateTransitions(ctxt, previous,
5304 xmlRegFreeAtom(ctxt->atom);
5308 previous = ctxt->state;
5311 while ((
ret != 0) && (ctxt->error == 0)) {
5312 ret = xmlFAParsePiece(ctxt);
5314 if (xmlFAGenerateTransitions(ctxt, previous,
5317 xmlRegFreeAtom(ctxt->atom);
5321 previous = ctxt->state;
5336xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt,
int top) {
5340 start = ctxt->state;
5342 xmlFAParseBranch(ctxt,
NULL);
5344 ctxt->state->type = XML_REGEXP_FINAL_STATE;
5347 ctxt->end = ctxt->state;
5351 while ((
CUR ==
'|') && (ctxt->error == 0)) {
5353 ctxt->state =
start;
5355 xmlFAParseBranch(ctxt,
end);
5377xmlRegexpPrint(
FILE *output, xmlRegexpPtr regexp) {
5383 if (regexp ==
NULL) {
5387 fprintf(output,
"'%s' ", regexp->string);
5389 fprintf(output,
"%d atoms:\n", regexp->nbAtoms);
5390 for (
i = 0;
i < regexp->nbAtoms;
i++) {
5392 xmlRegPrintAtom(output, regexp->atoms[
i]);
5394 fprintf(output,
"%d states:", regexp->nbStates);
5396 for (
i = 0;
i < regexp->nbStates;
i++) {
5397 xmlRegPrintState(output, regexp->states[
i]);
5399 fprintf(output,
"%d counters:\n", regexp->nbCounters);
5400 for (
i = 0;
i < regexp->nbCounters;
i++) {
5401 fprintf(output,
" %d: min %d max %d\n",
i, regexp->counters[
i].min,
5402 regexp->counters[
i].max);
5417xmlRegexpCompile(
const xmlChar *regexp) {
5419 xmlRegParserCtxtPtr ctxt;
5424 ctxt = xmlRegNewParserCtxt(regexp);
5429 ctxt->state = xmlRegStatePush(ctxt);
5430 if (ctxt->state ==
NULL)
5432 ctxt->start = ctxt->state;
5436 xmlFAParseRegExp(ctxt, 1);
5438 ERROR(
"xmlFAParseRegExp: extra characters");
5440 if (ctxt->error != 0)
5442 ctxt->end = ctxt->state;
5443 ctxt->start->type = XML_REGEXP_START_STATE;
5444 ctxt->end->type = XML_REGEXP_FINAL_STATE;
5447 xmlFAEliminateEpsilonTransitions(ctxt);
5450 if (ctxt->error != 0)
5452 ret = xmlRegEpxFromParse(ctxt);
5455 xmlRegFreeParserCtxt(ctxt);
5472 return(xmlFARegExec(comp,
content));
5484xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
5490 if (comp->determinist != -1)
5491 return(comp->determinist);
5493 am = xmlNewAutomata();
5496 if (am->states !=
NULL) {
5499 for (
i = 0;
i < am->nbStates;
i++)
5500 xmlRegFreeState(am->states[
i]);
5503 am->nbAtoms = comp->nbAtoms;
5504 am->atoms = comp->atoms;
5505 am->nbStates = comp->nbStates;
5506 am->states = comp->states;
5507 am->determinist = -1;
5508 am->flags = comp->flags;
5509 ret = xmlFAComputesDeterminism(am);
5512 xmlFreeAutomata(am);
5513 comp->determinist =
ret;
5524xmlRegFreeRegexp(xmlRegexpPtr regexp) {
5529 if (regexp->string !=
NULL)
5531 if (regexp->states !=
NULL) {
5532 for (
i = 0;
i < regexp->nbStates;
i++)
5533 xmlRegFreeState(regexp->states[
i]);
5536 if (regexp->atoms !=
NULL) {
5537 for (
i = 0;
i < regexp->nbAtoms;
i++)
5538 xmlRegFreeAtom(regexp->atoms[
i]);
5541 if (regexp->counters !=
NULL)
5543 if (regexp->compact !=
NULL)
5545 if (regexp->transdata !=
NULL)
5547 if (regexp->stringMap !=
NULL) {
5548 for (
i = 0;
i < regexp->nbstrings;
i++)
5556#ifdef LIBXML_AUTOMATA_ENABLED
5571xmlNewAutomata(
void) {
5572 xmlAutomataPtr ctxt;
5574 ctxt = xmlRegNewParserCtxt(
NULL);
5579 ctxt->state = xmlRegStatePush(ctxt);
5580 if (ctxt->state ==
NULL) {
5581 xmlFreeAutomata(ctxt);
5584 ctxt->start = ctxt->state;
5587 ctxt->start->type = XML_REGEXP_START_STATE;
5600xmlFreeAutomata(xmlAutomataPtr am) {
5603 xmlRegFreeParserCtxt(am);
5629xmlAutomataGetInitState(xmlAutomataPtr am) {
5645xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr
state) {
5648 state->type = XML_REGEXP_FINAL_STATE;
5667xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr
from,
5674 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5680 if (xmlFAGenerateTransitions(am,
from, to, atom) < 0) {
5681 xmlRegFreeAtom(atom);
5705xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr
from,
5712 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5716 if ((token2 ==
NULL) || (*token2 == 0)) {
5722 lenn =
strlen((
char *) token2);
5727 xmlRegFreeAtom(atom);
5733 str[lenn + lenp + 1] = 0;
5738 if (xmlFAGenerateTransitions(am,
from, to, atom) < 0) {
5739 xmlRegFreeAtom(atom);
5765xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr
from,
5773 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5778 if ((token2 ==
NULL) || (*token2 == 0)) {
5784 lenn =
strlen((
char *) token2);
5789 xmlRegFreeAtom(atom);
5795 str[lenn + lenp + 1] = 0;
5799 snprintf((
char *) err_msg, 199,
"not %s", (
const char *) atom->valuep);
5803 if (xmlFAGenerateTransitions(am,
from, to, atom) < 0) {
5804 xmlRegFreeAtom(atom);
5832xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr
from,
5845 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5848 if ((token2 ==
NULL) || (*token2 == 0)) {
5850 if (atom->valuep ==
NULL)
5856 lenn =
strlen((
char *) token2);
5865 str[lenn + lenp + 1] = 0;
5879 counter = xmlRegGetCounter(am);
5887 to = xmlRegStatePush(am);
5891 xmlRegStateAddTrans(am,
from, atom, to,
counter, -1);
5892 if (xmlRegAtomPush(am, atom) < 0)
5901 xmlFAGenerateEpsilonTransition(am,
from, to);
5905 xmlRegFreeAtom(atom);
5927xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr
from,
5939 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5943 if (atom->valuep ==
NULL)
5955 counter = xmlRegGetCounter(am);
5963 to = xmlRegStatePush(am);
5967 xmlRegStateAddTrans(am,
from, atom, to,
counter, -1);
5968 if (xmlRegAtomPush(am, atom) < 0)
5977 xmlFAGenerateEpsilonTransition(am,
from, to);
5981 xmlRegFreeAtom(atom);
6005xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr
from,
6018 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6021 if ((token2 ==
NULL) || (*token2 == 0)) {
6023 if (atom->valuep ==
NULL)
6029 lenn =
strlen((
char *) token2);
6038 str[lenn + lenp + 1] = 0;
6043 atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6049 counter = xmlRegGetCounter(am);
6052 am->counters[
counter].min = 1;
6053 am->counters[
counter].max = 1;
6057 to = xmlRegStatePush(am);
6061 xmlRegStateAddTrans(am,
from, atom, to,
counter, -1);
6062 if (xmlRegAtomPush(am, atom) < 0)
6068 xmlRegFreeAtom(atom);
6093xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr
from,
6105 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6110 atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6116 counter = xmlRegGetCounter(am);
6119 am->counters[
counter].min = 1;
6120 am->counters[
counter].max = 1;
6124 to = xmlRegStatePush(am);
6128 xmlRegStateAddTrans(am,
from, atom, to,
counter, -1);
6129 if (xmlRegAtomPush(am, atom) < 0)
6135 xmlRegFreeAtom(atom);
6148xmlAutomataNewState(xmlAutomataPtr am) {
6151 return(xmlRegStatePush(am));
6167xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr
from,
6168 xmlAutomataStatePtr to) {
6171 xmlFAGenerateEpsilonTransition(am,
from, to);
6192xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr
from,
6193 xmlAutomataStatePtr to,
int lax) {
6196 xmlFAGenerateAllTransition(am,
from, to, lax);
6213xmlAutomataNewCounter(xmlAutomataPtr am,
int min,
int max) {
6219 ret = xmlRegGetCounter(am);
6222 am->counters[
ret].min =
min;
6223 am->counters[
ret].max =
max;
6241xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr
from,
6242 xmlAutomataStatePtr to,
int counter) {
6245 xmlFAGenerateCountedEpsilonTransition(am,
from, to,
counter);
6265xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr
from,
6266 xmlAutomataStatePtr to,
int counter) {
6269 xmlFAGenerateCountedTransition(am,
from, to,
counter);
6285xmlAutomataCompile(xmlAutomataPtr am) {
6288 if ((am ==
NULL) || (am->error != 0))
return(
NULL);
6289 xmlFAEliminateEpsilonTransitions(am);
6291 ret = xmlRegEpxFromParse(am);
6305xmlAutomataIsDeterminist(xmlAutomataPtr am) {
6311 ret = xmlFAComputesDeterminism(am);
6316#ifdef LIBXML_EXPR_ENABLED
6330 xmlExpNodePtr *
table;
6351xmlExpNewCtxt(
int maxNodes,
xmlDictPtr dict) {
6355 if (maxNodes <= 4096)
6364 ret->maxNodes = maxNodes;
6392xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
6396 if (ctxt->table !=
NULL)
6406#define MAX_NODES 10000
6422 XML_EXP_NILABLE = (1 << 0)
6425#define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE)
6433 xmlExpNodePtr exp_left;
6441 xmlExpNodePtr f_right;
6447#define exp_min field.count.f_min
6448#define exp_max field.count.f_max
6450#define exp_right field.children.f_right
6451#define exp_str field.f_str
6453static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType
type);
6454static xmlExpNode forbiddenExpNode = {
6455 XML_EXP_FORBID, 0, 0, 0, 0,
NULL,
NULL, {{ 0, 0}}
6457xmlExpNodePtr forbiddenExp = &forbiddenExpNode;
6458static xmlExpNode emptyExpNode = {
6459 XML_EXP_EMPTY, 1, 0, 0, 0,
NULL,
NULL, {{ 0, 0}}
6461xmlExpNodePtr emptyExp = &emptyExpNode;
6473static unsigned short
6475 unsigned short value = 0
L;
6479 value += 30 * (*name);
6480 while ((
ch = *
name++) != 0) {
6491static unsigned short
6492xmlExpHashComputeKey(xmlExpNodeType
type, xmlExpNodePtr
left,
6493 xmlExpNodePtr
right) {
6494 unsigned long value;
6523xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType
type) {
6526 if (ctxt->nb_nodes >= MAX_NODES)
6550xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType
type,
6551 xmlExpNodePtr
left, xmlExpNodePtr
right,
6553 unsigned short kbase,
key;
6554 xmlExpNodePtr
entry;
6555 xmlExpNodePtr insert;
6563 if (
type == XML_EXP_ATOM) {
6564 kbase = xmlExpHashNameComputeKey(
name);
6565 }
else if (
type == XML_EXP_COUNT) {
6573 xmlExpFree(ctxt,
left);
6578 xmlExpFree(ctxt,
left);
6579 return(forbiddenExp);
6586 }
else if (
type == XML_EXP_OR) {
6588 if (
left->type == XML_EXP_FORBID) {
6589 xmlExpFree(ctxt,
left);
6592 if (
right->type == XML_EXP_FORBID) {
6593 xmlExpFree(ctxt,
right);
6600 xmlExpFree(ctxt,
right);
6605 if ((
left->type == XML_EXP_OR) && (
right->type != XML_EXP_OR)) {
6606 xmlExpNodePtr tmp =
left;
6612 if (
right->type == XML_EXP_OR) {
6615 xmlExpFree(ctxt,
left);
6621 if (
left->type == XML_EXP_OR) {
6625 if ((
left->exp_right->type != XML_EXP_OR) &&
6626 (
left->exp_right->key <
left->exp_left->key)) {
6627 tmp =
left->exp_right;
6629 left->exp_left = tmp;
6631 left->exp_right->ref++;
6632 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR,
left->exp_right,
right,
6634 left->exp_left->ref++;
6635 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR,
left->exp_left, tmp,
6638 xmlExpFree(ctxt,
left);
6641 if (
right->type == XML_EXP_OR) {
6644 if (
left->key >
right->exp_right->key) {
6646 right->exp_right->ref++;
6647 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR,
right->exp_right,
6649 right->exp_left->ref++;
6650 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR,
right->exp_left,
6652 xmlExpFree(ctxt,
right);
6659 right->exp_right->ref++;
6660 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR,
left,
6662 right->exp_left->ref++;
6663 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR,
right->exp_left,
6665 xmlExpFree(ctxt,
right);
6671 xmlExpNodePtr tmp =
left;
6676 }
else if (
type == XML_EXP_SEQ) {
6678 if (
left->type == XML_EXP_FORBID) {
6679 xmlExpFree(ctxt,
right);
6682 if (
right->type == XML_EXP_FORBID) {
6683 xmlExpFree(ctxt,
left);
6687 if (
right->type == XML_EXP_EMPTY) {
6690 if (
left->type == XML_EXP_EMPTY) {
6697 key = kbase % ctxt->size;
6698 if (ctxt->table[
key] !=
NULL) {
6699 for (insert = ctxt->table[
key]; insert !=
NULL;
6700 insert = insert->
next) {
6701 if ((insert->key == kbase) &&
6702 (insert->type ==
type)) {
6703 if (
type == XML_EXP_ATOM) {
6704 if (
name == insert->exp_str) {
6708 }
else if (
type == XML_EXP_COUNT) {
6709 if ((insert->exp_min ==
min) && (insert->exp_max ==
max) &&
6710 (insert->exp_left ==
left)) {
6715 }
else if ((insert->exp_left ==
left) &&
6716 (insert->exp_right ==
right)) {
6730 if (
type == XML_EXP_ATOM) {
6733 }
else if (
type == XML_EXP_COUNT) {
6737 if ((
min == 0) || (IS_NILLABLE(
left)))
6738 entry->info |= XML_EXP_NILABLE;
6746 if (
type == XML_EXP_OR) {
6747 if ((IS_NILLABLE(
left)) || (IS_NILLABLE(
right)))
6748 entry->info |= XML_EXP_NILABLE;
6749 if ((
entry->exp_left->c_max == -1) ||
6750 (
entry->exp_right->c_max == -1))
6752 else if (
entry->exp_left->c_max >
entry->exp_right->c_max)
6757 if ((IS_NILLABLE(
left)) && (IS_NILLABLE(
right)))
6758 entry->info |= XML_EXP_NILABLE;
6759 if ((
entry->exp_left->c_max == -1) ||
6760 (
entry->exp_right->c_max == -1))
6784xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr
exp) {
6785 if ((
exp ==
NULL) || (
exp == forbiddenExp) || (
exp == emptyExp))
6788 if (
exp->ref == 0) {
6792 key =
exp->key % ctxt->size;
6793 if (ctxt->table[
key] ==
exp) {
6794 ctxt->table[
key] =
exp->next;
6798 tmp = ctxt->table[
key];
6799 while (tmp !=
NULL) {
6800 if (tmp->next ==
exp) {
6801 tmp->next =
exp->next;
6808 if ((
exp->type == XML_EXP_SEQ) || (
exp->type == XML_EXP_OR)) {
6809 xmlExpFree(ctxt,
exp->exp_left);
6810 xmlExpFree(ctxt,
exp->exp_right);
6811 }
else if (
exp->type == XML_EXP_COUNT) {
6812 xmlExpFree(ctxt,
exp->exp_left);
6826xmlExpRef(xmlExpNodePtr
exp) {
6848 return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM,
NULL,
NULL,
name, 0, 0));
6865xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr
left, xmlExpNodePtr
right) {
6869 xmlExpFree(ctxt,
left);
6870 xmlExpFree(ctxt,
right);
6873 return(xmlExpHashGetEntry(ctxt, XML_EXP_OR,
left,
right,
NULL, 0, 0));
6890xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr
left, xmlExpNodePtr
right) {
6894 xmlExpFree(ctxt,
left);
6895 xmlExpFree(ctxt,
right);
6898 return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ,
left,
right,
NULL, 0, 0));
6916xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset,
int min,
int max) {
6919 if ((subset ==
NULL) || (
min < 0) || (
max < -1) ||
6921 xmlExpFree(ctxt, subset);
6924 return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
6935xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr
exp,
6939 switch (
exp->type) {
6943 for (tmp = 0;tmp < nb;tmp++)
6944 if (
list[tmp] ==
exp->exp_str)
6955 tmp = xmlExpGetLanguageInt(ctxt,
exp->exp_left,
list,
len, nb);
6958 tmp2 = xmlExpGetLanguageInt(ctxt,
exp->exp_right,
list,
len,
6980xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr
exp,
6984 return(xmlExpGetLanguageInt(ctxt,
exp, langList,
len, 0));
6988xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr
exp,
6992 switch (
exp->type) {
6993 case XML_EXP_FORBID:
6998 for (tmp = 0;tmp < nb;tmp++)
6999 if (
list[tmp] ==
exp->exp_str)
7009 tmp = xmlExpGetStartInt(ctxt,
exp->exp_left,
list,
len, nb);
7012 if (IS_NILLABLE(
exp->exp_left)) {
7013 tmp2 = xmlExpGetStartInt(ctxt,
exp->exp_right,
list,
len,
7021 tmp = xmlExpGetStartInt(ctxt,
exp->exp_left,
list,
len, nb);
7024 tmp2 = xmlExpGetStartInt(ctxt,
exp->exp_right,
list,
len,
7048xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr
exp,
7052 return(xmlExpGetStartInt(ctxt,
exp, tokList,
len, 0));
7064xmlExpIsNillable(xmlExpNodePtr
exp) {
7067 return(IS_NILLABLE(
exp) != 0);
7071xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr
exp,
const xmlChar *
str)
7075 switch (
exp->type) {
7077 return(forbiddenExp);
7078 case XML_EXP_FORBID:
7079 return(forbiddenExp);
7081 if (
exp->exp_str ==
str) {
7091 tmp = xmlExpStringDeriveInt(ctxt,
exp->exp_left,
str);
7095 ret = xmlExpStringDeriveInt(ctxt,
exp->exp_right,
str);
7097 xmlExpFree(ctxt, tmp);
7100 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp,
ret,
7105 ret = xmlExpStringDeriveInt(ctxt,
exp->exp_left,
str);
7108 }
else if (
ret == forbiddenExp) {
7109 if (IS_NILLABLE(
exp->exp_left)) {
7110 ret = xmlExpStringDeriveInt(ctxt,
exp->exp_right,
str);
7113 exp->exp_right->ref++;
7114 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ,
ret,
exp->exp_right,
7118 case XML_EXP_COUNT: {
7122 if (
exp->exp_max == 0)
7123 return(forbiddenExp);
7124 ret = xmlExpStringDeriveInt(ctxt,
exp->exp_left,
str);
7127 if (
ret == forbiddenExp) {
7130 if (
exp->exp_max == 1)
7132 if (
exp->exp_max < 0)
7136 if (
exp->exp_min > 0)
7140 exp->exp_left->ref++;
7141 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
exp->exp_left,
NULL,
7143 if (
ret == emptyExp) {
7146 return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ,
ret, tmp,
7166xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr
exp,
7179 return(forbiddenExp);
7181 return(xmlExpStringDeriveInt(ctxt,
exp,
input));
7185xmlExpCheckCard(xmlExpNodePtr
exp, xmlExpNodePtr sub) {
7188 if (sub->c_max == -1) {
7189 if (
exp->c_max != -1)
7191 }
else if ((
exp->c_max >= 0) && (
exp->c_max < sub->c_max)) {
7195 if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(
exp)))
7201static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr
exp,
7219xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr
exp, xmlExpNodePtr sub,
7220 xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
7222 xmlExpNodePtr tmp, tmp2;
7225 if (remain !=
NULL) *remain =
NULL;
7226 if (
exp->c_max == -1)
return(0);
7227 if (IS_NILLABLE(
exp) && (!IS_NILLABLE(sub)))
return(0);
7229 for (
i = 1;
i <=
exp->c_max;
i++) {
7231 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
7236 if (!xmlExpCheckCard(tmp,
exp)) {
7237 xmlExpFree(ctxt, tmp);
7240 tmp2 = xmlExpExpDeriveInt(ctxt, tmp,
exp);
7242 xmlExpFree(ctxt, tmp);
7245 if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) {
7249 xmlExpFree(ctxt, tmp2);
7253 xmlExpFree(ctxt, tmp);
7256 xmlExpFree(ctxt, tmp);
7257 xmlExpFree(ctxt, tmp2);
7274xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr
exp, xmlExpNodePtr sub) {
7275 xmlExpNodePtr
ret, tmp, tmp2, tmp3;
7283 if ((
exp == sub) && (
exp->c_max >= 0)) {
7289 if (sub->type == XML_EXP_EMPTY) {
7293 if (sub->type == XML_EXP_SEQ) {
7294 tmp = xmlExpExpDeriveInt(ctxt,
exp, sub->exp_left);
7297 if (tmp == forbiddenExp)
7299 ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
7300 xmlExpFree(ctxt, tmp);
7303 if (sub->type == XML_EXP_OR) {
7304 tmp = xmlExpExpDeriveInt(ctxt,
exp, sub->exp_left);
7305 if (tmp == forbiddenExp)
7309 ret = xmlExpExpDeriveInt(ctxt,
exp, sub->exp_right);
7310 if ((
ret ==
NULL) || (
ret == forbiddenExp)) {
7311 xmlExpFree(ctxt, tmp);
7314 return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp,
ret,
NULL, 0, 0));
7316 if (!xmlExpCheckCard(
exp, sub)) {
7317 return(forbiddenExp);
7319 switch (
exp->type) {
7321 if (sub == emptyExp)
7323 return(forbiddenExp);
7324 case XML_EXP_FORBID:
7325 return(forbiddenExp);
7327 if (sub->type == XML_EXP_ATOM) {
7329 if (
exp->exp_str == sub->exp_str) {
7332 return(forbiddenExp);
7334 if ((sub->type == XML_EXP_COUNT) &&
7335 (sub->exp_max == 1) &&
7336 (sub->exp_left->type == XML_EXP_ATOM)) {
7338 if (
exp->exp_str == sub->exp_left->exp_str) {
7341 return(forbiddenExp);
7343 return(forbiddenExp);
7346 if (xmlExpCheckCard(
exp->exp_left, sub)) {
7348 ret = xmlExpExpDeriveInt(ctxt,
exp->exp_left, sub);
7349 if ((
ret != forbiddenExp) && (
ret !=
NULL)) {
7357 exp->exp_right->ref++;
7358 return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ,
ret,
7363 if (sub->type == XML_EXP_COUNT) {
7366 ret = xmlExpExpDeriveInt(ctxt,
exp->exp_left, sub->exp_left);
7369 if (
ret != forbiddenExp) {
7370 if (sub->exp_max < 0)
7373 max = sub->exp_max -1;
7374 if (sub->exp_min > 0)
7375 min = sub->exp_min -1;
7378 exp->exp_right->ref++;
7379 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ,
ret,
7384 sub->exp_left->ref++;
7385 tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
7388 xmlExpFree(ctxt, tmp);
7391 ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7392 xmlExpFree(ctxt, tmp);
7393 xmlExpFree(ctxt, tmp2);
7400 ret = xmlExpExpDeriveInt(ctxt,
exp->exp_left, sub);
7403 tmp = xmlExpExpDeriveInt(ctxt,
exp->exp_right, sub);
7405 xmlExpFree(ctxt,
ret);
7408 return(xmlExpHashGetEntry(ctxt, XML_EXP_OR,
ret, tmp,
NULL, 0, 0));
7409 case XML_EXP_COUNT: {
7412 if (sub->type == XML_EXP_COUNT) {
7416 tmp = xmlExpExpDeriveInt(ctxt,
exp->exp_left, sub->exp_left);
7419 if (tmp == forbiddenExp) {
7422 mult = xmlExpDivide(ctxt, sub->exp_left,
exp->exp_left,
7425 return(forbiddenExp);
7427 if (sub->exp_max == -1) {
7429 if (
exp->exp_max == -1) {
7430 if (
exp->exp_min <= sub->exp_min * mult)
7433 min =
exp->exp_min - sub->exp_min * mult;
7435 xmlExpFree(ctxt, tmp);
7436 return(forbiddenExp);
7439 if (
exp->exp_max == -1) {
7440 if (
exp->exp_min > sub->exp_min * mult) {
7442 min =
exp->exp_min - sub->exp_min * mult;
7448 if (
exp->exp_max < sub->exp_max * mult) {
7449 xmlExpFree(ctxt, tmp);
7450 return(forbiddenExp);
7452 if (sub->exp_max * mult >
exp->exp_min)
7455 min =
exp->exp_min - sub->exp_max * mult;
7456 max =
exp->exp_max - sub->exp_max * mult;
7459 }
else if (!IS_NILLABLE(tmp)) {
7464 xmlExpFree(ctxt, tmp);
7465 return(forbiddenExp);
7466 }
else if (sub->exp_max == -1) {
7467 if (
exp->exp_max == -1) {
7468 if (
exp->exp_min <= sub->exp_min) {
7473 min =
exp->exp_min - sub->exp_min;
7475 }
else if (
exp->exp_min > sub->exp_min) {
7476 xmlExpFree(ctxt, tmp);
7477 return(forbiddenExp);
7483 if (
exp->exp_max == -1) {
7484 if (
exp->exp_min > sub->exp_min) {
7486 min =
exp->exp_min - sub->exp_min;
7492 if (
exp->exp_max < sub->exp_max) {
7493 xmlExpFree(ctxt, tmp);
7494 return(forbiddenExp);
7496 if (sub->exp_max >
exp->exp_min)
7499 min =
exp->exp_min - sub->exp_max;
7500 max =
exp->exp_max - sub->exp_max;
7503 exp->exp_left->ref++;
7504 tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
exp->exp_left,
7509 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7513 tmp = xmlExpExpDeriveInt(ctxt,
exp->exp_left, sub);
7516 if (tmp == forbiddenExp) {
7517 return(forbiddenExp);
7519 if (
exp->exp_min > 0)
7523 if (
exp->exp_max < 0)
7528 exp->exp_left->ref++;
7529 tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
exp->exp_left,
7533 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7539 if (IS_NILLABLE(sub)) {
7540 if (!(IS_NILLABLE(
exp)))
7541 return(forbiddenExp);
7550 if (ctxt->tabSize == 0)
7562 len = xmlExpGetStartInt(ctxt, sub,
tab, ctxt->tabSize, 0);
7573 len = xmlExpGetStartInt(ctxt, sub,
tab, ctxt->tabSize, 0);
7575 for (
i = 0;
i <
len;
i++) {
7576 tmp = xmlExpStringDeriveInt(ctxt,
exp,
tab[
i]);
7577 if ((tmp ==
NULL) || (tmp == forbiddenExp)) {
7578 xmlExpFree(ctxt,
ret);
7582 tmp2 = xmlExpStringDeriveInt(ctxt, sub,
tab[
i]);
7583 if ((tmp2 ==
NULL) || (tmp2 == forbiddenExp)) {
7584 xmlExpFree(ctxt, tmp);
7585 xmlExpFree(ctxt,
ret);
7589 tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7590 xmlExpFree(ctxt, tmp);
7591 xmlExpFree(ctxt, tmp2);
7593 if ((tmp3 ==
NULL) || (tmp3 == forbiddenExp)) {
7594 xmlExpFree(ctxt,
ret);
7602 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR,
ret, tmp3,
NULL, 0, 0);
7628xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr
exp, xmlExpNodePtr sub) {
7635 if (IS_NILLABLE(sub) && (!IS_NILLABLE(
exp))) {
7636 return(forbiddenExp);
7638 if (xmlExpCheckCard(
exp, sub) == 0) {
7639 return(forbiddenExp);
7641 return(xmlExpExpDeriveInt(ctxt,
exp, sub));
7656xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr
exp, xmlExpNodePtr sub) {
7669 if (IS_NILLABLE(sub) && (!IS_NILLABLE(
exp))) {
7672 if (xmlExpCheckCard(
exp, sub) == 0) {
7675 tmp = xmlExpExpDeriveInt(ctxt,
exp, sub);
7678 if (tmp == forbiddenExp)
7680 if (tmp == emptyExp)
7682 if ((tmp !=
NULL) && (IS_NILLABLE(tmp))) {
7683 xmlExpFree(ctxt, tmp);
7686 xmlExpFree(ctxt, tmp);
7696static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
7699#define CUR (*ctxt->cur)
7701#define NEXT ctxt->cur++;
7703#define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
7704#define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
7707xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
7715 if ((
CUR <
'0') || (
CUR >
'9'))
7717 while ((
CUR >=
'0') && (
CUR <=
'9')) {
7725xmlExpParseOr(xmlExpCtxtPtr ctxt) {
7732 if (*ctxt->cur ==
'(') {
7734 ret = xmlExpParseExpr(ctxt);
7736 if (*ctxt->cur !=
')') {
7738 xmlExpFree(ctxt,
ret);
7743 goto parse_quantifier;
7746 (
CUR !=
')') && (
CUR !=
'|') && (
CUR !=
',') && (
CUR !=
'{') &&
7747 (
CUR !=
'*') && (
CUR !=
'+') && (
CUR !=
'?') && (
CUR !=
'}'))
7752 ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM,
NULL,
NULL,
val, 0, 0);
7761 min = xmlExpParseNumber(ctxt);
7763 xmlExpFree(ctxt,
ret);
7769 max = xmlExpParseNumber(ctxt);
7774 xmlExpFree(ctxt,
ret);
7781 }
else if (
CUR ==
'?') {
7786 }
else if (
CUR ==
'+') {
7791 }
else if (
CUR ==
'*') {
7802xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
7805 ret = xmlExpParseOr(ctxt);
7807 while (
CUR ==
'|') {
7809 right = xmlExpParseOr(ctxt);
7811 xmlExpFree(ctxt,
ret);
7822xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
7825 ret = xmlExpParseSeq(ctxt);
7827 while (
CUR ==
',') {
7829 right = xmlExpParseSeq(ctxt);
7831 xmlExpFree(ctxt,
ret);
7859xmlExpParse(xmlExpCtxtPtr ctxt,
const char *
expr) {
7865 ret = xmlExpParseExpr(ctxt);
7867 if (*ctxt->cur != 0) {
7868 xmlExpFree(ctxt,
ret);
7875xmlExpDumpInt(xmlBufferPtr
buf, xmlExpNodePtr
expr,
int glob) {
7879 if (
glob) xmlBufferWriteChar(
buf,
"(");
7882 xmlBufferWriteChar(
buf,
"empty");
7884 case XML_EXP_FORBID:
7885 xmlBufferWriteChar(
buf,
"forbidden");
7888 xmlBufferWriteCHAR(
buf,
expr->exp_str);
7892 if ((
c->type == XML_EXP_SEQ) || (
c->type == XML_EXP_OR))
7893 xmlExpDumpInt(
buf,
c, 1);
7895 xmlExpDumpInt(
buf,
c, 0);
7896 xmlBufferWriteChar(
buf,
" , ");
7897 c =
expr->exp_right;
7898 if ((
c->type == XML_EXP_SEQ) || (
c->type == XML_EXP_OR))
7899 xmlExpDumpInt(
buf,
c, 1);
7901 xmlExpDumpInt(
buf,
c, 0);
7905 if ((
c->type == XML_EXP_SEQ) || (
c->type == XML_EXP_OR))
7906 xmlExpDumpInt(
buf,
c, 1);
7908 xmlExpDumpInt(
buf,
c, 0);
7909 xmlBufferWriteChar(
buf,
" | ");
7910 c =
expr->exp_right;
7911 if ((
c->type == XML_EXP_SEQ) || (
c->type == XML_EXP_OR))
7912 xmlExpDumpInt(
buf,
c, 1);
7914 xmlExpDumpInt(
buf,
c, 0);
7916 case XML_EXP_COUNT: {
7920 if ((
c->type == XML_EXP_SEQ) || (
c->type == XML_EXP_OR))
7921 xmlExpDumpInt(
buf,
c, 1);
7923 xmlExpDumpInt(
buf,
c, 0);
7924 if ((
expr->exp_min == 0) && (
expr->exp_max == 1)) {
7927 }
else if ((
expr->exp_min == 0) && (
expr->exp_max == -1)) {
7930 }
else if ((
expr->exp_min == 1) && (
expr->exp_max == -1)) {
7933 }
else if (
expr->exp_max ==
expr->exp_min) {
7935 }
else if (
expr->exp_max < 0) {
7941 xmlBufferWriteChar(
buf, rep);
7948 xmlBufferWriteChar(
buf,
")");
7958xmlExpDump(xmlBufferPtr
buf, xmlExpNodePtr
expr) {
7973xmlExpMaxToken(xmlExpNodePtr
expr) {
7976 return(
expr->c_max);
7988xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
7991 return(ctxt->nb_nodes);
8003xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
8006 return(ctxt->nb_cons);
struct outqueuenode * tail
INT copy(TCHAR source[MAX_PATH], TCHAR dest[MAX_PATH], INT append, DWORD lpdwFlags, BOOL bTouch)
_In_ fcb _In_ chunk _In_ uint64_t _In_ uint64_t _In_ bool _In_opt_ void _In_opt_ PIRP _In_ LIST_ENTRY * rollback
static WCHAR no[MAX_STRING_RESOURCE_LEN]
int WINAPIV fprintf(FILE *file, const char *format,...)
_ACRTIMP size_t __cdecl strlen(const char *)
unsigned short(__cdecl typeof(TIFFCurrentDirectory))(struct tiff *)
char ** glob(const char *v)
GLint GLint GLsizei GLsizei GLsizei depth
GLuint GLuint GLsizei count
GLuint GLuint GLsizei GLenum type
GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * data
GLdouble GLdouble GLdouble GLdouble top
GLenum GLuint GLenum GLsizei const GLchar * buf
GLboolean GLenum GLenum GLvoid * values
GLint GLint GLsizei GLuint * counters
GLenum GLenum GLenum input
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat token
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
#define memcpy(s1, s2, n)
static IPrintDialogCallback callback
static unsigned __int64 next
void xmlDictFree(xmlDictPtr dict)
const xmlChar * xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len)
int xmlDictReference(xmlDictPtr dict)
xmlDictPtr xmlDictCreate(void)
const xmlChar * xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len)
xmlReallocFunc xmlRealloc
xmlMallocFunc xmlMallocAtomic
XML_HIDDEN void __xmlRaiseError(xmlStructuredErrorFunc schannel, xmlGenericErrorFunc channel, void *data, void *ctx, void *nod, int domain, int code, xmlErrorLevel level, const char *file, int line, const char *str1, const char *str2, const char *str3, int int1, int col, const char *msg,...) LIBXML_ATTR_FORMAT(16
XML_HIDDEN void xmlAutomataSetFlags(xmlAutomataPtr am, int flags)
SOCKET WSAAPI accept(IN SOCKET s, OUT LPSOCKADDR addr, OUT INT FAR *addrlen)
static struct wctab tab[]
@ XML_REGEXP_COMPILE_ERROR
XMLPUBFUN xmlChar * xmlStrndup(const xmlChar *cur, int len)
XMLPUBFUN int XMLPUBFUN int XMLPUBFUN int xmlGetUTF8Char(const unsigned char *utf, int *len)
XMLPUBFUN int xmlStrEqual(const xmlChar *str1, const xmlChar *str2)
XMLPUBFUN const xmlChar * xmlStrchr(const xmlChar *str, xmlChar val)
XMLPUBFUN xmlChar * xmlStrdup(const xmlChar *cur)