20#ifdef LIBXML_REGEXP_ENABLED
35#define SIZE_MAX ((size_t) -1)
43#define MAX_PUSH 10000000
49 ctxt->error = XML_REGEXP_COMPILE_ERROR; \
50 xmlRegexpErrCompile(ctxt, str);
51#define NEXT ctxt->cur++
52#define CUR (*(ctxt->cur))
53#define NXT(index) (ctxt->cur[index])
55#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
56#define NEXTL(l) ctxt->cur += l;
57#define XML_REG_STRING_SEPARATOR '|'
62#define PREV (ctxt->cur[-1])
70 xmlGenericError(xmlGenericErrorContext, \
71 "Unimplemented block at %s:%d\n", \
84 XML_REGEXP_EPSILON = 1,
93 XML_REGEXP_NOTINITNAME,
95 XML_REGEXP_NOTNAMECHAR,
97 XML_REGEXP_NOTDECIMAL,
99 XML_REGEXP_NOTREALCHAR,
100 XML_REGEXP_LETTER = 100,
101 XML_REGEXP_LETTER_UPPERCASE,
102 XML_REGEXP_LETTER_LOWERCASE,
103 XML_REGEXP_LETTER_TITLECASE,
104 XML_REGEXP_LETTER_MODIFIER,
105 XML_REGEXP_LETTER_OTHERS,
107 XML_REGEXP_MARK_NONSPACING,
108 XML_REGEXP_MARK_SPACECOMBINING,
109 XML_REGEXP_MARK_ENCLOSING,
111 XML_REGEXP_NUMBER_DECIMAL,
112 XML_REGEXP_NUMBER_LETTER,
113 XML_REGEXP_NUMBER_OTHERS,
115 XML_REGEXP_PUNCT_CONNECTOR,
116 XML_REGEXP_PUNCT_DASH,
117 XML_REGEXP_PUNCT_OPEN,
118 XML_REGEXP_PUNCT_CLOSE,
119 XML_REGEXP_PUNCT_INITQUOTE,
120 XML_REGEXP_PUNCT_FINQUOTE,
121 XML_REGEXP_PUNCT_OTHERS,
123 XML_REGEXP_SEPAR_SPACE,
124 XML_REGEXP_SEPAR_LINE,
125 XML_REGEXP_SEPAR_PARA,
127 XML_REGEXP_SYMBOL_MATH,
128 XML_REGEXP_SYMBOL_CURRENCY,
129 XML_REGEXP_SYMBOL_MODIFIER,
130 XML_REGEXP_SYMBOL_OTHERS,
132 XML_REGEXP_OTHER_CONTROL,
133 XML_REGEXP_OTHER_FORMAT,
134 XML_REGEXP_OTHER_PRIVATE,
136 XML_REGEXP_BLOCK_NAME
140 XML_REGEXP_QUANT_EPSILON = 1,
141 XML_REGEXP_QUANT_ONCE,
142 XML_REGEXP_QUANT_OPT,
143 XML_REGEXP_QUANT_MULT,
144 XML_REGEXP_QUANT_PLUS,
145 XML_REGEXP_QUANT_ONCEONLY,
146 XML_REGEXP_QUANT_ALL,
147 XML_REGEXP_QUANT_RANGE
151 XML_REGEXP_START_STATE = 1,
152 XML_REGEXP_FINAL_STATE,
153 XML_REGEXP_TRANS_STATE,
154 XML_REGEXP_SINK_STATE,
155 XML_REGEXP_UNREACH_STATE
159 XML_REGEXP_MARK_NORMAL = 0,
160 XML_REGEXP_MARK_START,
161 XML_REGEXP_MARK_VISITED
164typedef struct _xmlRegRange xmlRegRange;
165typedef xmlRegRange *xmlRegRangePtr;
175typedef struct _xmlRegAtom xmlRegAtom;
176typedef xmlRegAtom *xmlRegAtomPtr;
178typedef struct _xmlAutomataState xmlRegState;
179typedef xmlRegState *xmlRegStatePtr;
184 xmlRegQuantType quant;
192 xmlRegStatePtr
start;
193 xmlRegStatePtr start0;
197 xmlRegRangePtr *ranges;
201typedef struct _xmlRegCounter xmlRegCounter;
202typedef xmlRegCounter *xmlRegCounterPtr;
204struct _xmlRegCounter {
209typedef struct _xmlRegTrans xmlRegTrans;
210typedef xmlRegTrans *xmlRegTransPtr;
220struct _xmlAutomataState {
221 xmlRegStateType
type;
222 xmlRegMarkedType mark;
223 xmlRegMarkedType markd;
224 xmlRegMarkedType reached;
235typedef struct _xmlAutomata xmlRegParserCtxt;
236typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
238#define AM_AUTOMATA_RNG 1
247 xmlRegStatePtr
start;
249 xmlRegStatePtr
state;
255 xmlRegAtomPtr *atoms;
259 xmlRegStatePtr *states;
275 xmlRegStatePtr *states;
277 xmlRegAtomPtr *atoms;
292typedef struct _xmlRegExecRollback xmlRegExecRollback;
293typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
295struct _xmlRegExecRollback {
296 xmlRegStatePtr
state;
302typedef struct _xmlRegInputToken xmlRegInputToken;
303typedef xmlRegInputToken *xmlRegInputTokenPtr;
305struct _xmlRegInputToken {
310struct _xmlRegExecCtxt {
317 xmlRegStatePtr
state;
326 xmlRegExecRollback *rollbacks;
341 xmlRegInputTokenPtr inputStack;
347 xmlRegStatePtr errState;
353#define REGEXP_ALL_COUNTER 0x123456
354#define REGEXP_ALL_LAX_COUNTER 0x123457
356static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt,
int top);
357static void xmlRegFreeState(xmlRegStatePtr
state);
358static void xmlRegFreeAtom(xmlRegAtomPtr atom);
359static int xmlRegStrEqualWildcard(
const xmlChar *expStr,
const xmlChar *valStr);
360static int xmlRegCheckCharacter(xmlRegAtomPtr atom,
int codepoint);
361static int xmlRegCheckCharacterRange(xmlRegAtomType
type,
int codepoint,
364void xmlAutomataSetFlags(xmlAutomataPtr am,
int flags);
378xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt,
const char *
extra)
380 const char *regexp =
NULL;
382 regexp = (
const char *) ctxt->string;
388 "Memory allocation failed : %s\n",
extra);
398xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt,
const char *
extra)
400 const char *regexp =
NULL;
404 regexp = (
const char *) ctxt->string;
405 idx = ctxt->cur - ctxt->string;
411 "failed to compile: %s\n",
extra);
420static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
433xmlRegCalloc2(
size_t dim1,
size_t dim2,
size_t elemSize) {
438 if (dim1 >
SIZE_MAX / dim2 / elemSize)
440 totalSize = dim1 * dim2 * elemSize;
456xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
461 xmlRegexpErrMemory(ctxt,
"compiling regexp");
465 ret->string = ctxt->string;
466 ret->nbStates = ctxt->nbStates;
467 ret->states = ctxt->states;
468 ret->nbAtoms = ctxt->nbAtoms;
469 ret->atoms = ctxt->atoms;
470 ret->nbCounters = ctxt->nbCounters;
471 ret->counters = ctxt->counters;
472 ret->determinist = ctxt->determinist;
473 ret->flags = ctxt->flags;
474 if (
ret->determinist == -1) {
475 xmlRegexpIsDeterminist(
ret);
478 if ((
ret->determinist != 0) &&
479 (
ret->nbCounters == 0) &&
483 (
ret->atoms[0]->type == XML_REGEXP_STRING)) {
484 int i,
j, nbstates = 0, nbatoms = 0;
501 if (stateRemap ==
NULL) {
502 xmlRegexpErrMemory(ctxt,
"compiling regexp");
506 for (
i = 0;
i <
ret->nbStates;
i++) {
508 stateRemap[
i] = nbstates;
514#ifdef DEBUG_COMPACTION
515 printf(
"Final: %d states\n", nbstates);
518 if (stringMap ==
NULL) {
519 xmlRegexpErrMemory(ctxt,
"compiling regexp");
525 if (stringRemap ==
NULL) {
526 xmlRegexpErrMemory(ctxt,
"compiling regexp");
532 for (
i = 0;
i <
ret->nbAtoms;
i++) {
533 if ((
ret->atoms[
i]->type == XML_REGEXP_STRING) &&
534 (
ret->atoms[
i]->quant == XML_REGEXP_QUANT_ONCE)) {
536 for (
j = 0;
j < nbatoms;
j++) {
543 stringRemap[
i] = nbatoms;
545 if (stringMap[nbatoms] ==
NULL) {
546 for (
i = 0;
i < nbatoms;
i++)
559 for (
i = 0;
i < nbatoms;
i++)
566#ifdef DEBUG_COMPACTION
567 printf(
"Final: %d atoms\n", nbatoms);
569 transitions = (
int *) xmlRegCalloc2(nbstates + 1, nbatoms + 1,
571 if (transitions ==
NULL) {
574 for (
i = 0;
i < nbatoms;
i++)
587 for (
i = 0;
i <
ret->nbStates;
i++) {
588 int stateno, atomno, targetno, prev;
589 xmlRegStatePtr
state;
590 xmlRegTransPtr trans;
592 stateno = stateRemap[
i];
597 transitions[stateno * (nbatoms + 1)] =
state->type;
600 trans = &(
state->trans[
j]);
601 if ((trans->to == -1) || (trans->atom ==
NULL))
603 atomno = stringRemap[trans->atom->no];
604 if ((trans->atom->data !=
NULL) && (transdata ==
NULL)) {
605 transdata = (
void **) xmlRegCalloc2(nbstates, nbatoms,
607 if (transdata ==
NULL) {
608 xmlRegexpErrMemory(ctxt,
"compiling regexp");
612 targetno = stateRemap[trans->to];
618 prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
620 if (prev != targetno + 1) {
621 ret->determinist = 0;
622#ifdef DEBUG_COMPACTION
623 printf(
"Indet: state %d trans %d, atom %d to %d : %d to %d\n",
624 i,
j, trans->atom->no, trans->to, atomno, targetno);
625 printf(
" previous to is %d\n", prev);
627 if (transdata !=
NULL)
632 for (
i = 0;
i < nbatoms;
i++)
639 printf(
"State %d trans %d: atom %d to %d : %d to %d\n",
640 i,
j, trans->atom->no, trans->to, atomno, targetno);
642 transitions[stateno * (nbatoms + 1) + atomno + 1] =
644 if (transdata !=
NULL)
645 transdata[stateno * nbatoms + atomno] =
650 ret->determinist = 1;
651#ifdef DEBUG_COMPACTION
655 for (
i = 0;
i < nbstates;
i++) {
656 for (
j = 0;
j < nbatoms + 1;
j++) {
657 printf(
"%02d ", transitions[
i * (nbatoms + 1) +
j]);
667 for (
i = 0;
i <
ret->nbStates;
i++)
668 xmlRegFreeState(
ret->states[
i]);
674 for (
i = 0;
i <
ret->nbAtoms;
i++)
675 xmlRegFreeAtom(
ret->atoms[
i]);
681 ret->compact = transitions;
682 ret->transdata = transdata;
683 ret->stringMap = stringMap;
684 ret->nbstrings = nbatoms;
685 ret->nbstates = nbstates;
695 ctxt->nbCounters = 0;
696 ctxt->counters =
NULL;
708static xmlRegParserCtxtPtr
709xmlRegNewParserCtxt(
const xmlChar *
string) {
710 xmlRegParserCtxtPtr
ret;
712 ret = (xmlRegParserCtxtPtr)
xmlMalloc(
sizeof(xmlRegParserCtxt));
715 memset(
ret, 0,
sizeof(xmlRegParserCtxt));
722 ret->determinist = -1;
739xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
745 xmlRegexpErrMemory(ctxt,
"allocating range");
762xmlRegFreeRange(xmlRegRangePtr
range) {
780xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr
range) {
793 xmlRegexpErrMemory(ctxt,
"allocating range");
794 xmlRegFreeRange(
ret);
811xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType
type) {
816 xmlRegexpErrMemory(ctxt,
"allocating atom");
821 ret->quant = XML_REGEXP_QUANT_ONCE;
834xmlRegFreeAtom(xmlRegAtomPtr atom) {
840 for (
i = 0;
i < atom->nbRanges;
i++)
841 xmlRegFreeRange(atom->ranges[
i]);
842 if (atom->ranges !=
NULL)
844 if ((atom->type == XML_REGEXP_STRING) && (atom->valuep !=
NULL))
846 if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 !=
NULL))
848 if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep !=
NULL))
863xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
868 xmlRegexpErrMemory(ctxt,
"copying atom");
872 ret->type = atom->type;
873 ret->quant = atom->quant;
874 ret->min = atom->min;
875 ret->max = atom->max;
876 if (atom->nbRanges > 0) {
879 ret->ranges = (xmlRegRangePtr *)
xmlMalloc(
sizeof(xmlRegRangePtr) *
882 xmlRegexpErrMemory(ctxt,
"copying atom");
885 for (
i = 0;
i < atom->nbRanges;
i++) {
886 ret->ranges[
i] = xmlRegCopyRange(ctxt, atom->ranges[
i]);
889 ret->nbRanges =
i + 1;
900xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
905 xmlRegexpErrMemory(ctxt,
"allocating state");
909 ret->type = XML_REGEXP_TRANS_STATE;
910 ret->mark = XML_REGEXP_MARK_NORMAL;
921xmlRegFreeState(xmlRegStatePtr
state) {
939xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
944 if (ctxt->string !=
NULL)
946 if (ctxt->states !=
NULL) {
947 for (
i = 0;
i < ctxt->nbStates;
i++)
948 xmlRegFreeState(ctxt->states[
i]);
951 if (ctxt->atoms !=
NULL) {
952 for (
i = 0;
i < ctxt->nbAtoms;
i++)
953 xmlRegFreeAtom(ctxt->atoms[
i]);
956 if (ctxt->counters !=
NULL)
968xmlRegPrintAtomType(
FILE *output, xmlRegAtomType
type) {
970 case XML_REGEXP_EPSILON:
971 fprintf(output,
"epsilon ");
break;
972 case XML_REGEXP_CHARVAL:
973 fprintf(output,
"charval ");
break;
974 case XML_REGEXP_RANGES:
975 fprintf(output,
"ranges ");
break;
976 case XML_REGEXP_SUBREG:
977 fprintf(output,
"subexpr ");
break;
978 case XML_REGEXP_STRING:
979 fprintf(output,
"string ");
break;
980 case XML_REGEXP_ANYCHAR:
981 fprintf(output,
"anychar ");
break;
982 case XML_REGEXP_ANYSPACE:
983 fprintf(output,
"anyspace ");
break;
984 case XML_REGEXP_NOTSPACE:
985 fprintf(output,
"notspace ");
break;
986 case XML_REGEXP_INITNAME:
987 fprintf(output,
"initname ");
break;
988 case XML_REGEXP_NOTINITNAME:
989 fprintf(output,
"notinitname ");
break;
990 case XML_REGEXP_NAMECHAR:
991 fprintf(output,
"namechar ");
break;
992 case XML_REGEXP_NOTNAMECHAR:
993 fprintf(output,
"notnamechar ");
break;
994 case XML_REGEXP_DECIMAL:
995 fprintf(output,
"decimal ");
break;
996 case XML_REGEXP_NOTDECIMAL:
997 fprintf(output,
"notdecimal ");
break;
998 case XML_REGEXP_REALCHAR:
999 fprintf(output,
"realchar ");
break;
1000 case XML_REGEXP_NOTREALCHAR:
1001 fprintf(output,
"notrealchar ");
break;
1002 case XML_REGEXP_LETTER:
1003 fprintf(output,
"LETTER ");
break;
1004 case XML_REGEXP_LETTER_UPPERCASE:
1005 fprintf(output,
"LETTER_UPPERCASE ");
break;
1006 case XML_REGEXP_LETTER_LOWERCASE:
1007 fprintf(output,
"LETTER_LOWERCASE ");
break;
1008 case XML_REGEXP_LETTER_TITLECASE:
1009 fprintf(output,
"LETTER_TITLECASE ");
break;
1010 case XML_REGEXP_LETTER_MODIFIER:
1011 fprintf(output,
"LETTER_MODIFIER ");
break;
1012 case XML_REGEXP_LETTER_OTHERS:
1013 fprintf(output,
"LETTER_OTHERS ");
break;
1014 case XML_REGEXP_MARK:
1015 fprintf(output,
"MARK ");
break;
1016 case XML_REGEXP_MARK_NONSPACING:
1017 fprintf(output,
"MARK_NONSPACING ");
break;
1018 case XML_REGEXP_MARK_SPACECOMBINING:
1019 fprintf(output,
"MARK_SPACECOMBINING ");
break;
1020 case XML_REGEXP_MARK_ENCLOSING:
1021 fprintf(output,
"MARK_ENCLOSING ");
break;
1022 case XML_REGEXP_NUMBER:
1023 fprintf(output,
"NUMBER ");
break;
1024 case XML_REGEXP_NUMBER_DECIMAL:
1025 fprintf(output,
"NUMBER_DECIMAL ");
break;
1026 case XML_REGEXP_NUMBER_LETTER:
1027 fprintf(output,
"NUMBER_LETTER ");
break;
1028 case XML_REGEXP_NUMBER_OTHERS:
1029 fprintf(output,
"NUMBER_OTHERS ");
break;
1030 case XML_REGEXP_PUNCT:
1031 fprintf(output,
"PUNCT ");
break;
1032 case XML_REGEXP_PUNCT_CONNECTOR:
1033 fprintf(output,
"PUNCT_CONNECTOR ");
break;
1034 case XML_REGEXP_PUNCT_DASH:
1035 fprintf(output,
"PUNCT_DASH ");
break;
1036 case XML_REGEXP_PUNCT_OPEN:
1037 fprintf(output,
"PUNCT_OPEN ");
break;
1038 case XML_REGEXP_PUNCT_CLOSE:
1039 fprintf(output,
"PUNCT_CLOSE ");
break;
1040 case XML_REGEXP_PUNCT_INITQUOTE:
1041 fprintf(output,
"PUNCT_INITQUOTE ");
break;
1042 case XML_REGEXP_PUNCT_FINQUOTE:
1043 fprintf(output,
"PUNCT_FINQUOTE ");
break;
1044 case XML_REGEXP_PUNCT_OTHERS:
1045 fprintf(output,
"PUNCT_OTHERS ");
break;
1046 case XML_REGEXP_SEPAR:
1047 fprintf(output,
"SEPAR ");
break;
1048 case XML_REGEXP_SEPAR_SPACE:
1049 fprintf(output,
"SEPAR_SPACE ");
break;
1050 case XML_REGEXP_SEPAR_LINE:
1051 fprintf(output,
"SEPAR_LINE ");
break;
1052 case XML_REGEXP_SEPAR_PARA:
1053 fprintf(output,
"SEPAR_PARA ");
break;
1054 case XML_REGEXP_SYMBOL:
1055 fprintf(output,
"SYMBOL ");
break;
1056 case XML_REGEXP_SYMBOL_MATH:
1057 fprintf(output,
"SYMBOL_MATH ");
break;
1058 case XML_REGEXP_SYMBOL_CURRENCY:
1059 fprintf(output,
"SYMBOL_CURRENCY ");
break;
1060 case XML_REGEXP_SYMBOL_MODIFIER:
1061 fprintf(output,
"SYMBOL_MODIFIER ");
break;
1062 case XML_REGEXP_SYMBOL_OTHERS:
1063 fprintf(output,
"SYMBOL_OTHERS ");
break;
1064 case XML_REGEXP_OTHER:
1065 fprintf(output,
"OTHER ");
break;
1066 case XML_REGEXP_OTHER_CONTROL:
1067 fprintf(output,
"OTHER_CONTROL ");
break;
1068 case XML_REGEXP_OTHER_FORMAT:
1069 fprintf(output,
"OTHER_FORMAT ");
break;
1070 case XML_REGEXP_OTHER_PRIVATE:
1071 fprintf(output,
"OTHER_PRIVATE ");
break;
1072 case XML_REGEXP_OTHER_NA:
1073 fprintf(output,
"OTHER_NA ");
break;
1074 case XML_REGEXP_BLOCK_NAME:
1075 fprintf(output,
"BLOCK ");
break;
1080xmlRegPrintQuantType(
FILE *output, xmlRegQuantType
type) {
1082 case XML_REGEXP_QUANT_EPSILON:
1083 fprintf(output,
"epsilon ");
break;
1084 case XML_REGEXP_QUANT_ONCE:
1085 fprintf(output,
"once ");
break;
1086 case XML_REGEXP_QUANT_OPT:
1088 case XML_REGEXP_QUANT_MULT:
1090 case XML_REGEXP_QUANT_PLUS:
1092 case XML_REGEXP_QUANT_RANGE:
1093 fprintf(output,
"range ");
break;
1094 case XML_REGEXP_QUANT_ONCEONLY:
1095 fprintf(output,
"onceonly ");
break;
1096 case XML_REGEXP_QUANT_ALL:
1097 fprintf(output,
"all ");
break;
1101xmlRegPrintRange(
FILE *output, xmlRegRangePtr
range) {
1105 xmlRegPrintAtomType(output,
range->type);
1110xmlRegPrintAtom(
FILE *output, xmlRegAtomPtr atom) {
1118 xmlRegPrintAtomType(output, atom->type);
1119 xmlRegPrintQuantType(output, atom->quant);
1120 if (atom->quant == XML_REGEXP_QUANT_RANGE)
1121 fprintf(output,
"%d-%d ", atom->min, atom->max);
1122 if (atom->type == XML_REGEXP_STRING)
1123 fprintf(output,
"'%s' ", (
char *) atom->valuep);
1124 if (atom->type == XML_REGEXP_CHARVAL)
1125 fprintf(output,
"char %c\n", atom->codepoint);
1126 else if (atom->type == XML_REGEXP_RANGES) {
1128 fprintf(output,
"%d entries\n", atom->nbRanges);
1129 for (
i = 0;
i < atom->nbRanges;
i++)
1130 xmlRegPrintRange(output, atom->ranges[
i]);
1131 }
else if (atom->type == XML_REGEXP_SUBREG) {
1132 fprintf(output,
"start %d end %d\n", atom->start->no, atom->stop->no);
1139xmlRegPrintTrans(
FILE *output, xmlRegTransPtr trans) {
1141 if (trans ==
NULL) {
1145 if (trans->to < 0) {
1149 if (trans->nd != 0) {
1151 fprintf(output,
"last not determinist, ");
1153 fprintf(output,
"not determinist, ");
1155 if (trans->counter >= 0) {
1156 fprintf(output,
"counted %d, ", trans->counter);
1158 if (trans->count == REGEXP_ALL_COUNTER) {
1159 fprintf(output,
"all transition, ");
1160 }
else if (trans->count >= 0) {
1161 fprintf(output,
"count based %d, ", trans->count);
1163 if (trans->atom ==
NULL) {
1164 fprintf(output,
"epsilon to %d\n", trans->to);
1167 if (trans->atom->type == XML_REGEXP_CHARVAL)
1168 fprintf(output,
"char %c ", trans->atom->codepoint);
1169 fprintf(output,
"atom %d, to %d\n", trans->atom->no, trans->to);
1173xmlRegPrintState(
FILE *output, xmlRegStatePtr
state) {
1181 if (
state->type == XML_REGEXP_START_STATE)
1183 if (
state->type == XML_REGEXP_FINAL_STATE)
1187 for (
i = 0;
i <
state->nbTrans;
i++) {
1188 xmlRegPrintTrans(output, &(
state->trans[
i]));
1192#ifdef DEBUG_REGEXP_GRAPH
1194xmlRegPrintCtxt(
FILE *output, xmlRegParserCtxtPtr ctxt) {
1202 fprintf(output,
"'%s' ", ctxt->string);
1208 fprintf(output,
"%d atoms:\n", ctxt->nbAtoms);
1209 for (
i = 0;
i < ctxt->nbAtoms;
i++) {
1211 xmlRegPrintAtom(output, ctxt->atoms[
i]);
1213 if (ctxt->atom !=
NULL) {
1214 fprintf(output,
"current atom:\n");
1215 xmlRegPrintAtom(output, ctxt->atom);
1217 fprintf(output,
"%d states:", ctxt->nbStates);
1218 if (ctxt->start !=
NULL)
1219 fprintf(output,
" start: %d", ctxt->start->no);
1220 if (ctxt->end !=
NULL)
1221 fprintf(output,
" end: %d", ctxt->end->no);
1223 for (
i = 0;
i < ctxt->nbStates;
i++) {
1224 xmlRegPrintState(output, ctxt->states[
i]);
1226 fprintf(output,
"%d counters:\n", ctxt->nbCounters);
1227 for (
i = 0;
i < ctxt->nbCounters;
i++) {
1228 fprintf(output,
" %d: min %d max %d\n",
i, ctxt->counters[
i].min,
1229 ctxt->counters[
i].max);
1241xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
1244 xmlRegRangePtr
range;
1247 ERROR(
"add range: atom is NULL");
1250 if (atom->type != XML_REGEXP_RANGES) {
1251 ERROR(
"add range: atom is not ranges");
1254 if (atom->maxRanges == 0) {
1255 atom->maxRanges = 4;
1256 atom->ranges = (xmlRegRangePtr *)
xmlMalloc(atom->maxRanges *
1257 sizeof(xmlRegRangePtr));
1258 if (atom->ranges ==
NULL) {
1259 xmlRegexpErrMemory(ctxt,
"adding ranges");
1260 atom->maxRanges = 0;
1263 }
else if (atom->nbRanges >= atom->maxRanges) {
1264 xmlRegRangePtr *tmp;
1265 atom->maxRanges *= 2;
1266 tmp = (xmlRegRangePtr *)
xmlRealloc(atom->ranges, atom->maxRanges *
1267 sizeof(xmlRegRangePtr));
1269 xmlRegexpErrMemory(ctxt,
"adding ranges");
1270 atom->maxRanges /= 2;
1278 range->blockName = blockName;
1279 atom->ranges[atom->nbRanges++] =
range;
1284xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1285 if (ctxt->maxCounters == 0) {
1286 ctxt->maxCounters = 4;
1287 ctxt->counters = (xmlRegCounter *)
xmlMalloc(ctxt->maxCounters *
1288 sizeof(xmlRegCounter));
1289 if (ctxt->counters ==
NULL) {
1290 xmlRegexpErrMemory(ctxt,
"allocating counter");
1291 ctxt->maxCounters = 0;
1294 }
else if (ctxt->nbCounters >= ctxt->maxCounters) {
1296 ctxt->maxCounters *= 2;
1297 tmp = (xmlRegCounter *)
xmlRealloc(ctxt->counters, ctxt->maxCounters *
1298 sizeof(xmlRegCounter));
1300 xmlRegexpErrMemory(ctxt,
"allocating counter");
1301 ctxt->maxCounters /= 2;
1304 ctxt->counters = tmp;
1306 ctxt->counters[ctxt->nbCounters].min = -1;
1307 ctxt->counters[ctxt->nbCounters].max = -1;
1308 return(ctxt->nbCounters++);
1312xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1314 ERROR(
"atom push: atom is NULL");
1317 if (ctxt->maxAtoms == 0) {
1319 ctxt->atoms = (xmlRegAtomPtr *)
xmlMalloc(ctxt->maxAtoms *
1320 sizeof(xmlRegAtomPtr));
1321 if (ctxt->atoms ==
NULL) {
1322 xmlRegexpErrMemory(ctxt,
"pushing atom");
1326 }
else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
1328 ctxt->maxAtoms *= 2;
1329 tmp = (xmlRegAtomPtr *)
xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
1330 sizeof(xmlRegAtomPtr));
1332 xmlRegexpErrMemory(ctxt,
"allocating counter");
1333 ctxt->maxAtoms /= 2;
1338 atom->no = ctxt->nbAtoms;
1339 ctxt->atoms[ctxt->nbAtoms++] = atom;
1344xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr
target,
1346 if (
target->maxTransTo == 0) {
1351 xmlRegexpErrMemory(ctxt,
"adding transition");
1361 xmlRegexpErrMemory(ctxt,
"adding transition");
1372xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr
state,
1373 xmlRegAtomPtr atom, xmlRegStatePtr
target,
1379 ERROR(
"add state: state is NULL");
1383 ERROR(
"add state: target is NULL");
1392 for (nrtrans =
state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
1393 xmlRegTransPtr trans = &(
state->trans[nrtrans]);
1394 if ((trans->atom == atom) &&
1395 (trans->to ==
target->no) &&
1396 (trans->counter ==
counter) &&
1397 (trans->count ==
count)) {
1398#ifdef DEBUG_REGEXP_GRAPH
1399 printf(
"Ignoring duplicate transition from %d to %d\n",
1406 if (
state->maxTrans == 0) {
1407 state->maxTrans = 8;
1409 sizeof(xmlRegTrans));
1411 xmlRegexpErrMemory(ctxt,
"adding transition");
1412 state->maxTrans = 0;
1415 }
else if (
state->nbTrans >=
state->maxTrans) {
1417 state->maxTrans *= 2;
1419 sizeof(xmlRegTrans));
1421 xmlRegexpErrMemory(ctxt,
"adding transition");
1422 state->maxTrans /= 2;
1427#ifdef DEBUG_REGEXP_GRAPH
1429 if (
count == REGEXP_ALL_COUNTER)
1430 printf(
"all transition\n");
1431 else if (
count >= 0)
1435 else if (atom ==
NULL)
1436 printf(
"epsilon transition\n");
1437 else if (atom !=
NULL)
1438 xmlRegPrintAtom(
stdout, atom);
1451xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr
state) {
1453 if (ctxt->maxStates == 0) {
1454 ctxt->maxStates = 4;
1455 ctxt->states = (xmlRegStatePtr *)
xmlMalloc(ctxt->maxStates *
1456 sizeof(xmlRegStatePtr));
1457 if (ctxt->states ==
NULL) {
1458 xmlRegexpErrMemory(ctxt,
"adding state");
1459 ctxt->maxStates = 0;
1462 }
else if (ctxt->nbStates >= ctxt->maxStates) {
1463 xmlRegStatePtr *tmp;
1464 ctxt->maxStates *= 2;
1465 tmp = (xmlRegStatePtr *)
xmlRealloc(ctxt->states, ctxt->maxStates *
1466 sizeof(xmlRegStatePtr));
1468 xmlRegexpErrMemory(ctxt,
"adding state");
1469 ctxt->maxStates /= 2;
1474 state->no = ctxt->nbStates;
1475 ctxt->states[ctxt->nbStates++] =
state;
1488xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
1489 xmlRegStatePtr
from, xmlRegStatePtr to,
1492 to = xmlRegNewState(ctxt);
1493 xmlRegStatePush(ctxt, to);
1497 xmlRegStateAddTrans(ctxt,
from,
NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
1499 xmlRegStateAddTrans(ctxt,
from,
NULL, to, -1, REGEXP_ALL_COUNTER);
1510xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1511 xmlRegStatePtr
from, xmlRegStatePtr to) {
1513 to = xmlRegNewState(ctxt);
1514 xmlRegStatePush(ctxt, to);
1517 xmlRegStateAddTrans(ctxt,
from,
NULL, to, -1, -1);
1529xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1530 xmlRegStatePtr
from, xmlRegStatePtr to,
int counter) {
1532 to = xmlRegNewState(ctxt);
1533 xmlRegStatePush(ctxt, to);
1548xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1549 xmlRegStatePtr
from, xmlRegStatePtr to,
int counter) {
1551 to = xmlRegNewState(ctxt);
1552 xmlRegStatePush(ctxt, to);
1568xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr
from,
1569 xmlRegStatePtr to, xmlRegAtomPtr atom) {
1574 ERROR(
"generate transition: atom == NULL");
1577 if (atom->type == XML_REGEXP_SUBREG) {
1582 if (xmlRegAtomPush(ctxt, atom) < 0) {
1585 if ((to !=
NULL) && (atom->stop != to) &&
1586 (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1590 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1592 }
else if ((to ==
NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
1593 (atom->quant != XML_REGEXP_QUANT_ONCE)) {
1594 to = xmlRegNewState(ctxt);
1595 xmlRegStatePush(ctxt, to);
1597 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1600 switch (atom->quant) {
1601 case XML_REGEXP_QUANT_OPT:
1602 atom->quant = XML_REGEXP_QUANT_ONCE;
1609 xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
1610 xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
1613 xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
1616 case XML_REGEXP_QUANT_MULT:
1617 atom->quant = XML_REGEXP_QUANT_ONCE;
1618 xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1619 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1621 case XML_REGEXP_QUANT_PLUS:
1622 atom->quant = XML_REGEXP_QUANT_ONCE;
1623 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1625 case XML_REGEXP_QUANT_RANGE: {
1627 xmlRegStatePtr inter, newstate;
1635 newstate = xmlRegNewState(ctxt);
1636 xmlRegStatePush(ctxt, newstate);
1645 if ((atom->min == 0) && (atom->start0 ==
NULL)) {
1656 copy = xmlRegCopyAtom(ctxt, atom);
1659 copy->quant = XML_REGEXP_QUANT_ONCE;
1663 if (xmlFAGenerateTransitions(ctxt, atom->start,
NULL,
copy)
1666 inter = ctxt->state;
1667 counter = xmlRegGetCounter(ctxt);
1668 ctxt->counters[
counter].min = atom->min - 1;
1669 ctxt->counters[
counter].max = atom->max - 1;
1671 xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
1674 xmlFAGenerateCountedTransition(ctxt, inter,
1677 xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1685 counter = xmlRegGetCounter(ctxt);
1686 ctxt->counters[
counter].min = atom->min - 1;
1687 ctxt->counters[
counter].max = atom->max - 1;
1689 xmlFAGenerateCountedTransition(ctxt, atom->stop,
1692 xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1696 xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
1702 atom->quant = XML_REGEXP_QUANT_ONCE;
1703 ctxt->state = newstate;
1710 if ((atom->min == 0) && (atom->max == 0) &&
1711 (atom->quant == XML_REGEXP_QUANT_RANGE)) {
1716 to = xmlRegNewState(ctxt);
1718 xmlRegStatePush(ctxt, to);
1723 xmlFAGenerateEpsilonTransition(ctxt,
from, to);
1725 xmlRegFreeAtom(atom);
1729 to = xmlRegNewState(ctxt);
1731 xmlRegStatePush(ctxt, to);
1737 if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
1738 (atom->quant == XML_REGEXP_QUANT_PLUS)) {
1746 tmp = xmlRegNewState(ctxt);
1748 xmlRegStatePush(ctxt, tmp);
1752 xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
1755 if (xmlRegAtomPush(ctxt, atom) < 0) {
1758 if ((atom->quant == XML_REGEXP_QUANT_RANGE) &&
1759 (atom->min == 0) && (atom->max > 0)) {
1763 atom->quant = XML_REGEXP_QUANT_OPT;
1765 xmlRegStateAddTrans(ctxt,
from, atom, to, -1, -1);
1767 switch (atom->quant) {
1768 case XML_REGEXP_QUANT_OPT:
1769 atom->quant = XML_REGEXP_QUANT_ONCE;
1770 xmlFAGenerateEpsilonTransition(ctxt,
from, to);
1772 case XML_REGEXP_QUANT_MULT:
1773 atom->quant = XML_REGEXP_QUANT_ONCE;
1774 xmlFAGenerateEpsilonTransition(ctxt,
from, to);
1775 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1777 case XML_REGEXP_QUANT_PLUS:
1778 atom->quant = XML_REGEXP_QUANT_ONCE;
1779 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1781 case XML_REGEXP_QUANT_RANGE:
1783 xmlFAGenerateEpsilonTransition(ctxt,
from, to);
1800xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt,
int fromnr,
1803 xmlRegStatePtr
from;
1806#ifdef DEBUG_REGEXP_GRAPH
1807 printf(
"xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
1809 from = ctxt->states[fromnr];
1812 to = ctxt->states[tonr];
1815 if ((to->mark == XML_REGEXP_MARK_START) ||
1816 (to->mark == XML_REGEXP_MARK_VISITED))
1819 to->mark = XML_REGEXP_MARK_VISITED;
1820 if (to->type == XML_REGEXP_FINAL_STATE) {
1821#ifdef DEBUG_REGEXP_GRAPH
1822 printf(
"State %d is final, so %d becomes final\n", tonr, fromnr);
1824 from->type = XML_REGEXP_FINAL_STATE;
1826 for (transnr = 0;transnr < to->nbTrans;transnr++) {
1827 if (to->trans[transnr].to < 0)
1829 if (to->trans[transnr].atom ==
NULL) {
1834 if (to->trans[transnr].to != fromnr) {
1835 if (to->trans[transnr].count >= 0) {
1836 int newto = to->trans[transnr].to;
1838 xmlRegStateAddTrans(ctxt,
from,
NULL,
1839 ctxt->states[newto],
1840 -1, to->trans[transnr].count);
1842#ifdef DEBUG_REGEXP_GRAPH
1843 printf(
"Found epsilon trans %d from %d to %d\n",
1844 transnr, tonr, to->trans[transnr].to);
1846 if (to->trans[transnr].counter >= 0) {
1847 xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1848 to->trans[transnr].to,
1849 to->trans[transnr].counter);
1851 xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1852 to->trans[transnr].to,
1858 int newto = to->trans[transnr].to;
1860 if (to->trans[transnr].counter >= 0) {
1861 xmlRegStateAddTrans(ctxt,
from, to->trans[transnr].atom,
1862 ctxt->states[newto],
1863 to->trans[transnr].counter, -1);
1865 xmlRegStateAddTrans(ctxt,
from, to->trans[transnr].atom,
1866 ctxt->states[newto],
counter, -1);
1870 to->mark = XML_REGEXP_MARK_NORMAL;
1895xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1896 int statenr,
i,
j, newto;
1897 xmlRegStatePtr
state, tmp;
1899 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1900 state = ctxt->states[statenr];
1903 if (
state->nbTrans != 1)
1905 if (
state->type == XML_REGEXP_UNREACH_STATE ||
1906 state->type == XML_REGEXP_FINAL_STATE)
1910 (
state->trans[0].to >= 0) &&
1911 (
state->trans[0].to != statenr) &&
1912 (
state->trans[0].counter < 0) &&
1913 (
state->trans[0].count < 0)) {
1914 newto =
state->trans[0].to;
1916 if (
state->type == XML_REGEXP_START_STATE) {
1917#ifdef DEBUG_REGEXP_GRAPH
1918 printf(
"Found simple epsilon trans from start %d to %d\n",
1922#ifdef DEBUG_REGEXP_GRAPH
1923 printf(
"Found simple epsilon trans from %d to %d\n",
1926 for (
i = 0;
i <
state->nbTransTo;
i++) {
1927 tmp = ctxt->states[
state->transTo[
i]];
1928 for (
j = 0;
j < tmp->nbTrans;
j++) {
1929 if (tmp->trans[
j].to == statenr) {
1930#ifdef DEBUG_REGEXP_GRAPH
1931 printf(
"Changed transition %d on %d to go to %d\n",
1934 tmp->trans[
j].to = -1;
1935 xmlRegStateAddTrans(ctxt, tmp, tmp->trans[
j].atom,
1936 ctxt->states[newto],
1937 tmp->trans[
j].counter,
1938 tmp->trans[
j].count);
1942 if (
state->type == XML_REGEXP_FINAL_STATE)
1943 ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
1947 state->type = XML_REGEXP_UNREACH_STATE;
1960xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1961 int statenr, transnr;
1962 xmlRegStatePtr
state;
1965 if (ctxt->states ==
NULL)
return;
1971 xmlFAEliminateSimpleEpsilonTransitions(ctxt);
1972 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1973 state = ctxt->states[statenr];
1974 if ((
state !=
NULL) && (
state->type == XML_REGEXP_UNREACH_STATE)) {
1975#ifdef DEBUG_REGEXP_GRAPH
1976 printf(
"Removed unreachable state %d\n", statenr);
1978 xmlRegFreeState(
state);
1979 ctxt->states[statenr] =
NULL;
1993 for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
1994 state = ctxt->states[statenr];
1997 if ((
state->nbTrans == 0) &&
1998 (
state->type != XML_REGEXP_FINAL_STATE)) {
1999 state->type = XML_REGEXP_SINK_STATE;
2001 for (transnr = 0;transnr <
state->nbTrans;transnr++) {
2002 if ((
state->trans[transnr].atom ==
NULL) &&
2003 (
state->trans[transnr].to >= 0)) {
2004 if (
state->trans[transnr].to == statenr) {
2005 state->trans[transnr].to = -1;
2006#ifdef DEBUG_REGEXP_GRAPH
2007 printf(
"Removed loopback epsilon trans %d on %d\n",
2010 }
else if (
state->trans[transnr].count < 0) {
2011 int newto =
state->trans[transnr].to;
2013#ifdef DEBUG_REGEXP_GRAPH
2014 printf(
"Found epsilon trans %d from %d to %d\n",
2015 transnr, statenr, newto);
2018 state->trans[transnr].to = -2;
2019 state->mark = XML_REGEXP_MARK_START;
2020 xmlFAReduceEpsilonTransitions(ctxt, statenr,
2021 newto,
state->trans[transnr].counter);
2022 state->mark = XML_REGEXP_MARK_NORMAL;
2023#ifdef DEBUG_REGEXP_GRAPH
2025 printf(
"Found counted transition %d on %d\n",
2036 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2037 state = ctxt->states[statenr];
2040 for (transnr = 0;transnr <
state->nbTrans;transnr++) {
2041 xmlRegTransPtr trans = &(
state->trans[transnr]);
2042 if ((trans->atom ==
NULL) &&
2043 (trans->count < 0) &&
2054 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2055 state = ctxt->states[statenr];
2057 state->reached = XML_REGEXP_MARK_NORMAL;
2059 state = ctxt->states[0];
2061 state->reached = XML_REGEXP_MARK_START;
2064 state->reached = XML_REGEXP_MARK_VISITED;
2068 for (transnr = 0;transnr <
state->nbTrans;transnr++) {
2069 if ((
state->trans[transnr].to >= 0) &&
2070 ((
state->trans[transnr].atom !=
NULL) ||
2071 (
state->trans[transnr].count >= 0))) {
2072 int newto =
state->trans[transnr].to;
2074 if (ctxt->states[newto] ==
NULL)
2076 if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
2077 ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
2078 target = ctxt->states[newto];
2087 for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
2088 state = ctxt->states[statenr];
2090 XML_REGEXP_MARK_START)) {
2098 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2099 state = ctxt->states[statenr];
2100 if ((
state !=
NULL) && (
state->reached == XML_REGEXP_MARK_NORMAL)) {
2101#ifdef DEBUG_REGEXP_GRAPH
2102 printf(
"Removed unreachable state %d\n", statenr);
2104 xmlRegFreeState(
state);
2105 ctxt->states[statenr] =
NULL;
2112xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
2115 if ((range1->type == XML_REGEXP_RANGES) ||
2116 (range2->type == XML_REGEXP_RANGES) ||
2117 (range2->type == XML_REGEXP_SUBREG) ||
2118 (range1->type == XML_REGEXP_SUBREG) ||
2119 (range1->type == XML_REGEXP_STRING) ||
2120 (range2->type == XML_REGEXP_STRING))
2124 if (range1->type > range2->type) {
2131 if ((range1->type == XML_REGEXP_ANYCHAR) ||
2132 (range2->type == XML_REGEXP_ANYCHAR)) {
2134 }
else if ((range1->type == XML_REGEXP_EPSILON) ||
2135 (range2->type == XML_REGEXP_EPSILON)) {
2137 }
else if (range1->type == range2->type) {
2138 if (range1->type != XML_REGEXP_CHARVAL)
2140 else if ((range1->end < range2->start) ||
2141 (range2->end < range1->start))
2145 }
else if (range1->type == XML_REGEXP_CHARVAL) {
2155 if (((range1->neg == 0) && (range2->neg != 0)) ||
2156 ((range1->neg != 0) && (range2->neg == 0)))
2159 for (codepoint = range1->start;codepoint <= range1->
end ;codepoint++) {
2160 ret = xmlRegCheckCharacterRange(range2->type, codepoint,
2161 0, range2->start, range2->end,
2165 if (((neg == 1) && (
ret == 0)) ||
2166 ((neg == 0) && (
ret == 1)))
2170 }
else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
2171 (range2->type == XML_REGEXP_BLOCK_NAME)) {
2172 if (range1->type == range2->type) {
2183 }
else if ((range1->type < XML_REGEXP_LETTER) ||
2184 (range2->type < XML_REGEXP_LETTER)) {
2185 if ((range1->type == XML_REGEXP_ANYSPACE) &&
2186 (range2->type == XML_REGEXP_NOTSPACE))
2188 else if ((range1->type == XML_REGEXP_INITNAME) &&
2189 (range2->type == XML_REGEXP_NOTINITNAME))
2191 else if ((range1->type == XML_REGEXP_NAMECHAR) &&
2192 (range2->type == XML_REGEXP_NOTNAMECHAR))
2194 else if ((range1->type == XML_REGEXP_DECIMAL) &&
2195 (range2->type == XML_REGEXP_NOTDECIMAL))
2197 else if ((range1->type == XML_REGEXP_REALCHAR) &&
2198 (range2->type == XML_REGEXP_NOTREALCHAR))
2207 switch (range1->type) {
2208 case XML_REGEXP_LETTER:
2210 if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
2211 (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
2212 (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
2213 (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
2214 (range2->type == XML_REGEXP_LETTER_OTHERS))
2217 case XML_REGEXP_MARK:
2218 if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
2219 (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
2220 (range2->type == XML_REGEXP_MARK_ENCLOSING))
2223 case XML_REGEXP_NUMBER:
2224 if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
2225 (range2->type == XML_REGEXP_NUMBER_LETTER) ||
2226 (range2->type == XML_REGEXP_NUMBER_OTHERS))
2229 case XML_REGEXP_PUNCT:
2230 if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
2231 (range2->type == XML_REGEXP_PUNCT_DASH) ||
2232 (range2->type == XML_REGEXP_PUNCT_OPEN) ||
2233 (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
2234 (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
2235 (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
2236 (range2->type == XML_REGEXP_PUNCT_OTHERS))
2239 case XML_REGEXP_SEPAR:
2240 if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
2241 (range2->type == XML_REGEXP_SEPAR_LINE) ||
2242 (range2->type == XML_REGEXP_SEPAR_PARA))
2245 case XML_REGEXP_SYMBOL:
2246 if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
2247 (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
2248 (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
2249 (range2->type == XML_REGEXP_SYMBOL_OTHERS))
2252 case XML_REGEXP_OTHER:
2253 if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
2254 (range2->type == XML_REGEXP_OTHER_FORMAT) ||
2255 (range2->type == XML_REGEXP_OTHER_PRIVATE))
2259 if ((range2->type >= XML_REGEXP_LETTER) &&
2260 (range2->type < XML_REGEXP_BLOCK_NAME))
2268 if (((range1->neg == 0) && (range2->neg != 0)) ||
2269 ((range1->neg != 0) && (range2->neg == 0)))
2285xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
2286 if ((type1 == XML_REGEXP_EPSILON) ||
2287 (type1 == XML_REGEXP_CHARVAL) ||
2288 (type1 == XML_REGEXP_RANGES) ||
2289 (type1 == XML_REGEXP_SUBREG) ||
2290 (type1 == XML_REGEXP_STRING) ||
2291 (type1 == XML_REGEXP_ANYCHAR))
2293 if ((type2 == XML_REGEXP_EPSILON) ||
2294 (type2 == XML_REGEXP_CHARVAL) ||
2295 (type2 == XML_REGEXP_RANGES) ||
2296 (type2 == XML_REGEXP_SUBREG) ||
2297 (type2 == XML_REGEXP_STRING) ||
2298 (type2 == XML_REGEXP_ANYCHAR))
2301 if (type1 == type2)
return(1);
2304 if (type1 > type2) {
2305 xmlRegAtomType tmp = type1;
2310 case XML_REGEXP_ANYSPACE:
2312 if ((type2 == XML_REGEXP_NOTSPACE) ||
2313 ((type2 >= XML_REGEXP_LETTER) &&
2314 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2315 ((type2 >= XML_REGEXP_NUMBER) &&
2316 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2317 ((type2 >= XML_REGEXP_MARK) &&
2318 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2319 ((type2 >= XML_REGEXP_PUNCT) &&
2320 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2321 ((type2 >= XML_REGEXP_SYMBOL) &&
2322 (type2 <= XML_REGEXP_SYMBOL_OTHERS))
2325 case XML_REGEXP_NOTSPACE:
2327 case XML_REGEXP_INITNAME:
2329 if ((type2 == XML_REGEXP_NOTINITNAME) ||
2330 ((type2 >= XML_REGEXP_NUMBER) &&
2331 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2332 ((type2 >= XML_REGEXP_MARK) &&
2333 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2334 ((type2 >= XML_REGEXP_SEPAR) &&
2335 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2336 ((type2 >= XML_REGEXP_PUNCT) &&
2337 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2338 ((type2 >= XML_REGEXP_SYMBOL) &&
2339 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2340 ((type2 >= XML_REGEXP_OTHER) &&
2341 (type2 <= XML_REGEXP_OTHER_NA))
2344 case XML_REGEXP_NOTINITNAME:
2346 case XML_REGEXP_NAMECHAR:
2348 if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
2349 ((type2 >= XML_REGEXP_MARK) &&
2350 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2351 ((type2 >= XML_REGEXP_PUNCT) &&
2352 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2353 ((type2 >= XML_REGEXP_SEPAR) &&
2354 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2355 ((type2 >= XML_REGEXP_SYMBOL) &&
2356 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2357 ((type2 >= XML_REGEXP_OTHER) &&
2358 (type2 <= XML_REGEXP_OTHER_NA))
2361 case XML_REGEXP_NOTNAMECHAR:
2363 case XML_REGEXP_DECIMAL:
2365 if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2366 (type2 == XML_REGEXP_REALCHAR) ||
2367 ((type2 >= XML_REGEXP_LETTER) &&
2368 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2369 ((type2 >= XML_REGEXP_MARK) &&
2370 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2371 ((type2 >= XML_REGEXP_PUNCT) &&
2372 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2373 ((type2 >= XML_REGEXP_SEPAR) &&
2374 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2375 ((type2 >= XML_REGEXP_SYMBOL) &&
2376 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2377 ((type2 >= XML_REGEXP_OTHER) &&
2378 (type2 <= XML_REGEXP_OTHER_NA))
2381 case XML_REGEXP_NOTDECIMAL:
2383 case XML_REGEXP_REALCHAR:
2385 if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2386 ((type2 >= XML_REGEXP_MARK) &&
2387 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2388 ((type2 >= XML_REGEXP_PUNCT) &&
2389 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2390 ((type2 >= XML_REGEXP_SEPAR) &&
2391 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2392 ((type2 >= XML_REGEXP_SYMBOL) &&
2393 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2394 ((type2 >= XML_REGEXP_OTHER) &&
2395 (type2 <= XML_REGEXP_OTHER_NA))
2398 case XML_REGEXP_NOTREALCHAR:
2405 case XML_REGEXP_LETTER:
2406 if (type2 <= XML_REGEXP_LETTER_OTHERS)
2409 case XML_REGEXP_LETTER_UPPERCASE:
2410 case XML_REGEXP_LETTER_LOWERCASE:
2411 case XML_REGEXP_LETTER_TITLECASE:
2412 case XML_REGEXP_LETTER_MODIFIER:
2413 case XML_REGEXP_LETTER_OTHERS:
2415 case XML_REGEXP_MARK:
2416 if (type2 <= XML_REGEXP_MARK_ENCLOSING)
2419 case XML_REGEXP_MARK_NONSPACING:
2420 case XML_REGEXP_MARK_SPACECOMBINING:
2421 case XML_REGEXP_MARK_ENCLOSING:
2423 case XML_REGEXP_NUMBER:
2424 if (type2 <= XML_REGEXP_NUMBER_OTHERS)
2427 case XML_REGEXP_NUMBER_DECIMAL:
2428 case XML_REGEXP_NUMBER_LETTER:
2429 case XML_REGEXP_NUMBER_OTHERS:
2431 case XML_REGEXP_PUNCT:
2432 if (type2 <= XML_REGEXP_PUNCT_OTHERS)
2435 case XML_REGEXP_PUNCT_CONNECTOR:
2436 case XML_REGEXP_PUNCT_DASH:
2437 case XML_REGEXP_PUNCT_OPEN:
2438 case XML_REGEXP_PUNCT_CLOSE:
2439 case XML_REGEXP_PUNCT_INITQUOTE:
2440 case XML_REGEXP_PUNCT_FINQUOTE:
2441 case XML_REGEXP_PUNCT_OTHERS:
2443 case XML_REGEXP_SEPAR:
2444 if (type2 <= XML_REGEXP_SEPAR_PARA)
2447 case XML_REGEXP_SEPAR_SPACE:
2448 case XML_REGEXP_SEPAR_LINE:
2449 case XML_REGEXP_SEPAR_PARA:
2451 case XML_REGEXP_SYMBOL:
2452 if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
2455 case XML_REGEXP_SYMBOL_MATH:
2456 case XML_REGEXP_SYMBOL_CURRENCY:
2457 case XML_REGEXP_SYMBOL_MODIFIER:
2458 case XML_REGEXP_SYMBOL_OTHERS:
2460 case XML_REGEXP_OTHER:
2461 if (type2 <= XML_REGEXP_OTHER_NA)
2464 case XML_REGEXP_OTHER_CONTROL:
2465 case XML_REGEXP_OTHER_FORMAT:
2466 case XML_REGEXP_OTHER_PRIVATE:
2467 case XML_REGEXP_OTHER_NA:
2487xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2,
int deep) {
2492 if ((atom1 ==
NULL) || (atom2 ==
NULL))
2495 if (atom1->type != atom2->type)
2497 switch (atom1->type) {
2498 case XML_REGEXP_EPSILON:
2501 case XML_REGEXP_STRING:
2503 ret = (atom1->valuep == atom2->valuep);
2508 case XML_REGEXP_CHARVAL:
2509 ret = (atom1->codepoint == atom2->codepoint);
2511 case XML_REGEXP_RANGES:
2532xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2,
int deep) {
2537 if ((atom1 ==
NULL) || (atom2 ==
NULL))
2540 if ((atom1->type == XML_REGEXP_ANYCHAR) ||
2541 (atom2->type == XML_REGEXP_ANYCHAR))
2544 if (atom1->type > atom2->type) {
2550 if (atom1->type != atom2->type) {
2551 ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
2556 switch (atom1->type) {
2557 case XML_REGEXP_STRING:
2559 ret = (atom1->valuep != atom2->valuep);
2567 if (compound1 != compound2)
2570 ret = xmlRegStrEqualWildcard(val1, val2);
2573 case XML_REGEXP_EPSILON:
2574 goto not_determinist;
2575 case XML_REGEXP_CHARVAL:
2576 if (atom2->type == XML_REGEXP_CHARVAL) {
2577 ret = (atom1->codepoint == atom2->codepoint);
2579 ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
2584 case XML_REGEXP_RANGES:
2585 if (atom2->type == XML_REGEXP_RANGES) {
2587 xmlRegRangePtr
r1,
r2;
2592 for (
i = 0;
i < atom1->nbRanges;
i++) {
2593 for (
j = 0;
j < atom2->nbRanges;
j++) {
2594 r1 = atom1->ranges[
i];
2595 r2 = atom2->ranges[
j];
2596 res = xmlFACompareRanges(
r1,
r2);
2607 goto not_determinist;
2610 if (atom1->neg != atom2->neg) {
2628xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr
state,
2629 int to, xmlRegAtomPtr atom) {
2632 int transnr, nbTrans;
2638 if (
state->markd == XML_REGEXP_MARK_VISITED)
2641 if (ctxt->flags & AM_AUTOMATA_RNG)
2648 nbTrans =
state->nbTrans;
2649 for (transnr = 0;transnr < nbTrans;transnr++) {
2650 t1 = &(
state->trans[transnr]);
2654 if (t1->atom ==
NULL) {
2657 state->markd = XML_REGEXP_MARK_VISITED;
2658 res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2668 if (xmlFACompareAtoms(t1->atom, atom, deep)) {
2684xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr
state) {
2685 int transnr, nbTrans;
2689 if (
state->markd != XML_REGEXP_MARK_VISITED)
2693 nbTrans =
state->nbTrans;
2694 for (transnr = 0; transnr < nbTrans; transnr++) {
2695 xmlRegTransPtr t1 = &
state->trans[transnr];
2696 if ((t1->atom ==
NULL) && (t1->to >= 0))
2697 xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
2710xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
2711 int statenr, transnr;
2712 xmlRegStatePtr
state;
2713 xmlRegTransPtr t1, t2,
last;
2718#ifdef DEBUG_REGEXP_GRAPH
2719 printf(
"xmlFAComputesDeterminism\n");
2720 xmlRegPrintCtxt(
stdout, ctxt);
2722 if (ctxt->determinist != -1)
2723 return(ctxt->determinist);
2725 if (ctxt->flags & AM_AUTOMATA_RNG)
2731 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2732 state = ctxt->states[statenr];
2735 if (
state->nbTrans < 2)
2737 for (transnr = 0;transnr <
state->nbTrans;transnr++) {
2738 t1 = &(
state->trans[transnr]);
2743 if (t1->atom ==
NULL) {
2749 for (
i = 0;
i < transnr;
i++) {
2753 if (t2->atom !=
NULL) {
2754 if (t1->to == t2->to) {
2759 if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
2760 (t1->counter == t2->counter) &&
2761 (t1->count == t2->count))
2773 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2774 state = ctxt->states[statenr];
2777 if (
state->nbTrans < 2)
2780 for (transnr = 0;transnr <
state->nbTrans;transnr++) {
2781 t1 = &(
state->trans[transnr]);
2786 if (t1->atom ==
NULL) {
2791 for (
i = 0;
i < transnr;
i++) {
2795 if (t2->atom !=
NULL) {
2800 if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
2807 }
else if (t1->to != -1) {
2812 ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2814 xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
2848 ctxt->determinist =
ret;
2859xmlRegCheckCharacterRange(xmlRegAtomType
type,
int codepoint,
int neg,
2864 case XML_REGEXP_STRING:
2865 case XML_REGEXP_SUBREG:
2866 case XML_REGEXP_RANGES:
2867 case XML_REGEXP_EPSILON:
2869 case XML_REGEXP_ANYCHAR:
2870 ret = ((codepoint !=
'\n') && (codepoint !=
'\r'));
2872 case XML_REGEXP_CHARVAL:
2873 ret = ((codepoint >=
start) && (codepoint <=
end));
2875 case XML_REGEXP_NOTSPACE:
2878 case XML_REGEXP_ANYSPACE:
2879 ret = ((codepoint ==
'\n') || (codepoint ==
'\r') ||
2880 (codepoint ==
'\t') || (codepoint ==
' '));
2882 case XML_REGEXP_NOTINITNAME:
2885 case XML_REGEXP_INITNAME:
2887 (codepoint ==
'_') || (codepoint ==
':'));
2889 case XML_REGEXP_NOTNAMECHAR:
2892 case XML_REGEXP_NAMECHAR:
2894 (codepoint ==
'.') || (codepoint ==
'-') ||
2895 (codepoint ==
'_') || (codepoint ==
':') ||
2898 case XML_REGEXP_NOTDECIMAL:
2901 case XML_REGEXP_DECIMAL:
2902 ret = xmlUCSIsCatNd(codepoint);
2904 case XML_REGEXP_REALCHAR:
2907 case XML_REGEXP_NOTREALCHAR:
2908 ret = xmlUCSIsCatP(codepoint);
2910 ret = xmlUCSIsCatZ(codepoint);
2912 ret = xmlUCSIsCatC(codepoint);
2914 case XML_REGEXP_LETTER:
2915 ret = xmlUCSIsCatL(codepoint);
2917 case XML_REGEXP_LETTER_UPPERCASE:
2918 ret = xmlUCSIsCatLu(codepoint);
2920 case XML_REGEXP_LETTER_LOWERCASE:
2921 ret = xmlUCSIsCatLl(codepoint);
2923 case XML_REGEXP_LETTER_TITLECASE:
2924 ret = xmlUCSIsCatLt(codepoint);
2926 case XML_REGEXP_LETTER_MODIFIER:
2927 ret = xmlUCSIsCatLm(codepoint);
2929 case XML_REGEXP_LETTER_OTHERS:
2930 ret = xmlUCSIsCatLo(codepoint);
2932 case XML_REGEXP_MARK:
2933 ret = xmlUCSIsCatM(codepoint);
2935 case XML_REGEXP_MARK_NONSPACING:
2936 ret = xmlUCSIsCatMn(codepoint);
2938 case XML_REGEXP_MARK_SPACECOMBINING:
2939 ret = xmlUCSIsCatMc(codepoint);
2941 case XML_REGEXP_MARK_ENCLOSING:
2942 ret = xmlUCSIsCatMe(codepoint);
2944 case XML_REGEXP_NUMBER:
2945 ret = xmlUCSIsCatN(codepoint);
2947 case XML_REGEXP_NUMBER_DECIMAL:
2948 ret = xmlUCSIsCatNd(codepoint);
2950 case XML_REGEXP_NUMBER_LETTER:
2951 ret = xmlUCSIsCatNl(codepoint);
2953 case XML_REGEXP_NUMBER_OTHERS:
2954 ret = xmlUCSIsCatNo(codepoint);
2956 case XML_REGEXP_PUNCT:
2957 ret = xmlUCSIsCatP(codepoint);
2959 case XML_REGEXP_PUNCT_CONNECTOR:
2960 ret = xmlUCSIsCatPc(codepoint);
2962 case XML_REGEXP_PUNCT_DASH:
2963 ret = xmlUCSIsCatPd(codepoint);
2965 case XML_REGEXP_PUNCT_OPEN:
2966 ret = xmlUCSIsCatPs(codepoint);
2968 case XML_REGEXP_PUNCT_CLOSE:
2969 ret = xmlUCSIsCatPe(codepoint);
2971 case XML_REGEXP_PUNCT_INITQUOTE:
2972 ret = xmlUCSIsCatPi(codepoint);
2974 case XML_REGEXP_PUNCT_FINQUOTE:
2975 ret = xmlUCSIsCatPf(codepoint);
2977 case XML_REGEXP_PUNCT_OTHERS:
2978 ret = xmlUCSIsCatPo(codepoint);
2980 case XML_REGEXP_SEPAR:
2981 ret = xmlUCSIsCatZ(codepoint);
2983 case XML_REGEXP_SEPAR_SPACE:
2984 ret = xmlUCSIsCatZs(codepoint);
2986 case XML_REGEXP_SEPAR_LINE:
2987 ret = xmlUCSIsCatZl(codepoint);
2989 case XML_REGEXP_SEPAR_PARA:
2990 ret = xmlUCSIsCatZp(codepoint);
2992 case XML_REGEXP_SYMBOL:
2993 ret = xmlUCSIsCatS(codepoint);
2995 case XML_REGEXP_SYMBOL_MATH:
2996 ret = xmlUCSIsCatSm(codepoint);
2998 case XML_REGEXP_SYMBOL_CURRENCY:
2999 ret = xmlUCSIsCatSc(codepoint);
3001 case XML_REGEXP_SYMBOL_MODIFIER:
3002 ret = xmlUCSIsCatSk(codepoint);
3004 case XML_REGEXP_SYMBOL_OTHERS:
3005 ret = xmlUCSIsCatSo(codepoint);
3007 case XML_REGEXP_OTHER:
3008 ret = xmlUCSIsCatC(codepoint);
3010 case XML_REGEXP_OTHER_CONTROL:
3011 ret = xmlUCSIsCatCc(codepoint);
3013 case XML_REGEXP_OTHER_FORMAT:
3014 ret = xmlUCSIsCatCf(codepoint);
3016 case XML_REGEXP_OTHER_PRIVATE:
3017 ret = xmlUCSIsCatCo(codepoint);
3019 case XML_REGEXP_OTHER_NA:
3024 case XML_REGEXP_BLOCK_NAME:
3025 ret = xmlUCSIsBlock(codepoint, (
const char *) blockName);
3034xmlRegCheckCharacter(xmlRegAtomPtr atom,
int codepoint) {
3036 xmlRegRangePtr
range;
3041 switch (atom->type) {
3042 case XML_REGEXP_SUBREG:
3043 case XML_REGEXP_EPSILON:
3045 case XML_REGEXP_CHARVAL:
3046 return(codepoint == atom->codepoint);
3047 case XML_REGEXP_RANGES: {
3050 for (
i = 0;
i < atom->nbRanges;
i++) {
3052 if (
range->neg == 2) {
3053 ret = xmlRegCheckCharacterRange(
range->type, codepoint,
3058 }
else if (
range->neg) {
3059 ret = xmlRegCheckCharacterRange(
range->type, codepoint,
3067 ret = xmlRegCheckCharacterRange(
range->type, codepoint,
3076 case XML_REGEXP_STRING:
3077 printf(
"TODO: XML_REGEXP_STRING\n");
3079 case XML_REGEXP_ANYCHAR:
3080 case XML_REGEXP_ANYSPACE:
3081 case XML_REGEXP_NOTSPACE:
3082 case XML_REGEXP_INITNAME:
3083 case XML_REGEXP_NOTINITNAME:
3084 case XML_REGEXP_NAMECHAR:
3085 case XML_REGEXP_NOTNAMECHAR:
3086 case XML_REGEXP_DECIMAL:
3087 case XML_REGEXP_NOTDECIMAL:
3088 case XML_REGEXP_REALCHAR:
3089 case XML_REGEXP_NOTREALCHAR:
3090 case XML_REGEXP_LETTER:
3091 case XML_REGEXP_LETTER_UPPERCASE:
3092 case XML_REGEXP_LETTER_LOWERCASE:
3093 case XML_REGEXP_LETTER_TITLECASE:
3094 case XML_REGEXP_LETTER_MODIFIER:
3095 case XML_REGEXP_LETTER_OTHERS:
3096 case XML_REGEXP_MARK:
3097 case XML_REGEXP_MARK_NONSPACING:
3098 case XML_REGEXP_MARK_SPACECOMBINING:
3099 case XML_REGEXP_MARK_ENCLOSING:
3100 case XML_REGEXP_NUMBER:
3101 case XML_REGEXP_NUMBER_DECIMAL:
3102 case XML_REGEXP_NUMBER_LETTER:
3103 case XML_REGEXP_NUMBER_OTHERS:
3104 case XML_REGEXP_PUNCT:
3105 case XML_REGEXP_PUNCT_CONNECTOR:
3106 case XML_REGEXP_PUNCT_DASH:
3107 case XML_REGEXP_PUNCT_OPEN:
3108 case XML_REGEXP_PUNCT_CLOSE:
3109 case XML_REGEXP_PUNCT_INITQUOTE:
3110 case XML_REGEXP_PUNCT_FINQUOTE:
3111 case XML_REGEXP_PUNCT_OTHERS:
3112 case XML_REGEXP_SEPAR:
3113 case XML_REGEXP_SEPAR_SPACE:
3114 case XML_REGEXP_SEPAR_LINE:
3115 case XML_REGEXP_SEPAR_PARA:
3116 case XML_REGEXP_SYMBOL:
3117 case XML_REGEXP_SYMBOL_MATH:
3118 case XML_REGEXP_SYMBOL_CURRENCY:
3119 case XML_REGEXP_SYMBOL_MODIFIER:
3120 case XML_REGEXP_SYMBOL_OTHERS:
3121 case XML_REGEXP_OTHER:
3122 case XML_REGEXP_OTHER_CONTROL:
3123 case XML_REGEXP_OTHER_FORMAT:
3124 case XML_REGEXP_OTHER_PRIVATE:
3125 case XML_REGEXP_OTHER_NA:
3126 case XML_REGEXP_BLOCK_NAME:
3127 ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
3128 (
const xmlChar *)atom->valuep);
3142#ifdef DEBUG_REGEXP_EXEC
3144xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
3145 printf(
"state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
3146 if (exec->inputStack !=
NULL) {
3149 for (
i = 0;(
i < 3) && (i < exec->inputStackNr);
i++)
3150 printf(
"%s ", (
const char *)
3151 exec->inputStack[exec->inputStackNr - (
i + 1)].
value);
3153 printf(
": %s", &(exec->inputString[exec->index]));
3160xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
3161#ifdef DEBUG_REGEXP_EXEC
3164 xmlFARegDebugExec(exec);
3168 if (exec->nbPush > MAX_PUSH) {
3174 if (exec->maxRollbacks == 0) {
3175 exec->maxRollbacks = 4;
3176 exec->rollbacks = (xmlRegExecRollback *)
xmlMalloc(exec->maxRollbacks *
3177 sizeof(xmlRegExecRollback));
3178 if (exec->rollbacks ==
NULL) {
3179 xmlRegexpErrMemory(
NULL,
"saving regexp");
3180 exec->maxRollbacks = 0;
3183 memset(exec->rollbacks, 0,
3184 exec->maxRollbacks *
sizeof(xmlRegExecRollback));
3185 }
else if (exec->nbRollbacks >= exec->maxRollbacks) {
3186 xmlRegExecRollback *tmp;
3187 int len = exec->maxRollbacks;
3189 exec->maxRollbacks *= 2;
3190 tmp = (xmlRegExecRollback *)
xmlRealloc(exec->rollbacks,
3191 exec->maxRollbacks *
sizeof(xmlRegExecRollback));
3193 xmlRegexpErrMemory(
NULL,
"saving regexp");
3194 exec->maxRollbacks /= 2;
3197 exec->rollbacks = tmp;
3198 tmp = &exec->rollbacks[
len];
3199 memset(tmp, 0, (exec->maxRollbacks -
len) *
sizeof(xmlRegExecRollback));
3201 exec->rollbacks[exec->nbRollbacks].state = exec->state;
3202 exec->rollbacks[exec->nbRollbacks].index = exec->index;
3203 exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
3204 if (exec->comp->nbCounters > 0) {
3205 if (exec->rollbacks[exec->nbRollbacks].counts ==
NULL) {
3206 exec->rollbacks[exec->nbRollbacks].counts = (
int *)
3207 xmlMalloc(exec->comp->nbCounters *
sizeof(
int));
3208 if (exec->rollbacks[exec->nbRollbacks].counts ==
NULL) {
3209 xmlRegexpErrMemory(
NULL,
"saving regexp");
3214 memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
3215 exec->comp->nbCounters *
sizeof(
int));
3217 exec->nbRollbacks++;
3221xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
3222 if (exec->nbRollbacks <= 0) {
3224#ifdef DEBUG_REGEXP_EXEC
3225 printf(
"rollback failed on empty stack\n");
3229 exec->nbRollbacks--;
3230 exec->state = exec->rollbacks[exec->nbRollbacks].state;
3231 exec->index = exec->rollbacks[exec->nbRollbacks].index;
3232 exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
3233 if (exec->comp->nbCounters > 0) {
3234 if (exec->rollbacks[exec->nbRollbacks].counts ==
NULL) {
3240 memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
3241 exec->comp->nbCounters *
sizeof(
int));
3245#ifdef DEBUG_REGEXP_EXEC
3247 xmlFARegDebugExec(exec);
3259 xmlRegExecCtxt execval;
3260 xmlRegExecCtxtPtr exec = &execval;
3261 int ret, codepoint = 0,
len, deter;
3266 exec->determinist = 1;
3267 exec->maxRollbacks = 0;
3268 exec->nbRollbacks = 0;
3269 exec->rollbacks =
NULL;
3272 exec->state = comp->states[0];
3274 exec->transcount = 0;
3275 exec->inputStack =
NULL;
3276 exec->inputStackMax = 0;
3277 if (comp->nbCounters > 0) {
3278 exec->counts = (
int *)
xmlMalloc(comp->nbCounters *
sizeof(
int));
3279 if (exec->counts ==
NULL) {
3280 xmlRegexpErrMemory(
NULL,
"running regexp");
3283 memset(exec->counts, 0, comp->nbCounters *
sizeof(
int));
3285 exec->counts =
NULL;
3286 while ((exec->status == 0) && (exec->state !=
NULL) &&
3287 ((exec->inputString[exec->index] != 0) ||
3288 ((exec->state !=
NULL) &&
3289 (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3290 xmlRegTransPtr trans;
3301 if ((exec->inputString[exec->index] == 0) && (exec->counts ==
NULL)) {
3306 if (exec->transno < exec->state->nbTrans) {
3307 trans = &exec->state->trans[exec->transno];
3308 if (trans->to >=0) {
3310 if (!((atom->min == 0) && (atom->max > 0)))
3317 exec->transcount = 0;
3318 for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3319 trans = &exec->state->trans[exec->transno];
3325 if (trans->count >= 0) {
3329 if (exec->counts ==
NULL) {
3337 count = exec->counts[trans->count];
3338 counter = &exec->comp->counters[trans->count];
3339#ifdef DEBUG_REGEXP_EXEC
3340 printf(
"testing count %d: val %d, min %d, max %d\n",
3346 }
else if (atom ==
NULL) {
3350 }
else if (exec->inputString[exec->index] != 0) {
3351 codepoint =
CUR_SCHAR(&(exec->inputString[exec->index]),
len);
3352 ret = xmlRegCheckCharacter(atom, codepoint);
3353 if ((
ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
3354 xmlRegStatePtr to = comp->states[trans->to];
3362 if (trans->counter >= 0) {
3365 if ((exec->counts ==
NULL) ||
3366 (exec->comp ==
NULL) ||
3367 (exec->comp->counters ==
NULL)) {
3371 counter = &exec->comp->counters[trans->counter];
3372 if (exec->counts[trans->counter] >=
counter->max)
3376 if (exec->state->nbTrans > exec->transno + 1) {
3377 xmlFARegExecSave(exec);
3379 if (trans->counter >= 0) {
3380#ifdef DEBUG_REGEXP_EXEC
3381 printf(
"Increasing count %d\n", trans->counter);
3383 exec->counts[trans->counter]++;
3385 exec->transcount = 1;
3390 if (exec->transcount == atom->max) {
3397 if (exec->inputString[exec->index] == 0) {
3401 if (exec->transcount >= atom->min) {
3402 int transno = exec->transno;
3403 xmlRegStatePtr
state = exec->state;
3410 xmlFARegExecSave(exec);
3411 exec->transno = transno;
3412 exec->state =
state;
3414 codepoint =
CUR_SCHAR(&(exec->inputString[exec->index]),
3416 ret = xmlRegCheckCharacter(atom, codepoint);
3419 if (exec->transcount < atom->min)
3431 if (trans->counter >= 0) {
3432 if (exec->counts ==
NULL) {
3436#ifdef DEBUG_REGEXP_EXEC
3437 printf(
"Decreasing count %d\n", trans->counter);
3439 exec->counts[trans->counter]--;
3441 }
else if ((
ret == 0) && (atom->min == 0) && (atom->max > 0)) {
3447 exec->transcount = 1;
3451 }
else if ((atom->min == 0) && (atom->max > 0)) {
3453 exec->transcount = 1;
3458 if ((trans->nd == 1) ||
3459 ((trans->count >= 0) && (deter == 0) &&
3460 (exec->state->nbTrans > exec->transno + 1))) {
3461#ifdef DEBUG_REGEXP_EXEC
3463 printf(
"Saving on nd transition atom %d for %c at %d\n",
3464 trans->atom->no, codepoint, exec->index);
3466 printf(
"Saving on counted transition count %d for %c at %d\n",
3467 trans->count, codepoint, exec->index);
3469 xmlFARegExecSave(exec);
3471 if (trans->counter >= 0) {
3475 if ((exec->counts ==
NULL) ||
3476 (exec->comp ==
NULL) ||
3477 (exec->comp->counters ==
NULL)) {
3481 counter = &exec->comp->counters[trans->counter];
3482 if (exec->counts[trans->counter] >=
counter->max)
3484#ifdef DEBUG_REGEXP_EXEC
3485 printf(
"Increasing count %d\n", trans->counter);
3487 exec->counts[trans->counter]++;
3489 if ((trans->count >= 0) &&
3490 (trans->count < REGEXP_ALL_COUNTER)) {
3491 if (exec->counts ==
NULL) {
3495#ifdef DEBUG_REGEXP_EXEC
3496 printf(
"resetting count %d on transition\n",
3499 exec->counts[trans->count] = 0;
3501#ifdef DEBUG_REGEXP_EXEC
3502 printf(
"entering state %d\n", trans->to);
3504 exec->state = comp->states[trans->to];
3506 if (trans->atom !=
NULL) {
3510 }
else if (
ret < 0) {
3515 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3520 exec->determinist = 0;
3521#ifdef DEBUG_REGEXP_EXEC
3522 printf(
"rollback from state %d on %d:%c\n", exec->state->no,
3523 codepoint,codepoint);
3525 xmlFARegExecRollBack(exec);
3531 if (exec->rollbacks !=
NULL) {
3532 if (exec->counts !=
NULL) {
3535 for (
i = 0;
i < exec->maxRollbacks;
i++)
3536 if (exec->rollbacks[
i].counts !=
NULL)
3537 xmlFree(exec->rollbacks[
i].counts);
3541 if (exec->state ==
NULL)
3543 if (exec->counts !=
NULL)
3545 if (exec->status == 0)
3547 if (exec->status == -1) {
3548 if (exec->nbPush > MAX_PUSH)
3552 return(exec->status);
3561static void testerr(xmlRegExecCtxtPtr exec);
3576xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks
callback,
void *
data) {
3577 xmlRegExecCtxtPtr exec;
3581 if ((comp->compact ==
NULL) && (comp->states ==
NULL))
3583 exec = (xmlRegExecCtxtPtr)
xmlMalloc(
sizeof(xmlRegExecCtxt));
3585 xmlRegexpErrMemory(
NULL,
"creating execution context");
3588 memset(exec, 0,
sizeof(xmlRegExecCtxt));
3589 exec->inputString =
NULL;
3591 exec->determinist = 1;
3592 exec->maxRollbacks = 0;
3593 exec->nbRollbacks = 0;
3594 exec->rollbacks =
NULL;
3597 if (comp->compact ==
NULL)
3598 exec->state = comp->states[0];
3600 exec->transcount = 0;
3603 if (comp->nbCounters > 0) {
3608 exec->counts = (
int *)
xmlMalloc(comp->nbCounters *
sizeof(
int)
3610 if (exec->counts ==
NULL) {
3611 xmlRegexpErrMemory(
NULL,
"creating execution context");
3615 memset(exec->counts, 0, comp->nbCounters *
sizeof(
int) * 2);
3616 exec->errCounts = &exec->counts[comp->nbCounters];
3618 exec->counts =
NULL;
3619 exec->errCounts =
NULL;
3621 exec->inputStackMax = 0;
3622 exec->inputStackNr = 0;
3623 exec->inputStack =
NULL;
3624 exec->errStateNo = -1;
3625 exec->errString =
NULL;
3637xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
3641 if (exec->rollbacks !=
NULL) {
3642 if (exec->counts !=
NULL) {
3645 for (
i = 0;
i < exec->maxRollbacks;
i++)
3646 if (exec->rollbacks[
i].counts !=
NULL)
3647 xmlFree(exec->rollbacks[
i].counts);
3651 if (exec->counts !=
NULL)
3653 if (exec->inputStack !=
NULL) {
3656 for (
i = 0;
i < exec->inputStackNr;
i++) {
3657 if (exec->inputStack[
i].value !=
NULL)
3658 xmlFree(exec->inputStack[
i].value);
3662 if (exec->errString !=
NULL)
3668xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec,
const xmlChar *
value,
3671 printf(
"saving value: %d:%s\n", exec->inputStackNr,
value);
3673 if (exec->inputStackMax == 0) {
3674 exec->inputStackMax = 4;
3675 exec->inputStack = (xmlRegInputTokenPtr)
3676 xmlMalloc(exec->inputStackMax *
sizeof(xmlRegInputToken));
3677 if (exec->inputStack ==
NULL) {
3678 xmlRegexpErrMemory(
NULL,
"pushing input string");
3679 exec->inputStackMax = 0;
3682 }
else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
3683 xmlRegInputTokenPtr tmp;
3685 exec->inputStackMax *= 2;
3686 tmp = (xmlRegInputTokenPtr)
xmlRealloc(exec->inputStack,
3687 exec->inputStackMax *
sizeof(xmlRegInputToken));
3689 xmlRegexpErrMemory(
NULL,
"pushing input string");
3690 exec->inputStackMax /= 2;
3693 exec->inputStack = tmp;
3696 exec->inputStack[exec->inputStackNr].data =
data;
3697 exec->inputStackNr++;
3698 exec->inputStack[exec->inputStackNr].value =
NULL;
3699 exec->inputStack[exec->inputStackNr].data =
NULL;
3716xmlRegStrEqualWildcard(
const xmlChar *expStr,
const xmlChar *valStr) {
3717 if (expStr == valStr)
return(1);
3718 if (expStr ==
NULL)
return(0);
3719 if (valStr ==
NULL)
return(0);
3724 if (*expStr != *valStr) {
3726 if (*valStr ==
'*') {
3733 if ((*valStr != 0) && (*expStr != 0) && (*expStr++ ==
'*')) {
3735 if (*valStr == XML_REG_STRING_SEPARATOR)
3738 }
while (*valStr != 0);
3745 }
while (*valStr != 0);
3765xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
3769 int state = exec->index;
3772 if ((comp ==
NULL) || (comp->compact ==
NULL) || (comp->stringMap ==
NULL))
3779 if (comp->compact[
state * (comp->nbstrings + 1)] ==
3780 XML_REGEXP_FINAL_STATE)
3792 for (
i = 0;
i < comp->nbstrings;
i++) {
3793 target = comp->compact[
state * (comp->nbstrings + 1) +
i + 1];
3796 if (xmlRegStrEqualWildcard(comp->stringMap[
i],
value)) {
3798 if ((exec->callback !=
NULL) && (comp->transdata !=
NULL)) {
3799 exec->callback(exec->data,
value,
3800 comp->transdata[
state * comp->nbstrings +
i],
data);
3805 if (comp->compact[
target * (comp->nbstrings + 1)] ==
3806 XML_REGEXP_SINK_STATE)
3809 if (comp->compact[
target * (comp->nbstrings + 1)] ==
3810 XML_REGEXP_FINAL_STATE)
3824 if (exec->errString !=
NULL)
3827 exec->errStateNo =
state;
3848xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec,
const xmlChar *
value,
3849 void *
data,
int compound) {
3850 xmlRegTransPtr trans;
3858 if (exec->comp ==
NULL)
3860 if (exec->status != 0)
3861 return(exec->status);
3863 if (exec->comp->compact !=
NULL)
3864 return(xmlRegCompactPushString(exec, exec->comp,
value,
data));
3867 if (exec->state->type == XML_REGEXP_FINAL_STATE)
3879 if ((
value !=
NULL) && (exec->inputStackNr > 0)) {
3880 xmlFARegExecSaveInputString(exec,
value,
data);
3881 value = exec->inputStack[exec->index].value;
3882 data = exec->inputStack[exec->index].data;
3888 while ((exec->status == 0) &&
3891 (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3901 exec->transcount = 0;
3902 for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3903 trans = &exec->state->trans[exec->transno];
3908 if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3917 printf(
"testing all lax %d\n", trans->count);
3925 for (
i = 0;
i < exec->state->nbTrans;
i++) {
3926 t = &exec->state->trans[
i];
3927 if ((
t->counter < 0) || (
t == trans))
3929 counter = &exec->comp->counters[
t->counter];
3930 count = exec->counts[
t->counter];
3931 if ((count < counter->
max) &&
3932 (
t->atom !=
NULL) &&
3938 (count < counter->
max) &&
3939 (
t->atom !=
NULL) &&
3946 }
else if (trans->count == REGEXP_ALL_COUNTER) {
3955 printf(
"testing all %d\n", trans->count);
3960 for (
i = 0;
i < exec->state->nbTrans;
i++) {
3961 t = &exec->state->trans[
i];
3962 if ((
t->counter < 0) || (
t == trans))
3964 counter = &exec->comp->counters[
t->counter];
3965 count = exec->counts[
t->counter];
3971 }
else if (trans->count >= 0) {
3979 count = exec->counts[trans->count];
3980 counter = &exec->comp->counters[trans->count];
3982 printf(
"testing count %d: val %d, min %d, max %d\n",
3986 }
else if (atom ==
NULL) {
3991 ret = xmlRegStrEqualWildcard(atom->valuep,
value);
3997 if ((
ret == 1) && (trans->counter >= 0)) {
4001 count = exec->counts[trans->counter];
4002 counter = &exec->comp->counters[trans->counter];
4007 if ((
ret == 1) && (atom->min > 0) && (atom->max > 0)) {
4008 xmlRegStatePtr to = exec->comp->states[trans->to];
4013 if (exec->state->nbTrans > exec->transno + 1) {
4014 if (exec->inputStackNr <= 0) {
4015 xmlFARegExecSaveInputString(exec,
value,
data);
4017 xmlFARegExecSave(exec);
4019 exec->transcount = 1;
4024 if (exec->transcount == atom->max) {
4028 value = exec->inputStack[exec->index].value;
4029 data = exec->inputStack[exec->index].data;
4041 if (exec->transcount >= atom->min) {
4042 int transno = exec->transno;
4043 xmlRegStatePtr
state = exec->state;
4050 if (exec->inputStackNr <= 0) {
4051 xmlFARegExecSaveInputString(exec,
value,
data);
4053 xmlFARegExecSave(exec);
4054 exec->transno = transno;
4055 exec->state =
state;
4060 if (exec->transcount < atom->min)
4075 if ((exec->callback !=
NULL) && (atom !=
NULL) &&
4077 exec->callback(exec->data, atom->valuep,
4080 if (exec->state->nbTrans > exec->transno + 1) {
4081 if (exec->inputStackNr <= 0) {
4082 xmlFARegExecSaveInputString(exec,
value,
data);
4084 xmlFARegExecSave(exec);
4086 if (trans->counter >= 0) {
4088 printf(
"Increasing count %d\n", trans->counter);
4090 exec->counts[trans->counter]++;
4092 if ((trans->count >= 0) &&
4093 (trans->count < REGEXP_ALL_COUNTER)) {
4094#ifdef DEBUG_REGEXP_EXEC
4095 printf(
"resetting count %d on transition\n",
4098 exec->counts[trans->count] = 0;
4101 printf(
"entering state %d\n", trans->to);
4103 if ((exec->comp->states[trans->to] !=
NULL) &&
4104 (exec->comp->states[trans->to]->type ==
4105 XML_REGEXP_SINK_STATE)) {
4110 if (exec->errString !=
NULL)
4113 exec->errState = exec->state;
4114 memcpy(exec->errCounts, exec->counts,
4115 exec->comp->nbCounters *
sizeof(
int));
4117 exec->state = exec->comp->states[trans->to];
4119 if (trans->atom !=
NULL) {
4120 if (exec->inputStack !=
NULL) {
4122 if (exec->index < exec->inputStackNr) {
4123 value = exec->inputStack[exec->index].value;
4124 data = exec->inputStack[exec->index].data;
4132 printf(
"end of input\n");
4139 printf(
"end of input\n");
4144 }
else if (
ret < 0) {
4149 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4156 (exec->state->type != XML_REGEXP_SINK_STATE)) {
4158 if (exec->errString !=
NULL)
4161 exec->errState = exec->state;
4162 if (exec->comp->nbCounters)
4163 memcpy(exec->errCounts, exec->counts,
4164 exec->comp->nbCounters *
sizeof(
int));
4170 exec->determinist = 0;
4171 xmlFARegExecRollBack(exec);
4172 if ((exec->inputStack !=
NULL ) && (exec->status == 0)) {
4173 value = exec->inputStack[exec->index].value;
4174 data = exec->inputStack[exec->index].data;
4185 if (exec->status == 0) {
4186 return(exec->state->type == XML_REGEXP_FINAL_STATE);
4189 if (exec->status < 0) {
4193 return(exec->status);
4208xmlRegExecPushString(xmlRegExecCtxtPtr exec,
const xmlChar *
value,
4210 return(xmlRegExecPushStringInternal(exec,
value,
data, 0));
4226xmlRegExecPushString2(xmlRegExecCtxtPtr exec,
const xmlChar *
value,
4229 int lenn, lenp,
ret;
4234 if (exec->comp ==
NULL)
4236 if (exec->status != 0)
4237 return(exec->status);
4240 return(xmlRegExecPushString(exec,
value,
data));
4242 lenn =
strlen((
char *) value2);
4245 if (150 < lenn + lenp + 2) {
4255 str[lenp] = XML_REG_STRING_SEPARATOR;
4257 str[lenn + lenp + 1] = 0;
4259 if (exec->comp->compact !=
NULL)
4260 ret = xmlRegCompactPushString(exec, exec->comp,
str,
data);
4262 ret = xmlRegExecPushStringInternal(exec,
str,
data, 1);
4284xmlRegExecGetValues(xmlRegExecCtxtPtr exec,
int err,
4285 int *nbval,
int *nbneg,
4290 if ((exec ==
NULL) || (nbval ==
NULL) || (nbneg ==
NULL) ||
4297 if ((exec->comp !=
NULL) && (exec->comp->compact !=
NULL)) {
4304 if (exec->errStateNo == -1)
return(-1);
4305 state = exec->errStateNo;
4307 state = exec->index;
4309 if (terminal !=
NULL) {
4310 if (comp->compact[
state * (comp->nbstrings + 1)] ==
4311 XML_REGEXP_FINAL_STATE)
4316 for (
i = 0;(
i < comp->nbstrings) && (nb < maxval);
i++) {
4317 target = comp->compact[
state * (comp->nbstrings + 1) +
i + 1];
4319 (comp->compact[(
target - 1) * (comp->nbstrings + 1)] !=
4320 XML_REGEXP_SINK_STATE)) {
4321 values[nb++] = comp->stringMap[
i];
4325 for (
i = 0;(
i < comp->nbstrings) && (nb < maxval);
i++) {
4326 target = comp->compact[
state * (comp->nbstrings + 1) +
i + 1];
4328 (comp->compact[(
target - 1) * (comp->nbstrings + 1)] ==
4329 XML_REGEXP_SINK_STATE)) {
4330 values[nb++] = comp->stringMap[
i];
4336 xmlRegTransPtr trans;
4338 xmlRegStatePtr
state;
4340 if (terminal !=
NULL) {
4341 if (exec->state->type == XML_REGEXP_FINAL_STATE)
4348 if (exec->errState ==
NULL)
return(-1);
4349 state = exec->errState;
4351 if (exec->state ==
NULL)
return(-1);
4352 state = exec->state;
4355 (transno <
state->nbTrans) && (nb < maxval);
4357 trans = &
state->trans[transno];
4361 if ((atom ==
NULL) || (atom->valuep ==
NULL))
4363 if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4366 }
else if (trans->count == REGEXP_ALL_COUNTER) {
4369 }
else if (trans->counter >= 0) {
4374 count = exec->errCounts[trans->counter];
4376 count = exec->counts[trans->counter];
4377 if (exec->comp !=
NULL)
4378 counter = &exec->comp->counters[trans->counter];
4387 if ((exec->comp !=
NULL) && (exec->comp->states[trans->to] !=
NULL) &&
4388 (exec->comp->states[trans->to]->type !=
4389 XML_REGEXP_SINK_STATE)) {
4399 (transno <
state->nbTrans) && (nb < maxval);
4401 trans = &
state->trans[transno];
4405 if ((atom ==
NULL) || (atom->valuep ==
NULL))
4407 if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4409 }
else if (trans->count == REGEXP_ALL_COUNTER) {
4411 }
else if (trans->counter >= 0) {
4414 if ((exec->comp->states[trans->to] !=
NULL) &&
4415 (exec->comp->states[trans->to]->type ==
4416 XML_REGEXP_SINK_STATE)) {
4447xmlRegExecNextValues(xmlRegExecCtxtPtr exec,
int *nbval,
int *nbneg,
4449 return(xmlRegExecGetValues(exec, 0, nbval, nbneg,
values, terminal));
4472xmlRegExecErrInfo(xmlRegExecCtxtPtr exec,
const xmlChar **
string,
4476 if (
string !=
NULL) {
4477 if (exec->status != 0)
4478 *
string = exec->errString;
4482 return(xmlRegExecGetValues(exec, 1, nbval, nbneg,
values, terminal));
4486static void testerr(xmlRegExecCtxtPtr exec) {
4492 xmlRegExecErrInfo(exec, &
string, &nb, &nbneg, &
values[0], &terminal);
4498xmlRegExecPushChar(xmlRegExecCtxtPtr exec,
int UCS) {
4499 xmlRegTransPtr trans;
4506 if (exec->status != 0)
4507 return(exec->status);
4509 while ((exec->status == 0) &&
4510 ((exec->inputString[exec->index] != 0) ||
4511 (exec->state->type != XML_REGEXP_FINAL_STATE))) {
4518 if ((exec->inputString[exec->index] == 0) && (exec->counts ==
NULL))
4521 exec->transcount = 0;
4522 for (;exec->transno < exec->state->nbTrans;exec->transno++) {
4523 trans = &exec->state->trans[exec->transno];
4528 if (trans->count >= 0) {
4536 count = exec->counts[trans->count];
4537 counter = &exec->comp->counters[trans->count];
4538#ifdef DEBUG_REGEXP_EXEC
4539 printf(
"testing count %d: val %d, min %d, max %d\n",
4543 }
else if (atom ==
NULL) {
4547 }
else if (exec->inputString[exec->index] != 0) {
4548 codepoint =
CUR_SCHAR(&(exec->inputString[exec->index]),
len);
4549 ret = xmlRegCheckCharacter(atom, codepoint);
4550 if ((
ret == 1) && (atom->min > 0) && (atom->max > 0)) {
4551 xmlRegStatePtr to = exec->comp->states[trans->to];
4556 if (exec->state->nbTrans > exec->transno + 1) {
4557 xmlFARegExecSave(exec);
4559 exec->transcount = 1;
4564 if (exec->transcount == atom->max) {
4571 if (exec->inputString[exec->index] == 0) {
4575 if (exec->transcount >= atom->min) {
4576 int transno = exec->transno;
4577 xmlRegStatePtr
state = exec->state;
4584 xmlFARegExecSave(exec);
4585 exec->transno = transno;
4586 exec->state =
state;
4588 codepoint =
CUR_SCHAR(&(exec->inputString[exec->index]),
4590 ret = xmlRegCheckCharacter(atom, codepoint);
4593 if (exec->transcount < atom->min)
4608 if (exec->state->nbTrans > exec->transno + 1) {
4609 xmlFARegExecSave(exec);
4614 if (trans->count >= 0) {
4615#ifdef DEBUG_REGEXP_EXEC
4616 printf(
"Reset count %d\n", trans->count);
4618 exec->counts[trans->count] = 0;
4620 if (trans->counter >= 0) {
4621#ifdef DEBUG_REGEXP_EXEC
4622 printf(
"Increasing count %d\n", trans->counter);
4624 exec->counts[trans->counter]++;
4626#ifdef DEBUG_REGEXP_EXEC
4627 printf(
"entering state %d\n", trans->to);
4629 exec->state = exec->comp->states[trans->to];
4631 if (trans->atom !=
NULL) {
4635 }
else if (
ret < 0) {
4640 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4645 exec->determinist = 0;
4646 xmlFARegExecRollBack(exec);
4667xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
4672 if ((
cur ==
'.') || (
cur ==
'\\') || (
cur ==
'?') ||
4673 (
cur ==
'*') || (
cur ==
'+') || (
cur ==
'(') ||
4674 (
cur ==
')') || (
cur ==
'|') || (
cur == 0x5B) ||
4675 (
cur == 0x5D) || (
cur == 0))
4697xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
4699 xmlRegAtomType
type = (xmlRegAtomType) 0;
4708 type = XML_REGEXP_LETTER_UPPERCASE;
4709 }
else if (
cur ==
'l') {
4711 type = XML_REGEXP_LETTER_LOWERCASE;
4712 }
else if (
cur ==
't') {
4714 type = XML_REGEXP_LETTER_TITLECASE;
4715 }
else if (
cur ==
'm') {
4717 type = XML_REGEXP_LETTER_MODIFIER;
4718 }
else if (
cur ==
'o') {
4720 type = XML_REGEXP_LETTER_OTHERS;
4722 type = XML_REGEXP_LETTER;
4724 }
else if (
cur ==
'M') {
4730 type = XML_REGEXP_MARK_NONSPACING;
4731 }
else if (
cur ==
'c') {
4734 type = XML_REGEXP_MARK_SPACECOMBINING;
4735 }
else if (
cur ==
'e') {
4738 type = XML_REGEXP_MARK_ENCLOSING;
4741 type = XML_REGEXP_MARK;
4743 }
else if (
cur ==
'N') {
4749 type = XML_REGEXP_NUMBER_DECIMAL;
4750 }
else if (
cur ==
'l') {
4753 type = XML_REGEXP_NUMBER_LETTER;
4754 }
else if (
cur ==
'o') {
4757 type = XML_REGEXP_NUMBER_OTHERS;
4760 type = XML_REGEXP_NUMBER;
4762 }
else if (
cur ==
'P') {
4768 type = XML_REGEXP_PUNCT_CONNECTOR;
4769 }
else if (
cur ==
'd') {
4772 type = XML_REGEXP_PUNCT_DASH;
4773 }
else if (
cur ==
's') {
4776 type = XML_REGEXP_PUNCT_OPEN;
4777 }
else if (
cur ==
'e') {
4780 type = XML_REGEXP_PUNCT_CLOSE;
4781 }
else if (
cur ==
'i') {
4784 type = XML_REGEXP_PUNCT_INITQUOTE;
4785 }
else if (
cur ==
'f') {
4788 type = XML_REGEXP_PUNCT_FINQUOTE;
4789 }
else if (
cur ==
'o') {
4792 type = XML_REGEXP_PUNCT_OTHERS;
4795 type = XML_REGEXP_PUNCT;
4797 }
else if (
cur ==
'Z') {
4803 type = XML_REGEXP_SEPAR_SPACE;
4804 }
else if (
cur ==
'l') {
4807 type = XML_REGEXP_SEPAR_LINE;
4808 }
else if (
cur ==
'p') {
4811 type = XML_REGEXP_SEPAR_PARA;
4814 type = XML_REGEXP_SEPAR;
4816 }
else if (
cur ==
'S') {
4821 type = XML_REGEXP_SYMBOL_MATH;
4823 }
else if (
cur ==
'c') {
4825 type = XML_REGEXP_SYMBOL_CURRENCY;
4827 }
else if (
cur ==
'k') {
4829 type = XML_REGEXP_SYMBOL_MODIFIER;
4831 }
else if (
cur ==
'o') {
4833 type = XML_REGEXP_SYMBOL_OTHERS;
4837 type = XML_REGEXP_SYMBOL;
4839 }
else if (
cur ==
'C') {
4845 type = XML_REGEXP_OTHER_CONTROL;
4846 }
else if (
cur ==
'f') {
4849 type = XML_REGEXP_OTHER_FORMAT;
4850 }
else if (
cur ==
'o') {
4853 type = XML_REGEXP_OTHER_PRIVATE;
4854 }
else if (
cur ==
'n') {
4857 type = XML_REGEXP_OTHER_NA;
4860 type = XML_REGEXP_OTHER;
4862 }
else if (
cur ==
'I') {
4867 ERROR(
"IsXXXX expected");
4873 if (((
cur >=
'a') && (
cur <=
'z')) ||
4874 ((
cur >=
'A') && (
cur <=
'Z')) ||
4875 ((
cur >=
'0') && (
cur <=
'9')) ||
4879 while (((
cur >=
'a') && (
cur <=
'z')) ||
4880 ((
cur >=
'A') && (
cur <=
'Z')) ||
4881 ((
cur >=
'0') && (
cur <=
'9')) ||
4887 type = XML_REGEXP_BLOCK_NAME;
4890 ERROR(
"Unknown char property");
4893 if (ctxt->atom ==
NULL) {
4894 ctxt->atom = xmlRegNewAtom(ctxt,
type);
4895 if (ctxt->atom !=
NULL)
4896 ctxt->atom->valuep = blockName;
4897 }
else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4898 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4899 type, 0, 0, blockName);
4903static int parse_escaped_codeunit(xmlRegParserCtxtPtr ctxt)
4906 for (
i = 0;
i < 4;
i++) {
4910 if (
cur >=
'0' &&
cur <=
'9') {
4912 }
else if (
cur >=
'A' &&
cur <=
'F') {
4914 }
else if (
cur >=
'a' &&
cur <=
'f') {
4917 ERROR(
"Expecting hex digit");
4924static int parse_escaped_codepoint(xmlRegParserCtxtPtr ctxt)
4926 int val = parse_escaped_codeunit(ctxt);
4927 if (0xD800 <=
val &&
val <= 0xDBFF) {
4932 int low = parse_escaped_codeunit(ctxt);
4933 if (0xDC00 <= low && low <= 0xDFFF) {
4934 return (
val - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
4938 ERROR(
"Invalid low surrogate pair code unit");
4955xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
4959 if (ctxt->atom ==
NULL) {
4960 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
4961 }
else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4962 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4963 XML_REGEXP_ANYCHAR, 0, 0,
NULL);
4969 ERROR(
"Escaped sequence: expecting \\");
4977 ERROR(
"Expecting '{'");
4981 xmlFAParseCharProp(ctxt);
4983 ERROR(
"Expecting '}'");
4987 }
else if (
cur ==
'P') {
4990 ERROR(
"Expecting '{'");
4994 xmlFAParseCharProp(ctxt);
4995 if (ctxt->atom !=
NULL)
4996 ctxt->atom->neg = 1;
4998 ERROR(
"Expecting '}'");
5002 }
else if ((
cur ==
'n') || (
cur ==
'r') || (
cur ==
't') || (
cur ==
'\\') ||
5003 (
cur ==
'|') || (
cur ==
'.') || (
cur ==
'?') || (
cur ==
'*') ||
5004 (
cur ==
'+') || (
cur ==
'(') || (
cur ==
')') || (
cur ==
'{') ||
5005 (
cur ==
'}') || (
cur == 0x2D) || (
cur == 0x5B) || (
cur == 0x5D) ||
5025 if (ctxt->atom ==
NULL) {
5026 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
5027 if (ctxt->atom !=
NULL) {
5030 ctxt->atom->codepoint =
'\n';
5033 ctxt->atom->codepoint =
'\r';
5036 ctxt->atom->codepoint =
'\t';
5039 cur = parse_escaped_codepoint(ctxt);
5043 ctxt->atom->codepoint =
cur;
5046 ctxt->atom->codepoint =
cur;
5049 }
else if (ctxt->atom->type == XML_REGEXP_RANGES) {
5061 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5065 }
else if ((
cur ==
's') || (
cur ==
'S') || (
cur ==
'i') || (
cur ==
'I') ||
5066 (
cur ==
'c') || (
cur ==
'C') || (
cur ==
'd') || (
cur ==
'D') ||
5067 (
cur ==
'w') || (
cur ==
'W')) {
5068 xmlRegAtomType
type = XML_REGEXP_ANYSPACE;
5072 type = XML_REGEXP_ANYSPACE;
5075 type = XML_REGEXP_NOTSPACE;
5078 type = XML_REGEXP_INITNAME;
5081 type = XML_REGEXP_NOTINITNAME;
5084 type = XML_REGEXP_NAMECHAR;
5087 type = XML_REGEXP_NOTNAMECHAR;
5090 type = XML_REGEXP_DECIMAL;
5093 type = XML_REGEXP_NOTDECIMAL;
5096 type = XML_REGEXP_REALCHAR;
5099 type = XML_REGEXP_NOTREALCHAR;
5103 if (ctxt->atom ==
NULL) {
5104 ctxt->atom = xmlRegNewAtom(ctxt,
type);
5105 }
else if (ctxt->atom->type == XML_REGEXP_RANGES) {
5106 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5110 ERROR(
"Wrong escape sequence, misuse of character '\\'");
5125xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
5131 ERROR(
"Expecting ']'");
5140 case 'n':
start = 0xA;
break;
5141 case 'r':
start = 0xD;
break;
5142 case 't':
start = 0x9;
break;
5143 case '\\':
case '|':
case '.':
case '-':
case '^':
case '?':
5144 case '*':
case '+':
case '{':
case '}':
case '(':
case ')':
5148 ERROR(
"Invalid escape value");
5153 }
else if ((
cur != 0x5B) && (
cur != 0x5D)) {
5156 ERROR(
"Expecting a char range");
5163 if ((
start ==
'-') && (
NXT(1) !=
']') && (PREV !=
'[') && (PREV !=
'^')) {
5169 if ((
cur !=
'-') || (
NXT(1) ==
'[') || (
NXT(1) ==
']')) {
5170 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5180 case 'n':
end = 0xA;
break;
5181 case 'r':
end = 0xD;
break;
5182 case 't':
end = 0x9;
break;
5183 case '\\':
case '|':
case '.':
case '-':
case '^':
case '?':
5184 case '*':
case '+':
case '{':
case '}':
case '(':
case ')':
5188 ERROR(
"Invalid escape value");
5192 }
else if ((
cur !=
'\0') && (
cur != 0x5B) && (
cur != 0x5D)) {
5195 ERROR(
"Expecting the end of a char range");
5201 ERROR(
"End of range is before start of range");
5204 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5217xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
5220 xmlFAParseCharClassEsc(ctxt);
5222 xmlFAParseCharRange(ctxt);
5224 }
while ((
CUR !=
']') && (
CUR !=
'-') &&
5225 (
CUR != 0) && (ctxt->error == 0));
5238xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
5239 int neg = ctxt->neg;
5243 ctxt->neg = !ctxt->neg;
5244 xmlFAParsePosCharGroup(ctxt);
5247 while ((
CUR !=
']') && (ctxt->error == 0)) {
5248 if ((
CUR ==
'-') && (
NXT(1) ==
'[')) {
5252 xmlFAParseCharGroup(ctxt);
5257 ERROR(
"charClassExpr: ']' expected");
5261 xmlFAParsePosCharGroup(ctxt);
5274xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
5277 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
5278 if (ctxt->atom ==
NULL)
5280 xmlFAParseCharGroup(ctxt);
5284 ERROR(
"xmlFAParseCharClass: ']' expected");
5287 xmlFAParseCharClassEsc(ctxt);
5300xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
5305 while ((
CUR >=
'0') && (
CUR <=
'9')) {
5309 int digit =
CUR -
'0';
5320 if ((
ok != 1) || (overflow == 1)) {
5337xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
5341 if ((
cur ==
'?') || (
cur ==
'*') || (
cur ==
'+')) {
5342 if (ctxt->atom !=
NULL) {
5344 ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
5345 else if (
cur ==
'*')
5346 ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
5347 else if (
cur ==
'+')
5348 ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
5357 cur = xmlFAParseQuantExact(ctxt);
5361 ERROR(
"Improper quantifier");
5368 cur = xmlFAParseQuantExact(ctxt);
5372 ERROR(
"Improper quantifier");
5379 ERROR(
"Unterminated quantifier");
5383 if (ctxt->atom !=
NULL) {
5384 ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
5385 ctxt->atom->min =
min;
5386 ctxt->atom->max =
max;
5400xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
5403 codepoint = xmlFAIsChar(ctxt);
5404 if (codepoint > 0) {
5405 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
5406 if (ctxt->atom ==
NULL)
5409 ctxt->atom->codepoint = codepoint;
5412 }
else if (
CUR ==
'|') {
5414 }
else if (
CUR == 0) {
5416 }
else if (
CUR ==
')') {
5418 }
else if (
CUR ==
'(') {
5419 xmlRegStatePtr
start, oldend, start0;
5422 if (ctxt->depth >= 50) {
5423 ERROR(
"xmlFAParseAtom: maximum nesting depth exceeded");
5430 xmlFAGenerateEpsilonTransition(ctxt, ctxt->state,
NULL);
5431 start0 = ctxt->state;
5432 xmlFAGenerateEpsilonTransition(ctxt, ctxt->state,
NULL);
5433 start = ctxt->state;
5438 xmlFAParseRegExp(ctxt, 0);
5443 ERROR(
"xmlFAParseAtom: expecting ')'");
5445 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
5446 if (ctxt->atom ==
NULL)
5448 ctxt->atom->start =
start;
5449 ctxt->atom->start0 = start0;
5450 ctxt->atom->stop = ctxt->state;
5453 }
else if ((
CUR ==
'[') || (
CUR ==
'\\') || (
CUR ==
'.')) {
5454 xmlFAParseCharClass(ctxt);
5467xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
5471 ret = xmlFAParseAtom(ctxt);
5474 if (ctxt->atom ==
NULL) {
5475 ERROR(
"internal: no atom generated");
5477 xmlFAParseQuantifier(ctxt);
5492xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
5493 xmlRegStatePtr previous;
5496 previous = ctxt->state;
5497 ret = xmlFAParsePiece(ctxt);
5500 xmlFAGenerateEpsilonTransition(ctxt, previous, to);
5502 if (xmlFAGenerateTransitions(ctxt, previous,
5503 (
CUR==
'|' ||
CUR==
')' ||
CUR==0) ? to :
NULL, ctxt->atom) < 0)
5505 previous = ctxt->state;
5508 while ((
ret != 0) && (ctxt->error == 0)) {
5509 ret = xmlFAParsePiece(ctxt);
5511 if (xmlFAGenerateTransitions(ctxt, previous,
5515 previous = ctxt->state;
5530xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt,
int top) {
5534 start = ctxt->state;
5536 xmlFAParseBranch(ctxt,
NULL);
5538#ifdef DEBUG_REGEXP_GRAPH
5539 printf(
"State %d is final\n", ctxt->state->no);
5541 ctxt->state->type = XML_REGEXP_FINAL_STATE;
5544 ctxt->end = ctxt->state;
5548 while ((
CUR ==
'|') && (ctxt->error == 0)) {
5550 ctxt->state =
start;
5552 xmlFAParseBranch(ctxt,
end);
5574xmlRegexpPrint(
FILE *output, xmlRegexpPtr regexp) {
5580 if (regexp ==
NULL) {
5584 fprintf(output,
"'%s' ", regexp->string);
5586 fprintf(output,
"%d atoms:\n", regexp->nbAtoms);
5587 for (
i = 0;
i < regexp->nbAtoms;
i++) {
5589 xmlRegPrintAtom(output, regexp->atoms[
i]);
5591 fprintf(output,
"%d states:", regexp->nbStates);
5593 for (
i = 0;
i < regexp->nbStates;
i++) {
5594 xmlRegPrintState(output, regexp->states[
i]);
5596 fprintf(output,
"%d counters:\n", regexp->nbCounters);
5597 for (
i = 0;
i < regexp->nbCounters;
i++) {
5598 fprintf(output,
" %d: min %d max %d\n",
i, regexp->counters[
i].min,
5599 regexp->counters[
i].max);
5614xmlRegexpCompile(
const xmlChar *regexp) {
5616 xmlRegParserCtxtPtr ctxt;
5618 ctxt = xmlRegNewParserCtxt(regexp);
5624 ctxt->start = ctxt->state = xmlRegNewState(ctxt);
5625 xmlRegStatePush(ctxt, ctxt->start);
5628 xmlFAParseRegExp(ctxt, 1);
5630 ERROR(
"xmlFAParseRegExp: extra characters");
5632 if (ctxt->error != 0) {
5633 xmlRegFreeParserCtxt(ctxt);
5636 ctxt->end = ctxt->state;
5637 ctxt->start->type = XML_REGEXP_START_STATE;
5638 ctxt->end->type = XML_REGEXP_FINAL_STATE;
5641 xmlFAEliminateEpsilonTransitions(ctxt);
5644 if (ctxt->error != 0) {
5645 xmlRegFreeParserCtxt(ctxt);
5648 ret = xmlRegEpxFromParse(ctxt);
5649 xmlRegFreeParserCtxt(ctxt);
5666 return(xmlFARegExec(comp,
content));
5678xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
5684 if (comp->determinist != -1)
5685 return(comp->determinist);
5687 am = xmlNewAutomata();
5690 if (am->states !=
NULL) {
5693 for (
i = 0;
i < am->nbStates;
i++)
5694 xmlRegFreeState(am->states[
i]);
5697 am->nbAtoms = comp->nbAtoms;
5698 am->atoms = comp->atoms;
5699 am->nbStates = comp->nbStates;
5700 am->states = comp->states;
5701 am->determinist = -1;
5702 am->flags = comp->flags;
5703 ret = xmlFAComputesDeterminism(am);
5706 xmlFreeAutomata(am);
5707 comp->determinist =
ret;
5718xmlRegFreeRegexp(xmlRegexpPtr regexp) {
5723 if (regexp->string !=
NULL)
5725 if (regexp->states !=
NULL) {
5726 for (
i = 0;
i < regexp->nbStates;
i++)
5727 xmlRegFreeState(regexp->states[
i]);
5730 if (regexp->atoms !=
NULL) {
5731 for (
i = 0;
i < regexp->nbAtoms;
i++)
5732 xmlRegFreeAtom(regexp->atoms[
i]);
5735 if (regexp->counters !=
NULL)
5737 if (regexp->compact !=
NULL)
5739 if (regexp->transdata !=
NULL)
5741 if (regexp->stringMap !=
NULL) {
5742 for (
i = 0;
i < regexp->nbstrings;
i++)
5750#ifdef LIBXML_AUTOMATA_ENABLED
5765xmlNewAutomata(
void) {
5766 xmlAutomataPtr ctxt;
5768 ctxt = xmlRegNewParserCtxt(
NULL);
5774 ctxt->start = ctxt->state = xmlRegNewState(ctxt);
5775 if (ctxt->start ==
NULL) {
5776 xmlFreeAutomata(ctxt);
5779 ctxt->start->type = XML_REGEXP_START_STATE;
5780 if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
5781 xmlRegFreeState(ctxt->start);
5782 xmlFreeAutomata(ctxt);
5797xmlFreeAutomata(xmlAutomataPtr am) {
5800 xmlRegFreeParserCtxt(am);
5811xmlAutomataSetFlags(xmlAutomataPtr am,
int flags) {
5826xmlAutomataGetInitState(xmlAutomataPtr am) {
5842xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr
state) {
5845 state->type = XML_REGEXP_FINAL_STATE;
5864xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr
from,
5871 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);