ReactOS 0.4.15-dev-8092-ge0ba2f3
xmlregexp.c
Go to the documentation of this file.
1/*
2 * regexp.c: generic and extensible Regular Expression engine
3 *
4 * Basically designed with the purpose of compiling regexps for
5 * the variety of validation/schemas mechanisms now available in
6 * XML related specifications these include:
7 * - XML-1.0 DTD validation
8 * - XML Schemas structure part 1
9 * - XML Schemas Datatypes part 2 especially Appendix F
10 * - RELAX-NG/TREX i.e. the counter proposal
11 *
12 * See Copyright for the status of this software.
13 *
14 * Daniel Veillard <veillard@redhat.com>
15 */
16
17#define IN_LIBXML
18#include "libxml.h"
19
20#ifdef LIBXML_REGEXP_ENABLED
21
22/* #define DEBUG_ERR */
23
24#include <stdio.h>
25#include <string.h>
26#include <limits.h>
27
28#include <libxml/tree.h>
30#include <libxml/xmlregexp.h>
31#include <libxml/xmlautomata.h>
32#include <libxml/xmlunicode.h>
33
34#ifndef SIZE_MAX
35#define SIZE_MAX ((size_t) -1)
36#endif
37
38/* #define DEBUG_REGEXP_GRAPH */
39/* #define DEBUG_REGEXP_EXEC */
40/* #define DEBUG_PUSH */
41/* #define DEBUG_COMPACTION */
42
43#define MAX_PUSH 10000000
44
45#ifdef ERROR
46#undef ERROR
47#endif
48#define ERROR(str) \
49 ctxt->error = XML_REGEXP_COMPILE_ERROR; \
50 xmlRegexpErrCompile(ctxt, str);
51#define NEXT ctxt->cur++
52#define CUR (*(ctxt->cur))
53#define NXT(index) (ctxt->cur[index])
54
55#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
56#define NEXTL(l) ctxt->cur += l;
57#define XML_REG_STRING_SEPARATOR '|'
58/*
59 * Need PREV to check on a '-' within a Character Group. May only be used
60 * when it's guaranteed that cur is not at the beginning of ctxt->string!
61 */
62#define PREV (ctxt->cur[-1])
63
69#define TODO \
70 xmlGenericError(xmlGenericErrorContext, \
71 "Unimplemented block at %s:%d\n", \
72 __FILE__, __LINE__);
73
74/************************************************************************
75 * *
76 * Datatypes and structures *
77 * *
78 ************************************************************************/
79
80/*
81 * Note: the order of the enums below is significant, do not shuffle
82 */
83typedef enum {
84 XML_REGEXP_EPSILON = 1,
85 XML_REGEXP_CHARVAL,
86 XML_REGEXP_RANGES,
87 XML_REGEXP_SUBREG, /* used for () sub regexps */
88 XML_REGEXP_STRING,
89 XML_REGEXP_ANYCHAR, /* . */
90 XML_REGEXP_ANYSPACE, /* \s */
91 XML_REGEXP_NOTSPACE, /* \S */
92 XML_REGEXP_INITNAME, /* \l */
93 XML_REGEXP_NOTINITNAME, /* \L */
94 XML_REGEXP_NAMECHAR, /* \c */
95 XML_REGEXP_NOTNAMECHAR, /* \C */
96 XML_REGEXP_DECIMAL, /* \d */
97 XML_REGEXP_NOTDECIMAL, /* \D */
98 XML_REGEXP_REALCHAR, /* \w */
99 XML_REGEXP_NOTREALCHAR, /* \W */
100 XML_REGEXP_LETTER = 100,
101 XML_REGEXP_LETTER_UPPERCASE,
102 XML_REGEXP_LETTER_LOWERCASE,
103 XML_REGEXP_LETTER_TITLECASE,
104 XML_REGEXP_LETTER_MODIFIER,
105 XML_REGEXP_LETTER_OTHERS,
106 XML_REGEXP_MARK,
107 XML_REGEXP_MARK_NONSPACING,
108 XML_REGEXP_MARK_SPACECOMBINING,
109 XML_REGEXP_MARK_ENCLOSING,
110 XML_REGEXP_NUMBER,
111 XML_REGEXP_NUMBER_DECIMAL,
112 XML_REGEXP_NUMBER_LETTER,
113 XML_REGEXP_NUMBER_OTHERS,
114 XML_REGEXP_PUNCT,
115 XML_REGEXP_PUNCT_CONNECTOR,
116 XML_REGEXP_PUNCT_DASH,
117 XML_REGEXP_PUNCT_OPEN,
118 XML_REGEXP_PUNCT_CLOSE,
119 XML_REGEXP_PUNCT_INITQUOTE,
120 XML_REGEXP_PUNCT_FINQUOTE,
121 XML_REGEXP_PUNCT_OTHERS,
122 XML_REGEXP_SEPAR,
123 XML_REGEXP_SEPAR_SPACE,
124 XML_REGEXP_SEPAR_LINE,
125 XML_REGEXP_SEPAR_PARA,
126 XML_REGEXP_SYMBOL,
127 XML_REGEXP_SYMBOL_MATH,
128 XML_REGEXP_SYMBOL_CURRENCY,
129 XML_REGEXP_SYMBOL_MODIFIER,
130 XML_REGEXP_SYMBOL_OTHERS,
131 XML_REGEXP_OTHER,
132 XML_REGEXP_OTHER_CONTROL,
133 XML_REGEXP_OTHER_FORMAT,
134 XML_REGEXP_OTHER_PRIVATE,
135 XML_REGEXP_OTHER_NA,
136 XML_REGEXP_BLOCK_NAME
137} xmlRegAtomType;
138
139typedef enum {
140 XML_REGEXP_QUANT_EPSILON = 1,
141 XML_REGEXP_QUANT_ONCE,
142 XML_REGEXP_QUANT_OPT,
143 XML_REGEXP_QUANT_MULT,
144 XML_REGEXP_QUANT_PLUS,
145 XML_REGEXP_QUANT_ONCEONLY,
146 XML_REGEXP_QUANT_ALL,
147 XML_REGEXP_QUANT_RANGE
148} xmlRegQuantType;
149
150typedef enum {
151 XML_REGEXP_START_STATE = 1,
152 XML_REGEXP_FINAL_STATE,
153 XML_REGEXP_TRANS_STATE,
154 XML_REGEXP_SINK_STATE,
155 XML_REGEXP_UNREACH_STATE
156} xmlRegStateType;
157
158typedef enum {
159 XML_REGEXP_MARK_NORMAL = 0,
160 XML_REGEXP_MARK_START,
161 XML_REGEXP_MARK_VISITED
162} xmlRegMarkedType;
163
164typedef struct _xmlRegRange xmlRegRange;
165typedef xmlRegRange *xmlRegRangePtr;
166
167struct _xmlRegRange {
168 int neg; /* 0 normal, 1 not, 2 exclude */
169 xmlRegAtomType type;
170 int start;
171 int end;
172 xmlChar *blockName;
173};
174
175typedef struct _xmlRegAtom xmlRegAtom;
176typedef xmlRegAtom *xmlRegAtomPtr;
177
178typedef struct _xmlAutomataState xmlRegState;
179typedef xmlRegState *xmlRegStatePtr;
180
181struct _xmlRegAtom {
182 int no;
183 xmlRegAtomType type;
184 xmlRegQuantType quant;
185 int min;
186 int max;
187
188 void *valuep;
189 void *valuep2;
190 int neg;
191 int codepoint;
192 xmlRegStatePtr start;
193 xmlRegStatePtr start0;
194 xmlRegStatePtr stop;
195 int maxRanges;
196 int nbRanges;
197 xmlRegRangePtr *ranges;
198 void *data;
199};
200
201typedef struct _xmlRegCounter xmlRegCounter;
202typedef xmlRegCounter *xmlRegCounterPtr;
203
204struct _xmlRegCounter {
205 int min;
206 int max;
207};
208
209typedef struct _xmlRegTrans xmlRegTrans;
210typedef xmlRegTrans *xmlRegTransPtr;
211
212struct _xmlRegTrans {
213 xmlRegAtomPtr atom;
214 int to;
215 int counter;
216 int count;
217 int nd;
218};
219
220struct _xmlAutomataState {
221 xmlRegStateType type;
222 xmlRegMarkedType mark;
223 xmlRegMarkedType markd;
224 xmlRegMarkedType reached;
225 int no;
226 int maxTrans;
227 int nbTrans;
228 xmlRegTrans *trans;
229 /* knowing states pointing to us can speed things up */
230 int maxTransTo;
231 int nbTransTo;
232 int *transTo;
233};
234
235typedef struct _xmlAutomata xmlRegParserCtxt;
236typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
237
238#define AM_AUTOMATA_RNG 1
239
240struct _xmlAutomata {
242 xmlChar *cur;
243
244 int error;
245 int neg;
246
247 xmlRegStatePtr start;
248 xmlRegStatePtr end;
249 xmlRegStatePtr state;
250
251 xmlRegAtomPtr atom;
252
253 int maxAtoms;
254 int nbAtoms;
255 xmlRegAtomPtr *atoms;
256
257 int maxStates;
258 int nbStates;
259 xmlRegStatePtr *states;
260
261 int maxCounters;
262 int nbCounters;
263 xmlRegCounter *counters;
264
265 int determinist;
266 int negs;
267 int flags;
268
269 int depth;
270};
271
272struct _xmlRegexp {
274 int nbStates;
275 xmlRegStatePtr *states;
276 int nbAtoms;
277 xmlRegAtomPtr *atoms;
278 int nbCounters;
279 xmlRegCounter *counters;
280 int determinist;
281 int flags;
282 /*
283 * That's the compact form for determinists automatas
284 */
285 int nbstates;
286 int *compact;
287 void **transdata;
288 int nbstrings;
289 xmlChar **stringMap;
290};
291
292typedef struct _xmlRegExecRollback xmlRegExecRollback;
293typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
294
295struct _xmlRegExecRollback {
296 xmlRegStatePtr state;/* the current state */
297 int index; /* the index in the input stack */
298 int nextbranch; /* the next transition to explore in that state */
299 int *counts; /* save the automata state if it has some */
300};
301
302typedef struct _xmlRegInputToken xmlRegInputToken;
303typedef xmlRegInputToken *xmlRegInputTokenPtr;
304
305struct _xmlRegInputToken {
306 xmlChar *value;
307 void *data;
308};
309
310struct _xmlRegExecCtxt {
311 int status; /* execution status != 0 indicate an error */
312 int determinist; /* did we find an indeterministic behaviour */
313 xmlRegexpPtr comp; /* the compiled regexp */
314 xmlRegExecCallbacks callback;
315 void *data;
316
317 xmlRegStatePtr state;/* the current state */
318 int transno; /* the current transition on that state */
319 int transcount; /* the number of chars in char counted transitions */
320
321 /*
322 * A stack of rollback states
323 */
324 int maxRollbacks;
325 int nbRollbacks;
326 xmlRegExecRollback *rollbacks;
327
328 /*
329 * The state of the automata if any
330 */
331 int *counts;
332
333 /*
334 * The input stack
335 */
336 int inputStackMax;
337 int inputStackNr;
338 int index;
339 int *charStack;
340 const xmlChar *inputString; /* when operating on characters */
341 xmlRegInputTokenPtr inputStack;/* when operating on strings */
342
343 /*
344 * error handling
345 */
346 int errStateNo; /* the error state number */
347 xmlRegStatePtr errState; /* the error state */
348 xmlChar *errString; /* the string raising the error */
349 int *errCounts; /* counters at the error state */
350 int nbPush;
351};
352
353#define REGEXP_ALL_COUNTER 0x123456
354#define REGEXP_ALL_LAX_COUNTER 0x123457
355
356static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
357static void xmlRegFreeState(xmlRegStatePtr state);
358static void xmlRegFreeAtom(xmlRegAtomPtr atom);
359static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
360static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
361static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
362 int neg, int start, int end, const xmlChar *blockName);
363
364void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
365
366/************************************************************************
367 * *
368 * Regexp memory error handler *
369 * *
370 ************************************************************************/
377static void
378xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
379{
380 const char *regexp = NULL;
381 if (ctxt != NULL) {
382 regexp = (const char *) ctxt->string;
383 ctxt->error = XML_ERR_NO_MEMORY;
384 }
385 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
387 regexp, NULL, 0, 0,
388 "Memory allocation failed : %s\n", extra);
389}
390
397static void
398xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
399{
400 const char *regexp = NULL;
401 int idx = 0;
402
403 if (ctxt != NULL) {
404 regexp = (const char *) ctxt->string;
405 idx = ctxt->cur - ctxt->string;
406 ctxt->error = XML_REGEXP_COMPILE_ERROR;
407 }
408 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
410 regexp, NULL, idx, 0,
411 "failed to compile: %s\n", extra);
412}
413
414/************************************************************************
415 * *
416 * Allocation/Deallocation *
417 * *
418 ************************************************************************/
419
420static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
421
432static void*
433xmlRegCalloc2(size_t dim1, size_t dim2, size_t elemSize) {
434 size_t totalSize;
435 void *ret;
436
437 /* Check for overflow */
438 if (dim1 > SIZE_MAX / dim2 / elemSize)
439 return (NULL);
440 totalSize = dim1 * dim2 * elemSize;
441 ret = xmlMalloc(totalSize);
442 if (ret != NULL)
443 memset(ret, 0, totalSize);
444 return (ret);
445}
446
455static xmlRegexpPtr
456xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
457 xmlRegexpPtr ret;
458
459 ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
460 if (ret == NULL) {
461 xmlRegexpErrMemory(ctxt, "compiling regexp");
462 return(NULL);
463 }
464 memset(ret, 0, sizeof(xmlRegexp));
465 ret->string = ctxt->string;
466 ret->nbStates = ctxt->nbStates;
467 ret->states = ctxt->states;
468 ret->nbAtoms = ctxt->nbAtoms;
469 ret->atoms = ctxt->atoms;
470 ret->nbCounters = ctxt->nbCounters;
471 ret->counters = ctxt->counters;
472 ret->determinist = ctxt->determinist;
473 ret->flags = ctxt->flags;
474 if (ret->determinist == -1) {
475 xmlRegexpIsDeterminist(ret);
476 }
477
478 if ((ret->determinist != 0) &&
479 (ret->nbCounters == 0) &&
480 (ctxt->negs == 0) &&
481 (ret->atoms != NULL) &&
482 (ret->atoms[0] != NULL) &&
483 (ret->atoms[0]->type == XML_REGEXP_STRING)) {
484 int i, j, nbstates = 0, nbatoms = 0;
485 int *stateRemap;
486 int *stringRemap;
487 int *transitions;
488 void **transdata;
489 xmlChar **stringMap;
490 xmlChar *value;
491
492 /*
493 * Switch to a compact representation
494 * 1/ counting the effective number of states left
495 * 2/ counting the unique number of atoms, and check that
496 * they are all of the string type
497 * 3/ build a table state x atom for the transitions
498 */
499
500 stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
501 if (stateRemap == NULL) {
502 xmlRegexpErrMemory(ctxt, "compiling regexp");
503 xmlFree(ret);
504 return(NULL);
505 }
506 for (i = 0;i < ret->nbStates;i++) {
507 if (ret->states[i] != NULL) {
508 stateRemap[i] = nbstates;
509 nbstates++;
510 } else {
511 stateRemap[i] = -1;
512 }
513 }
514#ifdef DEBUG_COMPACTION
515 printf("Final: %d states\n", nbstates);
516#endif
517 stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
518 if (stringMap == NULL) {
519 xmlRegexpErrMemory(ctxt, "compiling regexp");
520 xmlFree(stateRemap);
521 xmlFree(ret);
522 return(NULL);
523 }
524 stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
525 if (stringRemap == NULL) {
526 xmlRegexpErrMemory(ctxt, "compiling regexp");
527 xmlFree(stringMap);
528 xmlFree(stateRemap);
529 xmlFree(ret);
530 return(NULL);
531 }
532 for (i = 0;i < ret->nbAtoms;i++) {
533 if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
534 (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
535 value = ret->atoms[i]->valuep;
536 for (j = 0;j < nbatoms;j++) {
537 if (xmlStrEqual(stringMap[j], value)) {
538 stringRemap[i] = j;
539 break;
540 }
541 }
542 if (j >= nbatoms) {
543 stringRemap[i] = nbatoms;
544 stringMap[nbatoms] = xmlStrdup(value);
545 if (stringMap[nbatoms] == NULL) {
546 for (i = 0;i < nbatoms;i++)
547 xmlFree(stringMap[i]);
548 xmlFree(stringRemap);
549 xmlFree(stringMap);
550 xmlFree(stateRemap);
551 xmlFree(ret);
552 return(NULL);
553 }
554 nbatoms++;
555 }
556 } else {
557 xmlFree(stateRemap);
558 xmlFree(stringRemap);
559 for (i = 0;i < nbatoms;i++)
560 xmlFree(stringMap[i]);
561 xmlFree(stringMap);
562 xmlFree(ret);
563 return(NULL);
564 }
565 }
566#ifdef DEBUG_COMPACTION
567 printf("Final: %d atoms\n", nbatoms);
568#endif
569 transitions = (int *) xmlRegCalloc2(nbstates + 1, nbatoms + 1,
570 sizeof(int));
571 if (transitions == NULL) {
572 xmlFree(stateRemap);
573 xmlFree(stringRemap);
574 for (i = 0;i < nbatoms;i++)
575 xmlFree(stringMap[i]);
576 xmlFree(stringMap);
577 xmlFree(ret);
578 return(NULL);
579 }
580
581 /*
582 * Allocate the transition table. The first entry for each
583 * state corresponds to the state type.
584 */
585 transdata = NULL;
586
587 for (i = 0;i < ret->nbStates;i++) {
588 int stateno, atomno, targetno, prev;
589 xmlRegStatePtr state;
590 xmlRegTransPtr trans;
591
592 stateno = stateRemap[i];
593 if (stateno == -1)
594 continue;
595 state = ret->states[i];
596
597 transitions[stateno * (nbatoms + 1)] = state->type;
598
599 for (j = 0;j < state->nbTrans;j++) {
600 trans = &(state->trans[j]);
601 if ((trans->to == -1) || (trans->atom == NULL))
602 continue;
603 atomno = stringRemap[trans->atom->no];
604 if ((trans->atom->data != NULL) && (transdata == NULL)) {
605 transdata = (void **) xmlRegCalloc2(nbstates, nbatoms,
606 sizeof(void *));
607 if (transdata == NULL) {
608 xmlRegexpErrMemory(ctxt, "compiling regexp");
609 break;
610 }
611 }
612 targetno = stateRemap[trans->to];
613 /*
614 * if the same atom can generate transitions to 2 different
615 * states then it means the automata is not deterministic and
616 * the compact form can't be used !
617 */
618 prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
619 if (prev != 0) {
620 if (prev != targetno + 1) {
621 ret->determinist = 0;
622#ifdef DEBUG_COMPACTION
623 printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
624 i, j, trans->atom->no, trans->to, atomno, targetno);
625 printf(" previous to is %d\n", prev);
626#endif
627 if (transdata != NULL)
628 xmlFree(transdata);
629 xmlFree(transitions);
630 xmlFree(stateRemap);
631 xmlFree(stringRemap);
632 for (i = 0;i < nbatoms;i++)
633 xmlFree(stringMap[i]);
634 xmlFree(stringMap);
635 goto not_determ;
636 }
637 } else {
638#if 0
639 printf("State %d trans %d: atom %d to %d : %d to %d\n",
640 i, j, trans->atom->no, trans->to, atomno, targetno);
641#endif
642 transitions[stateno * (nbatoms + 1) + atomno + 1] =
643 targetno + 1; /* to avoid 0 */
644 if (transdata != NULL)
645 transdata[stateno * nbatoms + atomno] =
646 trans->atom->data;
647 }
648 }
649 }
650 ret->determinist = 1;
651#ifdef DEBUG_COMPACTION
652 /*
653 * Debug
654 */
655 for (i = 0;i < nbstates;i++) {
656 for (j = 0;j < nbatoms + 1;j++) {
657 printf("%02d ", transitions[i * (nbatoms + 1) + j]);
658 }
659 printf("\n");
660 }
661 printf("\n");
662#endif
663 /*
664 * Cleanup of the old data
665 */
666 if (ret->states != NULL) {
667 for (i = 0;i < ret->nbStates;i++)
668 xmlRegFreeState(ret->states[i]);
669 xmlFree(ret->states);
670 }
671 ret->states = NULL;
672 ret->nbStates = 0;
673 if (ret->atoms != NULL) {
674 for (i = 0;i < ret->nbAtoms;i++)
675 xmlRegFreeAtom(ret->atoms[i]);
676 xmlFree(ret->atoms);
677 }
678 ret->atoms = NULL;
679 ret->nbAtoms = 0;
680
681 ret->compact = transitions;
682 ret->transdata = transdata;
683 ret->stringMap = stringMap;
684 ret->nbstrings = nbatoms;
685 ret->nbstates = nbstates;
686 xmlFree(stateRemap);
687 xmlFree(stringRemap);
688 }
689not_determ:
690 ctxt->string = NULL;
691 ctxt->nbStates = 0;
692 ctxt->states = NULL;
693 ctxt->nbAtoms = 0;
694 ctxt->atoms = NULL;
695 ctxt->nbCounters = 0;
696 ctxt->counters = NULL;
697 return(ret);
698}
699
708static xmlRegParserCtxtPtr
709xmlRegNewParserCtxt(const xmlChar *string) {
710 xmlRegParserCtxtPtr ret;
711
712 ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
713 if (ret == NULL)
714 return(NULL);
715 memset(ret, 0, sizeof(xmlRegParserCtxt));
716 if (string != NULL)
717 ret->string = xmlStrdup(string);
718 ret->cur = ret->string;
719 ret->neg = 0;
720 ret->negs = 0;
721 ret->error = 0;
722 ret->determinist = -1;
723 return(ret);
724}
725
738static xmlRegRangePtr
739xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
740 int neg, xmlRegAtomType type, int start, int end) {
741 xmlRegRangePtr ret;
742
743 ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
744 if (ret == NULL) {
745 xmlRegexpErrMemory(ctxt, "allocating range");
746 return(NULL);
747 }
748 ret->neg = neg;
749 ret->type = type;
750 ret->start = start;
751 ret->end = end;
752 return(ret);
753}
754
761static void
762xmlRegFreeRange(xmlRegRangePtr range) {
763 if (range == NULL)
764 return;
765
766 if (range->blockName != NULL)
767 xmlFree(range->blockName);
768 xmlFree(range);
769}
770
779static xmlRegRangePtr
780xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
781 xmlRegRangePtr ret;
782
783 if (range == NULL)
784 return(NULL);
785
786 ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
787 range->end);
788 if (ret == NULL)
789 return(NULL);
790 if (range->blockName != NULL) {
791 ret->blockName = xmlStrdup(range->blockName);
792 if (ret->blockName == NULL) {
793 xmlRegexpErrMemory(ctxt, "allocating range");
794 xmlRegFreeRange(ret);
795 return(NULL);
796 }
797 }
798 return(ret);
799}
800
810static xmlRegAtomPtr
811xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
812 xmlRegAtomPtr ret;
813
814 ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
815 if (ret == NULL) {
816 xmlRegexpErrMemory(ctxt, "allocating atom");
817 return(NULL);
818 }
819 memset(ret, 0, sizeof(xmlRegAtom));
820 ret->type = type;
821 ret->quant = XML_REGEXP_QUANT_ONCE;
822 ret->min = 0;
823 ret->max = 0;
824 return(ret);
825}
826
833static void
834xmlRegFreeAtom(xmlRegAtomPtr atom) {
835 int i;
836
837 if (atom == NULL)
838 return;
839
840 for (i = 0;i < atom->nbRanges;i++)
841 xmlRegFreeRange(atom->ranges[i]);
842 if (atom->ranges != NULL)
843 xmlFree(atom->ranges);
844 if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
845 xmlFree(atom->valuep);
846 if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
847 xmlFree(atom->valuep2);
848 if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
849 xmlFree(atom->valuep);
850 xmlFree(atom);
851}
852
862static xmlRegAtomPtr
863xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
864 xmlRegAtomPtr ret;
865
866 ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
867 if (ret == NULL) {
868 xmlRegexpErrMemory(ctxt, "copying atom");
869 return(NULL);
870 }
871 memset(ret, 0, sizeof(xmlRegAtom));
872 ret->type = atom->type;
873 ret->quant = atom->quant;
874 ret->min = atom->min;
875 ret->max = atom->max;
876 if (atom->nbRanges > 0) {
877 int i;
878
879 ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
880 atom->nbRanges);
881 if (ret->ranges == NULL) {
882 xmlRegexpErrMemory(ctxt, "copying atom");
883 goto error;
884 }
885 for (i = 0;i < atom->nbRanges;i++) {
886 ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
887 if (ret->ranges[i] == NULL)
888 goto error;
889 ret->nbRanges = i + 1;
890 }
891 }
892 return(ret);
893
894error:
895 xmlRegFreeAtom(ret);
896 return(NULL);
897}
898
899static xmlRegStatePtr
900xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
901 xmlRegStatePtr ret;
902
903 ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
904 if (ret == NULL) {
905 xmlRegexpErrMemory(ctxt, "allocating state");
906 return(NULL);
907 }
908 memset(ret, 0, sizeof(xmlRegState));
909 ret->type = XML_REGEXP_TRANS_STATE;
910 ret->mark = XML_REGEXP_MARK_NORMAL;
911 return(ret);
912}
913
920static void
921xmlRegFreeState(xmlRegStatePtr state) {
922 if (state == NULL)
923 return;
924
925 if (state->trans != NULL)
926 xmlFree(state->trans);
927 if (state->transTo != NULL)
928 xmlFree(state->transTo);
929 xmlFree(state);
930}
931
938static void
939xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
940 int i;
941 if (ctxt == NULL)
942 return;
943
944 if (ctxt->string != NULL)
945 xmlFree(ctxt->string);
946 if (ctxt->states != NULL) {
947 for (i = 0;i < ctxt->nbStates;i++)
948 xmlRegFreeState(ctxt->states[i]);
949 xmlFree(ctxt->states);
950 }
951 if (ctxt->atoms != NULL) {
952 for (i = 0;i < ctxt->nbAtoms;i++)
953 xmlRegFreeAtom(ctxt->atoms[i]);
954 xmlFree(ctxt->atoms);
955 }
956 if (ctxt->counters != NULL)
957 xmlFree(ctxt->counters);
958 xmlFree(ctxt);
959}
960
961/************************************************************************
962 * *
963 * Display of Data structures *
964 * *
965 ************************************************************************/
966
967static void
968xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
969 switch (type) {
970 case XML_REGEXP_EPSILON:
971 fprintf(output, "epsilon "); break;
972 case XML_REGEXP_CHARVAL:
973 fprintf(output, "charval "); break;
974 case XML_REGEXP_RANGES:
975 fprintf(output, "ranges "); break;
976 case XML_REGEXP_SUBREG:
977 fprintf(output, "subexpr "); break;
978 case XML_REGEXP_STRING:
979 fprintf(output, "string "); break;
980 case XML_REGEXP_ANYCHAR:
981 fprintf(output, "anychar "); break;
982 case XML_REGEXP_ANYSPACE:
983 fprintf(output, "anyspace "); break;
984 case XML_REGEXP_NOTSPACE:
985 fprintf(output, "notspace "); break;
986 case XML_REGEXP_INITNAME:
987 fprintf(output, "initname "); break;
988 case XML_REGEXP_NOTINITNAME:
989 fprintf(output, "notinitname "); break;
990 case XML_REGEXP_NAMECHAR:
991 fprintf(output, "namechar "); break;
992 case XML_REGEXP_NOTNAMECHAR:
993 fprintf(output, "notnamechar "); break;
994 case XML_REGEXP_DECIMAL:
995 fprintf(output, "decimal "); break;
996 case XML_REGEXP_NOTDECIMAL:
997 fprintf(output, "notdecimal "); break;
998 case XML_REGEXP_REALCHAR:
999 fprintf(output, "realchar "); break;
1000 case XML_REGEXP_NOTREALCHAR:
1001 fprintf(output, "notrealchar "); break;
1002 case XML_REGEXP_LETTER:
1003 fprintf(output, "LETTER "); break;
1004 case XML_REGEXP_LETTER_UPPERCASE:
1005 fprintf(output, "LETTER_UPPERCASE "); break;
1006 case XML_REGEXP_LETTER_LOWERCASE:
1007 fprintf(output, "LETTER_LOWERCASE "); break;
1008 case XML_REGEXP_LETTER_TITLECASE:
1009 fprintf(output, "LETTER_TITLECASE "); break;
1010 case XML_REGEXP_LETTER_MODIFIER:
1011 fprintf(output, "LETTER_MODIFIER "); break;
1012 case XML_REGEXP_LETTER_OTHERS:
1013 fprintf(output, "LETTER_OTHERS "); break;
1014 case XML_REGEXP_MARK:
1015 fprintf(output, "MARK "); break;
1016 case XML_REGEXP_MARK_NONSPACING:
1017 fprintf(output, "MARK_NONSPACING "); break;
1018 case XML_REGEXP_MARK_SPACECOMBINING:
1019 fprintf(output, "MARK_SPACECOMBINING "); break;
1020 case XML_REGEXP_MARK_ENCLOSING:
1021 fprintf(output, "MARK_ENCLOSING "); break;
1022 case XML_REGEXP_NUMBER:
1023 fprintf(output, "NUMBER "); break;
1024 case XML_REGEXP_NUMBER_DECIMAL:
1025 fprintf(output, "NUMBER_DECIMAL "); break;
1026 case XML_REGEXP_NUMBER_LETTER:
1027 fprintf(output, "NUMBER_LETTER "); break;
1028 case XML_REGEXP_NUMBER_OTHERS:
1029 fprintf(output, "NUMBER_OTHERS "); break;
1030 case XML_REGEXP_PUNCT:
1031 fprintf(output, "PUNCT "); break;
1032 case XML_REGEXP_PUNCT_CONNECTOR:
1033 fprintf(output, "PUNCT_CONNECTOR "); break;
1034 case XML_REGEXP_PUNCT_DASH:
1035 fprintf(output, "PUNCT_DASH "); break;
1036 case XML_REGEXP_PUNCT_OPEN:
1037 fprintf(output, "PUNCT_OPEN "); break;
1038 case XML_REGEXP_PUNCT_CLOSE:
1039 fprintf(output, "PUNCT_CLOSE "); break;
1040 case XML_REGEXP_PUNCT_INITQUOTE:
1041 fprintf(output, "PUNCT_INITQUOTE "); break;
1042 case XML_REGEXP_PUNCT_FINQUOTE:
1043 fprintf(output, "PUNCT_FINQUOTE "); break;
1044 case XML_REGEXP_PUNCT_OTHERS:
1045 fprintf(output, "PUNCT_OTHERS "); break;
1046 case XML_REGEXP_SEPAR:
1047 fprintf(output, "SEPAR "); break;
1048 case XML_REGEXP_SEPAR_SPACE:
1049 fprintf(output, "SEPAR_SPACE "); break;
1050 case XML_REGEXP_SEPAR_LINE:
1051 fprintf(output, "SEPAR_LINE "); break;
1052 case XML_REGEXP_SEPAR_PARA:
1053 fprintf(output, "SEPAR_PARA "); break;
1054 case XML_REGEXP_SYMBOL:
1055 fprintf(output, "SYMBOL "); break;
1056 case XML_REGEXP_SYMBOL_MATH:
1057 fprintf(output, "SYMBOL_MATH "); break;
1058 case XML_REGEXP_SYMBOL_CURRENCY:
1059 fprintf(output, "SYMBOL_CURRENCY "); break;
1060 case XML_REGEXP_SYMBOL_MODIFIER:
1061 fprintf(output, "SYMBOL_MODIFIER "); break;
1062 case XML_REGEXP_SYMBOL_OTHERS:
1063 fprintf(output, "SYMBOL_OTHERS "); break;
1064 case XML_REGEXP_OTHER:
1065 fprintf(output, "OTHER "); break;
1066 case XML_REGEXP_OTHER_CONTROL:
1067 fprintf(output, "OTHER_CONTROL "); break;
1068 case XML_REGEXP_OTHER_FORMAT:
1069 fprintf(output, "OTHER_FORMAT "); break;
1070 case XML_REGEXP_OTHER_PRIVATE:
1071 fprintf(output, "OTHER_PRIVATE "); break;
1072 case XML_REGEXP_OTHER_NA:
1073 fprintf(output, "OTHER_NA "); break;
1074 case XML_REGEXP_BLOCK_NAME:
1075 fprintf(output, "BLOCK "); break;
1076 }
1077}
1078
1079static void
1080xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
1081 switch (type) {
1082 case XML_REGEXP_QUANT_EPSILON:
1083 fprintf(output, "epsilon "); break;
1084 case XML_REGEXP_QUANT_ONCE:
1085 fprintf(output, "once "); break;
1086 case XML_REGEXP_QUANT_OPT:
1087 fprintf(output, "? "); break;
1088 case XML_REGEXP_QUANT_MULT:
1089 fprintf(output, "* "); break;
1090 case XML_REGEXP_QUANT_PLUS:
1091 fprintf(output, "+ "); break;
1092 case XML_REGEXP_QUANT_RANGE:
1093 fprintf(output, "range "); break;
1094 case XML_REGEXP_QUANT_ONCEONLY:
1095 fprintf(output, "onceonly "); break;
1096 case XML_REGEXP_QUANT_ALL:
1097 fprintf(output, "all "); break;
1098 }
1099}
1100static void
1101xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
1102 fprintf(output, " range: ");
1103 if (range->neg)
1104 fprintf(output, "negative ");
1105 xmlRegPrintAtomType(output, range->type);
1106 fprintf(output, "%c - %c\n", range->start, range->end);
1107}
1108
1109static void
1110xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
1111 fprintf(output, " atom: ");
1112 if (atom == NULL) {
1113 fprintf(output, "NULL\n");
1114 return;
1115 }
1116 if (atom->neg)
1117 fprintf(output, "not ");
1118 xmlRegPrintAtomType(output, atom->type);
1119 xmlRegPrintQuantType(output, atom->quant);
1120 if (atom->quant == XML_REGEXP_QUANT_RANGE)
1121 fprintf(output, "%d-%d ", atom->min, atom->max);
1122 if (atom->type == XML_REGEXP_STRING)
1123 fprintf(output, "'%s' ", (char *) atom->valuep);
1124 if (atom->type == XML_REGEXP_CHARVAL)
1125 fprintf(output, "char %c\n", atom->codepoint);
1126 else if (atom->type == XML_REGEXP_RANGES) {
1127 int i;
1128 fprintf(output, "%d entries\n", atom->nbRanges);
1129 for (i = 0; i < atom->nbRanges;i++)
1130 xmlRegPrintRange(output, atom->ranges[i]);
1131 } else if (atom->type == XML_REGEXP_SUBREG) {
1132 fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
1133 } else {
1134 fprintf(output, "\n");
1135 }
1136}
1137
1138static void
1139xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
1140 fprintf(output, " trans: ");
1141 if (trans == NULL) {
1142 fprintf(output, "NULL\n");
1143 return;
1144 }
1145 if (trans->to < 0) {
1146 fprintf(output, "removed\n");
1147 return;
1148 }
1149 if (trans->nd != 0) {
1150 if (trans->nd == 2)
1151 fprintf(output, "last not determinist, ");
1152 else
1153 fprintf(output, "not determinist, ");
1154 }
1155 if (trans->counter >= 0) {
1156 fprintf(output, "counted %d, ", trans->counter);
1157 }
1158 if (trans->count == REGEXP_ALL_COUNTER) {
1159 fprintf(output, "all transition, ");
1160 } else if (trans->count >= 0) {
1161 fprintf(output, "count based %d, ", trans->count);
1162 }
1163 if (trans->atom == NULL) {
1164 fprintf(output, "epsilon to %d\n", trans->to);
1165 return;
1166 }
1167 if (trans->atom->type == XML_REGEXP_CHARVAL)
1168 fprintf(output, "char %c ", trans->atom->codepoint);
1169 fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
1170}
1171
1172static void
1173xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
1174 int i;
1175
1176 fprintf(output, " state: ");
1177 if (state == NULL) {
1178 fprintf(output, "NULL\n");
1179 return;
1180 }
1181 if (state->type == XML_REGEXP_START_STATE)
1182 fprintf(output, "START ");
1183 if (state->type == XML_REGEXP_FINAL_STATE)
1184 fprintf(output, "FINAL ");
1185
1186 fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
1187 for (i = 0;i < state->nbTrans; i++) {
1188 xmlRegPrintTrans(output, &(state->trans[i]));
1189 }
1190}
1191
1192#ifdef DEBUG_REGEXP_GRAPH
1193static void
1194xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
1195 int i;
1196
1197 fprintf(output, " ctxt: ");
1198 if (ctxt == NULL) {
1199 fprintf(output, "NULL\n");
1200 return;
1201 }
1202 fprintf(output, "'%s' ", ctxt->string);
1203 if (ctxt->error)
1204 fprintf(output, "error ");
1205 if (ctxt->neg)
1206 fprintf(output, "neg ");
1207 fprintf(output, "\n");
1208 fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
1209 for (i = 0;i < ctxt->nbAtoms; i++) {
1210 fprintf(output, " %02d ", i);
1211 xmlRegPrintAtom(output, ctxt->atoms[i]);
1212 }
1213 if (ctxt->atom != NULL) {
1214 fprintf(output, "current atom:\n");
1215 xmlRegPrintAtom(output, ctxt->atom);
1216 }
1217 fprintf(output, "%d states:", ctxt->nbStates);
1218 if (ctxt->start != NULL)
1219 fprintf(output, " start: %d", ctxt->start->no);
1220 if (ctxt->end != NULL)
1221 fprintf(output, " end: %d", ctxt->end->no);
1222 fprintf(output, "\n");
1223 for (i = 0;i < ctxt->nbStates; i++) {
1224 xmlRegPrintState(output, ctxt->states[i]);
1225 }
1226 fprintf(output, "%d counters:\n", ctxt->nbCounters);
1227 for (i = 0;i < ctxt->nbCounters; i++) {
1228 fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
1229 ctxt->counters[i].max);
1230 }
1231}
1232#endif
1233
1234/************************************************************************
1235 * *
1236 * Finite Automata structures manipulations *
1237 * *
1238 ************************************************************************/
1239
1240static void
1241xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
1242 int neg, xmlRegAtomType type, int start, int end,
1243 xmlChar *blockName) {
1244 xmlRegRangePtr range;
1245
1246 if (atom == NULL) {
1247 ERROR("add range: atom is NULL");
1248 return;
1249 }
1250 if (atom->type != XML_REGEXP_RANGES) {
1251 ERROR("add range: atom is not ranges");
1252 return;
1253 }
1254 if (atom->maxRanges == 0) {
1255 atom->maxRanges = 4;
1256 atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
1257 sizeof(xmlRegRangePtr));
1258 if (atom->ranges == NULL) {
1259 xmlRegexpErrMemory(ctxt, "adding ranges");
1260 atom->maxRanges = 0;
1261 return;
1262 }
1263 } else if (atom->nbRanges >= atom->maxRanges) {
1264 xmlRegRangePtr *tmp;
1265 atom->maxRanges *= 2;
1266 tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
1267 sizeof(xmlRegRangePtr));
1268 if (tmp == NULL) {
1269 xmlRegexpErrMemory(ctxt, "adding ranges");
1270 atom->maxRanges /= 2;
1271 return;
1272 }
1273 atom->ranges = tmp;
1274 }
1275 range = xmlRegNewRange(ctxt, neg, type, start, end);
1276 if (range == NULL)
1277 return;
1278 range->blockName = blockName;
1279 atom->ranges[atom->nbRanges++] = range;
1280
1281}
1282
1283static int
1284xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1285 if (ctxt->maxCounters == 0) {
1286 ctxt->maxCounters = 4;
1287 ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
1288 sizeof(xmlRegCounter));
1289 if (ctxt->counters == NULL) {
1290 xmlRegexpErrMemory(ctxt, "allocating counter");
1291 ctxt->maxCounters = 0;
1292 return(-1);
1293 }
1294 } else if (ctxt->nbCounters >= ctxt->maxCounters) {
1295 xmlRegCounter *tmp;
1296 ctxt->maxCounters *= 2;
1297 tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
1298 sizeof(xmlRegCounter));
1299 if (tmp == NULL) {
1300 xmlRegexpErrMemory(ctxt, "allocating counter");
1301 ctxt->maxCounters /= 2;
1302 return(-1);
1303 }
1304 ctxt->counters = tmp;
1305 }
1306 ctxt->counters[ctxt->nbCounters].min = -1;
1307 ctxt->counters[ctxt->nbCounters].max = -1;
1308 return(ctxt->nbCounters++);
1309}
1310
1311static int
1312xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1313 if (atom == NULL) {
1314 ERROR("atom push: atom is NULL");
1315 return(-1);
1316 }
1317 if (ctxt->maxAtoms == 0) {
1318 ctxt->maxAtoms = 4;
1319 ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
1320 sizeof(xmlRegAtomPtr));
1321 if (ctxt->atoms == NULL) {
1322 xmlRegexpErrMemory(ctxt, "pushing atom");
1323 ctxt->maxAtoms = 0;
1324 return(-1);
1325 }
1326 } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
1327 xmlRegAtomPtr *tmp;
1328 ctxt->maxAtoms *= 2;
1329 tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
1330 sizeof(xmlRegAtomPtr));
1331 if (tmp == NULL) {
1332 xmlRegexpErrMemory(ctxt, "allocating counter");
1333 ctxt->maxAtoms /= 2;
1334 return(-1);
1335 }
1336 ctxt->atoms = tmp;
1337 }
1338 atom->no = ctxt->nbAtoms;
1339 ctxt->atoms[ctxt->nbAtoms++] = atom;
1340 return(0);
1341}
1342
1343static void
1344xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
1345 int from) {
1346 if (target->maxTransTo == 0) {
1347 target->maxTransTo = 8;
1348 target->transTo = (int *) xmlMalloc(target->maxTransTo *
1349 sizeof(int));
1350 if (target->transTo == NULL) {
1351 xmlRegexpErrMemory(ctxt, "adding transition");
1352 target->maxTransTo = 0;
1353 return;
1354 }
1355 } else if (target->nbTransTo >= target->maxTransTo) {
1356 int *tmp;
1357 target->maxTransTo *= 2;
1358 tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
1359 sizeof(int));
1360 if (tmp == NULL) {
1361 xmlRegexpErrMemory(ctxt, "adding transition");
1362 target->maxTransTo /= 2;
1363 return;
1364 }
1365 target->transTo = tmp;
1366 }
1367 target->transTo[target->nbTransTo] = from;
1368 target->nbTransTo++;
1369}
1370
1371static void
1372xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1373 xmlRegAtomPtr atom, xmlRegStatePtr target,
1374 int counter, int count) {
1375
1376 int nrtrans;
1377
1378 if (state == NULL) {
1379 ERROR("add state: state is NULL");
1380 return;
1381 }
1382 if (target == NULL) {
1383 ERROR("add state: target is NULL");
1384 return;
1385 }
1386 /*
1387 * Other routines follow the philosophy 'When in doubt, add a transition'
1388 * so we check here whether such a transition is already present and, if
1389 * so, silently ignore this request.
1390 */
1391
1392 for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
1393 xmlRegTransPtr trans = &(state->trans[nrtrans]);
1394 if ((trans->atom == atom) &&
1395 (trans->to == target->no) &&
1396 (trans->counter == counter) &&
1397 (trans->count == count)) {
1398#ifdef DEBUG_REGEXP_GRAPH
1399 printf("Ignoring duplicate transition from %d to %d\n",
1400 state->no, target->no);
1401#endif
1402 return;
1403 }
1404 }
1405
1406 if (state->maxTrans == 0) {
1407 state->maxTrans = 8;
1408 state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
1409 sizeof(xmlRegTrans));
1410 if (state->trans == NULL) {
1411 xmlRegexpErrMemory(ctxt, "adding transition");
1412 state->maxTrans = 0;
1413 return;
1414 }
1415 } else if (state->nbTrans >= state->maxTrans) {
1416 xmlRegTrans *tmp;
1417 state->maxTrans *= 2;
1418 tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
1419 sizeof(xmlRegTrans));
1420 if (tmp == NULL) {
1421 xmlRegexpErrMemory(ctxt, "adding transition");
1422 state->maxTrans /= 2;
1423 return;
1424 }
1425 state->trans = tmp;
1426 }
1427#ifdef DEBUG_REGEXP_GRAPH
1428 printf("Add trans from %d to %d ", state->no, target->no);
1429 if (count == REGEXP_ALL_COUNTER)
1430 printf("all transition\n");
1431 else if (count >= 0)
1432 printf("count based %d\n", count);
1433 else if (counter >= 0)
1434 printf("counted %d\n", counter);
1435 else if (atom == NULL)
1436 printf("epsilon transition\n");
1437 else if (atom != NULL)
1438 xmlRegPrintAtom(stdout, atom);
1439#endif
1440
1441 state->trans[state->nbTrans].atom = atom;
1442 state->trans[state->nbTrans].to = target->no;
1443 state->trans[state->nbTrans].counter = counter;
1444 state->trans[state->nbTrans].count = count;
1445 state->trans[state->nbTrans].nd = 0;
1446 state->nbTrans++;
1447 xmlRegStateAddTransTo(ctxt, target, state->no);
1448}
1449
1450static int
1451xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
1452 if (state == NULL) return(-1);
1453 if (ctxt->maxStates == 0) {
1454 ctxt->maxStates = 4;
1455 ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
1456 sizeof(xmlRegStatePtr));
1457 if (ctxt->states == NULL) {
1458 xmlRegexpErrMemory(ctxt, "adding state");
1459 ctxt->maxStates = 0;
1460 return(-1);
1461 }
1462 } else if (ctxt->nbStates >= ctxt->maxStates) {
1463 xmlRegStatePtr *tmp;
1464 ctxt->maxStates *= 2;
1465 tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
1466 sizeof(xmlRegStatePtr));
1467 if (tmp == NULL) {
1468 xmlRegexpErrMemory(ctxt, "adding state");
1469 ctxt->maxStates /= 2;
1470 return(-1);
1471 }
1472 ctxt->states = tmp;
1473 }
1474 state->no = ctxt->nbStates;
1475 ctxt->states[ctxt->nbStates++] = state;
1476 return(0);
1477}
1478
1487static void
1488xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
1489 xmlRegStatePtr from, xmlRegStatePtr to,
1490 int lax) {
1491 if (to == NULL) {
1492 to = xmlRegNewState(ctxt);
1493 xmlRegStatePush(ctxt, to);
1494 ctxt->state = to;
1495 }
1496 if (lax)
1497 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
1498 else
1499 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
1500}
1501
1509static void
1510xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1511 xmlRegStatePtr from, xmlRegStatePtr to) {
1512 if (to == NULL) {
1513 to = xmlRegNewState(ctxt);
1514 xmlRegStatePush(ctxt, to);
1515 ctxt->state = to;
1516 }
1517 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
1518}
1519
1528static void
1529xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1530 xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1531 if (to == NULL) {
1532 to = xmlRegNewState(ctxt);
1533 xmlRegStatePush(ctxt, to);
1534 ctxt->state = to;
1535 }
1536 xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
1537}
1538
1547static void
1548xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1549 xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1550 if (to == NULL) {
1551 to = xmlRegNewState(ctxt);
1552 xmlRegStatePush(ctxt, to);
1553 ctxt->state = to;
1554 }
1555 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
1556}
1557
1567static int
1568xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
1569 xmlRegStatePtr to, xmlRegAtomPtr atom) {
1570 xmlRegStatePtr end;
1571 int nullable = 0;
1572
1573 if (atom == NULL) {
1574 ERROR("generate transition: atom == NULL");
1575 return(-1);
1576 }
1577 if (atom->type == XML_REGEXP_SUBREG) {
1578 /*
1579 * this is a subexpression handling one should not need to
1580 * create a new node except for XML_REGEXP_QUANT_RANGE.
1581 */
1582 if (xmlRegAtomPush(ctxt, atom) < 0) {
1583 return(-1);
1584 }
1585 if ((to != NULL) && (atom->stop != to) &&
1586 (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1587 /*
1588 * Generate an epsilon transition to link to the target
1589 */
1590 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1591#ifdef DV
1592 } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
1593 (atom->quant != XML_REGEXP_QUANT_ONCE)) {
1594 to = xmlRegNewState(ctxt);
1595 xmlRegStatePush(ctxt, to);
1596 ctxt->state = to;
1597 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1598#endif
1599 }
1600 switch (atom->quant) {
1601 case XML_REGEXP_QUANT_OPT:
1602 atom->quant = XML_REGEXP_QUANT_ONCE;
1603 /*
1604 * transition done to the state after end of atom.
1605 * 1. set transition from atom start to new state
1606 * 2. set transition from atom end to this state.
1607 */
1608 if (to == NULL) {
1609 xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
1610 xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
1611 ctxt->state);
1612 } else {
1613 xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
1614 }
1615 break;
1616 case XML_REGEXP_QUANT_MULT:
1617 atom->quant = XML_REGEXP_QUANT_ONCE;
1618 xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1619 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1620 break;
1621 case XML_REGEXP_QUANT_PLUS:
1622 atom->quant = XML_REGEXP_QUANT_ONCE;
1623 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1624 break;
1625 case XML_REGEXP_QUANT_RANGE: {
1626 int counter;
1627 xmlRegStatePtr inter, newstate;
1628
1629 /*
1630 * create the final state now if needed
1631 */
1632 if (to != NULL) {
1633 newstate = to;
1634 } else {
1635 newstate = xmlRegNewState(ctxt);
1636 xmlRegStatePush(ctxt, newstate);
1637 }
1638
1639 /*
1640 * The principle here is to use counted transition
1641 * to avoid explosion in the number of states in the
1642 * graph. This is clearly more complex but should not
1643 * be exploitable at runtime.
1644 */
1645 if ((atom->min == 0) && (atom->start0 == NULL)) {
1646 xmlRegAtomPtr copy;
1647 /*
1648 * duplicate a transition based on atom to count next
1649 * occurrences after 1. We cannot loop to atom->start
1650 * directly because we need an epsilon transition to
1651 * newstate.
1652 */
1653 /* ???? For some reason it seems we never reach that
1654 case, I suppose this got optimized out before when
1655 building the automata */
1656 copy = xmlRegCopyAtom(ctxt, atom);
1657 if (copy == NULL)
1658 return(-1);
1659 copy->quant = XML_REGEXP_QUANT_ONCE;
1660 copy->min = 0;
1661 copy->max = 0;
1662
1663 if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
1664 < 0)
1665 return(-1);
1666 inter = ctxt->state;
1667 counter = xmlRegGetCounter(ctxt);
1668 ctxt->counters[counter].min = atom->min - 1;
1669 ctxt->counters[counter].max = atom->max - 1;
1670 /* count the number of times we see it again */
1671 xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
1672 atom->stop, counter);
1673 /* allow a way out based on the count */
1674 xmlFAGenerateCountedTransition(ctxt, inter,
1675 newstate, counter);
1676 /* and also allow a direct exit for 0 */
1677 xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1678 newstate);
1679 } else {
1680 /*
1681 * either we need the atom at least once or there
1682 * is an atom->start0 allowing to easily plug the
1683 * epsilon transition.
1684 */
1685 counter = xmlRegGetCounter(ctxt);
1686 ctxt->counters[counter].min = atom->min - 1;
1687 ctxt->counters[counter].max = atom->max - 1;
1688 /* allow a way out based on the count */
1689 xmlFAGenerateCountedTransition(ctxt, atom->stop,
1690 newstate, counter);
1691 /* count the number of times we see it again */
1692 xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1693 atom->start, counter);
1694 /* and if needed allow a direct exit for 0 */
1695 if (atom->min == 0)
1696 xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
1697 newstate);
1698
1699 }
1700 atom->min = 0;
1701 atom->max = 0;
1702 atom->quant = XML_REGEXP_QUANT_ONCE;
1703 ctxt->state = newstate;
1704 }
1705 default:
1706 break;
1707 }
1708 return(0);
1709 }
1710 if ((atom->min == 0) && (atom->max == 0) &&
1711 (atom->quant == XML_REGEXP_QUANT_RANGE)) {
1712 /*
1713 * we can discard the atom and generate an epsilon transition instead
1714 */
1715 if (to == NULL) {
1716 to = xmlRegNewState(ctxt);
1717 if (to != NULL)
1718 xmlRegStatePush(ctxt, to);
1719 else {
1720 return(-1);
1721 }
1722 }
1723 xmlFAGenerateEpsilonTransition(ctxt, from, to);
1724 ctxt->state = to;
1725 xmlRegFreeAtom(atom);
1726 return(0);
1727 }
1728 if (to == NULL) {
1729 to = xmlRegNewState(ctxt);
1730 if (to != NULL)
1731 xmlRegStatePush(ctxt, to);
1732 else {
1733 return(-1);
1734 }
1735 }
1736 end = to;
1737 if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
1738 (atom->quant == XML_REGEXP_QUANT_PLUS)) {
1739 /*
1740 * Do not pollute the target state by adding transitions from
1741 * it as it is likely to be the shared target of multiple branches.
1742 * So isolate with an epsilon transition.
1743 */
1744 xmlRegStatePtr tmp;
1745
1746 tmp = xmlRegNewState(ctxt);
1747 if (tmp != NULL)
1748 xmlRegStatePush(ctxt, tmp);
1749 else {
1750 return(-1);
1751 }
1752 xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
1753 to = tmp;
1754 }
1755 if (xmlRegAtomPush(ctxt, atom) < 0) {
1756 return(-1);
1757 }
1758 if ((atom->quant == XML_REGEXP_QUANT_RANGE) &&
1759 (atom->min == 0) && (atom->max > 0)) {
1760 nullable = 1;
1761 atom->min = 1;
1762 if (atom->max == 1)
1763 atom->quant = XML_REGEXP_QUANT_OPT;
1764 }
1765 xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
1766 ctxt->state = end;
1767 switch (atom->quant) {
1768 case XML_REGEXP_QUANT_OPT:
1769 atom->quant = XML_REGEXP_QUANT_ONCE;
1770 xmlFAGenerateEpsilonTransition(ctxt, from, to);
1771 break;
1772 case XML_REGEXP_QUANT_MULT:
1773 atom->quant = XML_REGEXP_QUANT_ONCE;
1774 xmlFAGenerateEpsilonTransition(ctxt, from, to);
1775 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1776 break;
1777 case XML_REGEXP_QUANT_PLUS:
1778 atom->quant = XML_REGEXP_QUANT_ONCE;
1779 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1780 break;
1781 case XML_REGEXP_QUANT_RANGE:
1782 if (nullable)
1783 xmlFAGenerateEpsilonTransition(ctxt, from, to);
1784 break;
1785 default:
1786 break;
1787 }
1788 return(0);
1789}
1790
1799static void
1800xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
1801 int tonr, int counter) {
1802 int transnr;
1803 xmlRegStatePtr from;
1804 xmlRegStatePtr to;
1805
1806#ifdef DEBUG_REGEXP_GRAPH
1807 printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
1808#endif
1809 from = ctxt->states[fromnr];
1810 if (from == NULL)
1811 return;
1812 to = ctxt->states[tonr];
1813 if (to == NULL)
1814 return;
1815 if ((to->mark == XML_REGEXP_MARK_START) ||
1816 (to->mark == XML_REGEXP_MARK_VISITED))
1817 return;
1818
1819 to->mark = XML_REGEXP_MARK_VISITED;
1820 if (to->type == XML_REGEXP_FINAL_STATE) {
1821#ifdef DEBUG_REGEXP_GRAPH
1822 printf("State %d is final, so %d becomes final\n", tonr, fromnr);
1823#endif
1824 from->type = XML_REGEXP_FINAL_STATE;
1825 }
1826 for (transnr = 0;transnr < to->nbTrans;transnr++) {
1827 if (to->trans[transnr].to < 0)
1828 continue;
1829 if (to->trans[transnr].atom == NULL) {
1830 /*
1831 * Don't remove counted transitions
1832 * Don't loop either
1833 */
1834 if (to->trans[transnr].to != fromnr) {
1835 if (to->trans[transnr].count >= 0) {
1836 int newto = to->trans[transnr].to;
1837
1838 xmlRegStateAddTrans(ctxt, from, NULL,
1839 ctxt->states[newto],
1840 -1, to->trans[transnr].count);
1841 } else {
1842#ifdef DEBUG_REGEXP_GRAPH
1843 printf("Found epsilon trans %d from %d to %d\n",
1844 transnr, tonr, to->trans[transnr].to);
1845#endif
1846 if (to->trans[transnr].counter >= 0) {
1847 xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1848 to->trans[transnr].to,
1849 to->trans[transnr].counter);
1850 } else {
1851 xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1852 to->trans[transnr].to,
1853 counter);
1854 }
1855 }
1856 }
1857 } else {
1858 int newto = to->trans[transnr].to;
1859
1860 if (to->trans[transnr].counter >= 0) {
1861 xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1862 ctxt->states[newto],
1863 to->trans[transnr].counter, -1);
1864 } else {
1865 xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1866 ctxt->states[newto], counter, -1);
1867 }
1868 }
1869 }
1870 to->mark = XML_REGEXP_MARK_NORMAL;
1871}
1872
1894static void
1895xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1896 int statenr, i, j, newto;
1897 xmlRegStatePtr state, tmp;
1898
1899 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1900 state = ctxt->states[statenr];
1901 if (state == NULL)
1902 continue;
1903 if (state->nbTrans != 1)
1904 continue;
1905 if (state->type == XML_REGEXP_UNREACH_STATE ||
1906 state->type == XML_REGEXP_FINAL_STATE)
1907 continue;
1908 /* is the only transition out a basic transition */
1909 if ((state->trans[0].atom == NULL) &&
1910 (state->trans[0].to >= 0) &&
1911 (state->trans[0].to != statenr) &&
1912 (state->trans[0].counter < 0) &&
1913 (state->trans[0].count < 0)) {
1914 newto = state->trans[0].to;
1915
1916 if (state->type == XML_REGEXP_START_STATE) {
1917#ifdef DEBUG_REGEXP_GRAPH
1918 printf("Found simple epsilon trans from start %d to %d\n",
1919 statenr, newto);
1920#endif
1921 } else {
1922#ifdef DEBUG_REGEXP_GRAPH
1923 printf("Found simple epsilon trans from %d to %d\n",
1924 statenr, newto);
1925#endif
1926 for (i = 0;i < state->nbTransTo;i++) {
1927 tmp = ctxt->states[state->transTo[i]];
1928 for (j = 0;j < tmp->nbTrans;j++) {
1929 if (tmp->trans[j].to == statenr) {
1930#ifdef DEBUG_REGEXP_GRAPH
1931 printf("Changed transition %d on %d to go to %d\n",
1932 j, tmp->no, newto);
1933#endif
1934 tmp->trans[j].to = -1;
1935 xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
1936 ctxt->states[newto],
1937 tmp->trans[j].counter,
1938 tmp->trans[j].count);
1939 }
1940 }
1941 }
1942 if (state->type == XML_REGEXP_FINAL_STATE)
1943 ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
1944 /* eliminate the transition completely */
1945 state->nbTrans = 0;
1946
1947 state->type = XML_REGEXP_UNREACH_STATE;
1948
1949 }
1950
1951 }
1952 }
1953}
1959static void
1960xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1961 int statenr, transnr;
1962 xmlRegStatePtr state;
1963 int has_epsilon;
1964
1965 if (ctxt->states == NULL) return;
1966
1967 /*
1968 * Eliminate simple epsilon transition and the associated unreachable
1969 * states.
1970 */
1971 xmlFAEliminateSimpleEpsilonTransitions(ctxt);
1972 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1973 state = ctxt->states[statenr];
1974 if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
1975#ifdef DEBUG_REGEXP_GRAPH
1976 printf("Removed unreachable state %d\n", statenr);
1977#endif
1978 xmlRegFreeState(state);
1979 ctxt->states[statenr] = NULL;
1980 }
1981 }
1982
1983 has_epsilon = 0;
1984
1985 /*
1986 * Build the completed transitions bypassing the epsilons
1987 * Use a marking algorithm to avoid loops
1988 * Mark sink states too.
1989 * Process from the latest states backward to the start when
1990 * there is long cascading epsilon chains this minimize the
1991 * recursions and transition compares when adding the new ones
1992 */
1993 for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
1994 state = ctxt->states[statenr];
1995 if (state == NULL)
1996 continue;
1997 if ((state->nbTrans == 0) &&
1998 (state->type != XML_REGEXP_FINAL_STATE)) {
1999 state->type = XML_REGEXP_SINK_STATE;
2000 }
2001 for (transnr = 0;transnr < state->nbTrans;transnr++) {
2002 if ((state->trans[transnr].atom == NULL) &&
2003 (state->trans[transnr].to >= 0)) {
2004 if (state->trans[transnr].to == statenr) {
2005 state->trans[transnr].to = -1;
2006#ifdef DEBUG_REGEXP_GRAPH
2007 printf("Removed loopback epsilon trans %d on %d\n",
2008 transnr, statenr);
2009#endif
2010 } else if (state->trans[transnr].count < 0) {
2011 int newto = state->trans[transnr].to;
2012
2013#ifdef DEBUG_REGEXP_GRAPH
2014 printf("Found epsilon trans %d from %d to %d\n",
2015 transnr, statenr, newto);
2016#endif
2017 has_epsilon = 1;
2018 state->trans[transnr].to = -2;
2019 state->mark = XML_REGEXP_MARK_START;
2020 xmlFAReduceEpsilonTransitions(ctxt, statenr,
2021 newto, state->trans[transnr].counter);
2022 state->mark = XML_REGEXP_MARK_NORMAL;
2023#ifdef DEBUG_REGEXP_GRAPH
2024 } else {
2025 printf("Found counted transition %d on %d\n",
2026 transnr, statenr);
2027#endif
2028 }
2029 }
2030 }
2031 }
2032 /*
2033 * Eliminate the epsilon transitions
2034 */
2035 if (has_epsilon) {
2036 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2037 state = ctxt->states[statenr];
2038 if (state == NULL)
2039 continue;
2040 for (transnr = 0;transnr < state->nbTrans;transnr++) {
2041 xmlRegTransPtr trans = &(state->trans[transnr]);
2042 if ((trans->atom == NULL) &&
2043 (trans->count < 0) &&
2044 (trans->to >= 0)) {
2045 trans->to = -1;
2046 }
2047 }
2048 }
2049 }
2050
2051 /*
2052 * Use this pass to detect unreachable states too
2053 */
2054 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2055 state = ctxt->states[statenr];
2056 if (state != NULL)
2057 state->reached = XML_REGEXP_MARK_NORMAL;
2058 }
2059 state = ctxt->states[0];
2060 if (state != NULL)
2061 state->reached = XML_REGEXP_MARK_START;
2062 while (state != NULL) {
2063 xmlRegStatePtr target = NULL;
2064 state->reached = XML_REGEXP_MARK_VISITED;
2065 /*
2066 * Mark all states reachable from the current reachable state
2067 */
2068 for (transnr = 0;transnr < state->nbTrans;transnr++) {
2069 if ((state->trans[transnr].to >= 0) &&
2070 ((state->trans[transnr].atom != NULL) ||
2071 (state->trans[transnr].count >= 0))) {
2072 int newto = state->trans[transnr].to;
2073
2074 if (ctxt->states[newto] == NULL)
2075 continue;
2076 if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
2077 ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
2078 target = ctxt->states[newto];
2079 }
2080 }
2081 }
2082
2083 /*
2084 * find the next accessible state not explored
2085 */
2086 if (target == NULL) {
2087 for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
2088 state = ctxt->states[statenr];
2089 if ((state != NULL) && (state->reached ==
2090 XML_REGEXP_MARK_START)) {
2091 target = state;
2092 break;
2093 }
2094 }
2095 }
2096 state = target;
2097 }
2098 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2099 state = ctxt->states[statenr];
2100 if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
2101#ifdef DEBUG_REGEXP_GRAPH
2102 printf("Removed unreachable state %d\n", statenr);
2103#endif
2104 xmlRegFreeState(state);
2105 ctxt->states[statenr] = NULL;
2106 }
2107 }
2108
2109}
2110
2111static int
2112xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
2113 int ret = 0;
2114
2115 if ((range1->type == XML_REGEXP_RANGES) ||
2116 (range2->type == XML_REGEXP_RANGES) ||
2117 (range2->type == XML_REGEXP_SUBREG) ||
2118 (range1->type == XML_REGEXP_SUBREG) ||
2119 (range1->type == XML_REGEXP_STRING) ||
2120 (range2->type == XML_REGEXP_STRING))
2121 return(-1);
2122
2123 /* put them in order */
2124 if (range1->type > range2->type) {
2125 xmlRegRangePtr tmp;
2126
2127 tmp = range1;
2128 range1 = range2;
2129 range2 = tmp;
2130 }
2131 if ((range1->type == XML_REGEXP_ANYCHAR) ||
2132 (range2->type == XML_REGEXP_ANYCHAR)) {
2133 ret = 1;
2134 } else if ((range1->type == XML_REGEXP_EPSILON) ||
2135 (range2->type == XML_REGEXP_EPSILON)) {
2136 return(0);
2137 } else if (range1->type == range2->type) {
2138 if (range1->type != XML_REGEXP_CHARVAL)
2139 ret = 1;
2140 else if ((range1->end < range2->start) ||
2141 (range2->end < range1->start))
2142 ret = 0;
2143 else
2144 ret = 1;
2145 } else if (range1->type == XML_REGEXP_CHARVAL) {
2146 int codepoint;
2147 int neg = 0;
2148
2149 /*
2150 * just check all codepoints in the range for acceptance,
2151 * this is usually way cheaper since done only once at
2152 * compilation than testing over and over at runtime or
2153 * pushing too many states when evaluating.
2154 */
2155 if (((range1->neg == 0) && (range2->neg != 0)) ||
2156 ((range1->neg != 0) && (range2->neg == 0)))
2157 neg = 1;
2158
2159 for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
2160 ret = xmlRegCheckCharacterRange(range2->type, codepoint,
2161 0, range2->start, range2->end,
2162 range2->blockName);
2163 if (ret < 0)
2164 return(-1);
2165 if (((neg == 1) && (ret == 0)) ||
2166 ((neg == 0) && (ret == 1)))
2167 return(1);
2168 }
2169 return(0);
2170 } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
2171 (range2->type == XML_REGEXP_BLOCK_NAME)) {
2172 if (range1->type == range2->type) {
2173 ret = xmlStrEqual(range1->blockName, range2->blockName);
2174 } else {
2175 /*
2176 * comparing a block range with anything else is way
2177 * too costly, and maintaining the table is like too much
2178 * memory too, so let's force the automata to save state
2179 * here.
2180 */
2181 return(1);
2182 }
2183 } else if ((range1->type < XML_REGEXP_LETTER) ||
2184 (range2->type < XML_REGEXP_LETTER)) {
2185 if ((range1->type == XML_REGEXP_ANYSPACE) &&
2186 (range2->type == XML_REGEXP_NOTSPACE))
2187 ret = 0;
2188 else if ((range1->type == XML_REGEXP_INITNAME) &&
2189 (range2->type == XML_REGEXP_NOTINITNAME))
2190 ret = 0;
2191 else if ((range1->type == XML_REGEXP_NAMECHAR) &&
2192 (range2->type == XML_REGEXP_NOTNAMECHAR))
2193 ret = 0;
2194 else if ((range1->type == XML_REGEXP_DECIMAL) &&
2195 (range2->type == XML_REGEXP_NOTDECIMAL))
2196 ret = 0;
2197 else if ((range1->type == XML_REGEXP_REALCHAR) &&
2198 (range2->type == XML_REGEXP_NOTREALCHAR))
2199 ret = 0;
2200 else {
2201 /* same thing to limit complexity */
2202 return(1);
2203 }
2204 } else {
2205 ret = 0;
2206 /* range1->type < range2->type here */
2207 switch (range1->type) {
2208 case XML_REGEXP_LETTER:
2209 /* all disjoint except in the subgroups */
2210 if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
2211 (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
2212 (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
2213 (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
2214 (range2->type == XML_REGEXP_LETTER_OTHERS))
2215 ret = 1;
2216 break;
2217 case XML_REGEXP_MARK:
2218 if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
2219 (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
2220 (range2->type == XML_REGEXP_MARK_ENCLOSING))
2221 ret = 1;
2222 break;
2223 case XML_REGEXP_NUMBER:
2224 if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
2225 (range2->type == XML_REGEXP_NUMBER_LETTER) ||
2226 (range2->type == XML_REGEXP_NUMBER_OTHERS))
2227 ret = 1;
2228 break;
2229 case XML_REGEXP_PUNCT:
2230 if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
2231 (range2->type == XML_REGEXP_PUNCT_DASH) ||
2232 (range2->type == XML_REGEXP_PUNCT_OPEN) ||
2233 (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
2234 (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
2235 (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
2236 (range2->type == XML_REGEXP_PUNCT_OTHERS))
2237 ret = 1;
2238 break;
2239 case XML_REGEXP_SEPAR:
2240 if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
2241 (range2->type == XML_REGEXP_SEPAR_LINE) ||
2242 (range2->type == XML_REGEXP_SEPAR_PARA))
2243 ret = 1;
2244 break;
2245 case XML_REGEXP_SYMBOL:
2246 if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
2247 (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
2248 (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
2249 (range2->type == XML_REGEXP_SYMBOL_OTHERS))
2250 ret = 1;
2251 break;
2252 case XML_REGEXP_OTHER:
2253 if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
2254 (range2->type == XML_REGEXP_OTHER_FORMAT) ||
2255 (range2->type == XML_REGEXP_OTHER_PRIVATE))
2256 ret = 1;
2257 break;
2258 default:
2259 if ((range2->type >= XML_REGEXP_LETTER) &&
2260 (range2->type < XML_REGEXP_BLOCK_NAME))
2261 ret = 0;
2262 else {
2263 /* safety net ! */
2264 return(1);
2265 }
2266 }
2267 }
2268 if (((range1->neg == 0) && (range2->neg != 0)) ||
2269 ((range1->neg != 0) && (range2->neg == 0)))
2270 ret = !ret;
2271 return(ret);
2272}
2273
2284static int
2285xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
2286 if ((type1 == XML_REGEXP_EPSILON) ||
2287 (type1 == XML_REGEXP_CHARVAL) ||
2288 (type1 == XML_REGEXP_RANGES) ||
2289 (type1 == XML_REGEXP_SUBREG) ||
2290 (type1 == XML_REGEXP_STRING) ||
2291 (type1 == XML_REGEXP_ANYCHAR))
2292 return(1);
2293 if ((type2 == XML_REGEXP_EPSILON) ||
2294 (type2 == XML_REGEXP_CHARVAL) ||
2295 (type2 == XML_REGEXP_RANGES) ||
2296 (type2 == XML_REGEXP_SUBREG) ||
2297 (type2 == XML_REGEXP_STRING) ||
2298 (type2 == XML_REGEXP_ANYCHAR))
2299 return(1);
2300
2301 if (type1 == type2) return(1);
2302
2303 /* simplify subsequent compares by making sure type1 < type2 */
2304 if (type1 > type2) {
2305 xmlRegAtomType tmp = type1;
2306 type1 = type2;
2307 type2 = tmp;
2308 }
2309 switch (type1) {
2310 case XML_REGEXP_ANYSPACE: /* \s */
2311 /* can't be a letter, number, mark, punctuation, symbol */
2312 if ((type2 == XML_REGEXP_NOTSPACE) ||
2313 ((type2 >= XML_REGEXP_LETTER) &&
2314 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2315 ((type2 >= XML_REGEXP_NUMBER) &&
2316 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2317 ((type2 >= XML_REGEXP_MARK) &&
2318 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2319 ((type2 >= XML_REGEXP_PUNCT) &&
2320 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2321 ((type2 >= XML_REGEXP_SYMBOL) &&
2322 (type2 <= XML_REGEXP_SYMBOL_OTHERS))
2323 ) return(0);
2324 break;
2325 case XML_REGEXP_NOTSPACE: /* \S */
2326 break;
2327 case XML_REGEXP_INITNAME: /* \l */
2328 /* can't be a number, mark, separator, punctuation, symbol or other */
2329 if ((type2 == XML_REGEXP_NOTINITNAME) ||
2330 ((type2 >= XML_REGEXP_NUMBER) &&
2331 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2332 ((type2 >= XML_REGEXP_MARK) &&
2333 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2334 ((type2 >= XML_REGEXP_SEPAR) &&
2335 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2336 ((type2 >= XML_REGEXP_PUNCT) &&
2337 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2338 ((type2 >= XML_REGEXP_SYMBOL) &&
2339 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2340 ((type2 >= XML_REGEXP_OTHER) &&
2341 (type2 <= XML_REGEXP_OTHER_NA))
2342 ) return(0);
2343 break;
2344 case XML_REGEXP_NOTINITNAME: /* \L */
2345 break;
2346 case XML_REGEXP_NAMECHAR: /* \c */
2347 /* can't be a mark, separator, punctuation, symbol or other */
2348 if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
2349 ((type2 >= XML_REGEXP_MARK) &&
2350 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2351 ((type2 >= XML_REGEXP_PUNCT) &&
2352 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2353 ((type2 >= XML_REGEXP_SEPAR) &&
2354 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2355 ((type2 >= XML_REGEXP_SYMBOL) &&
2356 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2357 ((type2 >= XML_REGEXP_OTHER) &&
2358 (type2 <= XML_REGEXP_OTHER_NA))
2359 ) return(0);
2360 break;
2361 case XML_REGEXP_NOTNAMECHAR: /* \C */
2362 break;
2363 case XML_REGEXP_DECIMAL: /* \d */
2364 /* can't be a letter, mark, separator, punctuation, symbol or other */
2365 if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2366 (type2 == XML_REGEXP_REALCHAR) ||
2367 ((type2 >= XML_REGEXP_LETTER) &&
2368 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2369 ((type2 >= XML_REGEXP_MARK) &&
2370 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2371 ((type2 >= XML_REGEXP_PUNCT) &&
2372 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2373 ((type2 >= XML_REGEXP_SEPAR) &&
2374 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2375 ((type2 >= XML_REGEXP_SYMBOL) &&
2376 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2377 ((type2 >= XML_REGEXP_OTHER) &&
2378 (type2 <= XML_REGEXP_OTHER_NA))
2379 )return(0);
2380 break;
2381 case XML_REGEXP_NOTDECIMAL: /* \D */
2382 break;
2383 case XML_REGEXP_REALCHAR: /* \w */
2384 /* can't be a mark, separator, punctuation, symbol or other */
2385 if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2386 ((type2 >= XML_REGEXP_MARK) &&
2387 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2388 ((type2 >= XML_REGEXP_PUNCT) &&
2389 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2390 ((type2 >= XML_REGEXP_SEPAR) &&
2391 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2392 ((type2 >= XML_REGEXP_SYMBOL) &&
2393 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2394 ((type2 >= XML_REGEXP_OTHER) &&
2395 (type2 <= XML_REGEXP_OTHER_NA))
2396 )return(0);
2397 break;
2398 case XML_REGEXP_NOTREALCHAR: /* \W */
2399 break;
2400 /*
2401 * at that point we know both type 1 and type2 are from
2402 * character categories are ordered and are different,
2403 * it becomes simple because this is a partition
2404 */
2405 case XML_REGEXP_LETTER:
2406 if (type2 <= XML_REGEXP_LETTER_OTHERS)
2407 return(1);
2408 return(0);
2409 case XML_REGEXP_LETTER_UPPERCASE:
2410 case XML_REGEXP_LETTER_LOWERCASE:
2411 case XML_REGEXP_LETTER_TITLECASE:
2412 case XML_REGEXP_LETTER_MODIFIER:
2413 case XML_REGEXP_LETTER_OTHERS:
2414 return(0);
2415 case XML_REGEXP_MARK:
2416 if (type2 <= XML_REGEXP_MARK_ENCLOSING)
2417 return(1);
2418 return(0);
2419 case XML_REGEXP_MARK_NONSPACING:
2420 case XML_REGEXP_MARK_SPACECOMBINING:
2421 case XML_REGEXP_MARK_ENCLOSING:
2422 return(0);
2423 case XML_REGEXP_NUMBER:
2424 if (type2 <= XML_REGEXP_NUMBER_OTHERS)
2425 return(1);
2426 return(0);
2427 case XML_REGEXP_NUMBER_DECIMAL:
2428 case XML_REGEXP_NUMBER_LETTER:
2429 case XML_REGEXP_NUMBER_OTHERS:
2430 return(0);
2431 case XML_REGEXP_PUNCT:
2432 if (type2 <= XML_REGEXP_PUNCT_OTHERS)
2433 return(1);
2434 return(0);
2435 case XML_REGEXP_PUNCT_CONNECTOR:
2436 case XML_REGEXP_PUNCT_DASH:
2437 case XML_REGEXP_PUNCT_OPEN:
2438 case XML_REGEXP_PUNCT_CLOSE:
2439 case XML_REGEXP_PUNCT_INITQUOTE:
2440 case XML_REGEXP_PUNCT_FINQUOTE:
2441 case XML_REGEXP_PUNCT_OTHERS:
2442 return(0);
2443 case XML_REGEXP_SEPAR:
2444 if (type2 <= XML_REGEXP_SEPAR_PARA)
2445 return(1);
2446 return(0);
2447 case XML_REGEXP_SEPAR_SPACE:
2448 case XML_REGEXP_SEPAR_LINE:
2449 case XML_REGEXP_SEPAR_PARA:
2450 return(0);
2451 case XML_REGEXP_SYMBOL:
2452 if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
2453 return(1);
2454 return(0);
2455 case XML_REGEXP_SYMBOL_MATH:
2456 case XML_REGEXP_SYMBOL_CURRENCY:
2457 case XML_REGEXP_SYMBOL_MODIFIER:
2458 case XML_REGEXP_SYMBOL_OTHERS:
2459 return(0);
2460 case XML_REGEXP_OTHER:
2461 if (type2 <= XML_REGEXP_OTHER_NA)
2462 return(1);
2463 return(0);
2464 case XML_REGEXP_OTHER_CONTROL:
2465 case XML_REGEXP_OTHER_FORMAT:
2466 case XML_REGEXP_OTHER_PRIVATE:
2467 case XML_REGEXP_OTHER_NA:
2468 return(0);
2469 default:
2470 break;
2471 }
2472 return(1);
2473}
2474
2486static int
2487xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
2488 int ret = 0;
2489
2490 if (atom1 == atom2)
2491 return(1);
2492 if ((atom1 == NULL) || (atom2 == NULL))
2493 return(0);
2494
2495 if (atom1->type != atom2->type)
2496 return(0);
2497 switch (atom1->type) {
2498 case XML_REGEXP_EPSILON:
2499 ret = 0;
2500 break;
2501 case XML_REGEXP_STRING:
2502 if (!deep)
2503 ret = (atom1->valuep == atom2->valuep);
2504 else
2505 ret = xmlStrEqual((xmlChar *)atom1->valuep,
2506 (xmlChar *)atom2->valuep);
2507 break;
2508 case XML_REGEXP_CHARVAL:
2509 ret = (atom1->codepoint == atom2->codepoint);
2510 break;
2511 case XML_REGEXP_RANGES:
2512 /* too hard to do in the general case */
2513 ret = 0;
2514 default:
2515 break;
2516 }
2517 return(ret);
2518}
2519
2531static int
2532xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
2533 int ret = 1;
2534
2535 if (atom1 == atom2)
2536 return(1);
2537 if ((atom1 == NULL) || (atom2 == NULL))
2538 return(0);
2539
2540 if ((atom1->type == XML_REGEXP_ANYCHAR) ||
2541 (atom2->type == XML_REGEXP_ANYCHAR))
2542 return(1);
2543
2544 if (atom1->type > atom2->type) {
2545 xmlRegAtomPtr tmp;
2546 tmp = atom1;
2547 atom1 = atom2;
2548 atom2 = tmp;
2549 }
2550 if (atom1->type != atom2->type) {
2551 ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
2552 /* if they can't intersect at the type level break now */
2553 if (ret == 0)
2554 return(0);
2555 }
2556 switch (atom1->type) {
2557 case XML_REGEXP_STRING:
2558 if (!deep)
2559 ret = (atom1->valuep != atom2->valuep);
2560 else {
2561 xmlChar *val1 = (xmlChar *)atom1->valuep;
2562 xmlChar *val2 = (xmlChar *)atom2->valuep;
2563 int compound1 = (xmlStrchr(val1, '|') != NULL);
2564 int compound2 = (xmlStrchr(val2, '|') != NULL);
2565
2566 /* Ignore negative match flag for ##other namespaces */
2567 if (compound1 != compound2)
2568 return(0);
2569
2570 ret = xmlRegStrEqualWildcard(val1, val2);
2571 }
2572 break;
2573 case XML_REGEXP_EPSILON:
2574 goto not_determinist;
2575 case XML_REGEXP_CHARVAL:
2576 if (atom2->type == XML_REGEXP_CHARVAL) {
2577 ret = (atom1->codepoint == atom2->codepoint);
2578 } else {
2579 ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
2580 if (ret < 0)
2581 ret = 1;
2582 }
2583 break;
2584 case XML_REGEXP_RANGES:
2585 if (atom2->type == XML_REGEXP_RANGES) {
2586 int i, j, res;
2587 xmlRegRangePtr r1, r2;
2588
2589 /*
2590 * need to check that none of the ranges eventually matches
2591 */
2592 for (i = 0;i < atom1->nbRanges;i++) {
2593 for (j = 0;j < atom2->nbRanges;j++) {
2594 r1 = atom1->ranges[i];
2595 r2 = atom2->ranges[j];
2596 res = xmlFACompareRanges(r1, r2);
2597 if (res == 1) {
2598 ret = 1;
2599 goto done;
2600 }
2601 }
2602 }
2603 ret = 0;
2604 }
2605 break;
2606 default:
2607 goto not_determinist;
2608 }
2609done:
2610 if (atom1->neg != atom2->neg) {
2611 ret = !ret;
2612 }
2613 if (ret == 0)
2614 return(0);
2615not_determinist:
2616 return(1);
2617}
2618
2627static int
2628xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
2629 int to, xmlRegAtomPtr atom) {
2630 int ret = 1;
2631 int res;
2632 int transnr, nbTrans;
2633 xmlRegTransPtr t1;
2634 int deep = 1;
2635
2636 if (state == NULL)
2637 return(ret);
2638 if (state->markd == XML_REGEXP_MARK_VISITED)
2639 return(ret);
2640
2641 if (ctxt->flags & AM_AUTOMATA_RNG)
2642 deep = 0;
2643
2644 /*
2645 * don't recurse on transitions potentially added in the course of
2646 * the elimination.
2647 */
2648 nbTrans = state->nbTrans;
2649 for (transnr = 0;transnr < nbTrans;transnr++) {
2650 t1 = &(state->trans[transnr]);
2651 /*
2652 * check transitions conflicting with the one looked at
2653 */
2654 if (t1->atom == NULL) {
2655 if (t1->to < 0)
2656 continue;
2657 state->markd = XML_REGEXP_MARK_VISITED;
2658 res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2659 to, atom);
2660 if (res == 0) {
2661 ret = 0;
2662 /* t1->nd = 1; */
2663 }
2664 continue;
2665 }
2666 if (t1->to != to)
2667 continue;
2668 if (xmlFACompareAtoms(t1->atom, atom, deep)) {
2669 ret = 0;
2670 /* mark the transition as non-deterministic */
2671 t1->nd = 1;
2672 }
2673 }
2674 return(ret);
2675}
2676
2683static void
2684xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
2685 int transnr, nbTrans;
2686
2687 if (state == NULL)
2688 return;
2689 if (state->markd != XML_REGEXP_MARK_VISITED)
2690 return;
2691 state->markd = 0;
2692
2693 nbTrans = state->nbTrans;
2694 for (transnr = 0; transnr < nbTrans; transnr++) {
2695 xmlRegTransPtr t1 = &state->trans[transnr];
2696 if ((t1->atom == NULL) && (t1->to >= 0))
2697 xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
2698 }
2699}
2700
2709static int
2710xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
2711 int statenr, transnr;
2712 xmlRegStatePtr state;
2713 xmlRegTransPtr t1, t2, last;
2714 int i;
2715 int ret = 1;
2716 int deep = 1;
2717
2718#ifdef DEBUG_REGEXP_GRAPH
2719 printf("xmlFAComputesDeterminism\n");
2720 xmlRegPrintCtxt(stdout, ctxt);
2721#endif
2722 if (ctxt->determinist != -1)
2723 return(ctxt->determinist);
2724
2725 if (ctxt->flags & AM_AUTOMATA_RNG)
2726 deep = 0;
2727
2728 /*
2729 * First cleanup the automata removing cancelled transitions
2730 */
2731 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2732 state = ctxt->states[statenr];
2733 if (state == NULL)
2734 continue;
2735 if (state->nbTrans < 2)
2736 continue;
2737 for (transnr = 0;transnr < state->nbTrans;transnr++) {
2738 t1 = &(state->trans[transnr]);
2739 /*
2740 * Determinism checks in case of counted or all transitions
2741 * will have to be handled separately
2742 */
2743 if (t1->atom == NULL) {
2744 /* t1->nd = 1; */
2745 continue;
2746 }
2747 if (t1->to == -1) /* eliminated */
2748 continue;
2749 for (i = 0;i < transnr;i++) {
2750 t2 = &(state->trans[i]);
2751 if (t2->to == -1) /* eliminated */
2752 continue;
2753 if (t2->atom != NULL) {
2754 if (t1->to == t2->to) {
2755 /*
2756 * Here we use deep because we want to keep the
2757 * transitions which indicate a conflict
2758 */
2759 if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
2760 (t1->counter == t2->counter) &&
2761 (t1->count == t2->count))
2762 t2->to = -1; /* eliminated */
2763 }
2764 }
2765 }
2766 }
2767 }
2768
2769 /*
2770 * Check for all states that there aren't 2 transitions
2771 * with the same atom and a different target.
2772 */
2773 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2774 state = ctxt->states[statenr];
2775 if (state == NULL)
2776 continue;
2777 if (state->nbTrans < 2)
2778 continue;
2779 last = NULL;
2780 for (transnr = 0;transnr < state->nbTrans;transnr++) {
2781 t1 = &(state->trans[transnr]);
2782 /*
2783 * Determinism checks in case of counted or all transitions
2784 * will have to be handled separately
2785 */
2786 if (t1->atom == NULL) {
2787 continue;
2788 }
2789 if (t1->to == -1) /* eliminated */
2790 continue;
2791 for (i = 0;i < transnr;i++) {
2792 t2 = &(state->trans[i]);
2793 if (t2->to == -1) /* eliminated */
2794 continue;
2795 if (t2->atom != NULL) {
2796 /*
2797 * But here we don't use deep because we want to
2798 * find transitions which indicate a conflict
2799 */
2800 if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
2801 ret = 0;
2802 /* mark the transitions as non-deterministic ones */
2803 t1->nd = 1;
2804 t2->nd = 1;
2805 last = t1;
2806 }
2807 } else if (t1->to != -1) {
2808 /*
2809 * do the closure in case of remaining specific
2810 * epsilon transitions like choices or all
2811 */
2812 ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2813 t2->to, t2->atom);
2814 xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
2815 /* don't shortcut the computation so all non deterministic
2816 transition get marked down
2817 if (ret == 0)
2818 return(0);
2819 */
2820 if (ret == 0) {
2821 t1->nd = 1;
2822 /* t2->nd = 1; */
2823 last = t1;
2824 }
2825 }
2826 }
2827 /* don't shortcut the computation so all non deterministic
2828 transition get marked down
2829 if (ret == 0)
2830 break; */
2831 }
2832
2833 /*
2834 * mark specifically the last non-deterministic transition
2835 * from a state since there is no need to set-up rollback
2836 * from it
2837 */
2838 if (last != NULL) {
2839 last->nd = 2;
2840 }
2841
2842 /* don't shortcut the computation so all non deterministic
2843 transition get marked down
2844 if (ret == 0)
2845 break; */
2846 }
2847
2848 ctxt->determinist = ret;
2849 return(ret);
2850}
2851
2852/************************************************************************
2853 * *
2854 * Routines to check input against transition atoms *
2855 * *
2856 ************************************************************************/
2857
2858static int
2859xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
2860 int start, int end, const xmlChar *blockName) {
2861 int ret = 0;
2862
2863 switch (type) {
2864 case XML_REGEXP_STRING:
2865 case XML_REGEXP_SUBREG:
2866 case XML_REGEXP_RANGES:
2867 case XML_REGEXP_EPSILON:
2868 return(-1);
2869 case XML_REGEXP_ANYCHAR:
2870 ret = ((codepoint != '\n') && (codepoint != '\r'));
2871 break;
2872 case XML_REGEXP_CHARVAL:
2873 ret = ((codepoint >= start) && (codepoint <= end));
2874 break;
2875 case XML_REGEXP_NOTSPACE:
2876 neg = !neg;
2877 /* Falls through. */
2878 case XML_REGEXP_ANYSPACE:
2879 ret = ((codepoint == '\n') || (codepoint == '\r') ||
2880 (codepoint == '\t') || (codepoint == ' '));
2881 break;
2882 case XML_REGEXP_NOTINITNAME:
2883 neg = !neg;
2884 /* Falls through. */
2885 case XML_REGEXP_INITNAME:
2886 ret = (IS_LETTER(codepoint) ||
2887 (codepoint == '_') || (codepoint == ':'));
2888 break;
2889 case XML_REGEXP_NOTNAMECHAR:
2890 neg = !neg;
2891 /* Falls through. */
2892 case XML_REGEXP_NAMECHAR:
2893 ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
2894 (codepoint == '.') || (codepoint == '-') ||
2895 (codepoint == '_') || (codepoint == ':') ||
2896 IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
2897 break;
2898 case XML_REGEXP_NOTDECIMAL:
2899 neg = !neg;
2900 /* Falls through. */
2901 case XML_REGEXP_DECIMAL:
2902 ret = xmlUCSIsCatNd(codepoint);
2903 break;
2904 case XML_REGEXP_REALCHAR:
2905 neg = !neg;
2906 /* Falls through. */
2907 case XML_REGEXP_NOTREALCHAR:
2908 ret = xmlUCSIsCatP(codepoint);
2909 if (ret == 0)
2910 ret = xmlUCSIsCatZ(codepoint);
2911 if (ret == 0)
2912 ret = xmlUCSIsCatC(codepoint);
2913 break;
2914 case XML_REGEXP_LETTER:
2915 ret = xmlUCSIsCatL(codepoint);
2916 break;
2917 case XML_REGEXP_LETTER_UPPERCASE:
2918 ret = xmlUCSIsCatLu(codepoint);
2919 break;
2920 case XML_REGEXP_LETTER_LOWERCASE:
2921 ret = xmlUCSIsCatLl(codepoint);
2922 break;
2923 case XML_REGEXP_LETTER_TITLECASE:
2924 ret = xmlUCSIsCatLt(codepoint);
2925 break;
2926 case XML_REGEXP_LETTER_MODIFIER:
2927 ret = xmlUCSIsCatLm(codepoint);
2928 break;
2929 case XML_REGEXP_LETTER_OTHERS:
2930 ret = xmlUCSIsCatLo(codepoint);
2931 break;
2932 case XML_REGEXP_MARK:
2933 ret = xmlUCSIsCatM(codepoint);
2934 break;
2935 case XML_REGEXP_MARK_NONSPACING:
2936 ret = xmlUCSIsCatMn(codepoint);
2937 break;
2938 case XML_REGEXP_MARK_SPACECOMBINING:
2939 ret = xmlUCSIsCatMc(codepoint);
2940 break;
2941 case XML_REGEXP_MARK_ENCLOSING:
2942 ret = xmlUCSIsCatMe(codepoint);
2943 break;
2944 case XML_REGEXP_NUMBER:
2945 ret = xmlUCSIsCatN(codepoint);
2946 break;
2947 case XML_REGEXP_NUMBER_DECIMAL:
2948 ret = xmlUCSIsCatNd(codepoint);
2949 break;
2950 case XML_REGEXP_NUMBER_LETTER:
2951 ret = xmlUCSIsCatNl(codepoint);
2952 break;
2953 case XML_REGEXP_NUMBER_OTHERS:
2954 ret = xmlUCSIsCatNo(codepoint);
2955 break;
2956 case XML_REGEXP_PUNCT:
2957 ret = xmlUCSIsCatP(codepoint);
2958 break;
2959 case XML_REGEXP_PUNCT_CONNECTOR:
2960 ret = xmlUCSIsCatPc(codepoint);
2961 break;
2962 case XML_REGEXP_PUNCT_DASH:
2963 ret = xmlUCSIsCatPd(codepoint);
2964 break;
2965 case XML_REGEXP_PUNCT_OPEN:
2966 ret = xmlUCSIsCatPs(codepoint);
2967 break;
2968 case XML_REGEXP_PUNCT_CLOSE:
2969 ret = xmlUCSIsCatPe(codepoint);
2970 break;
2971 case XML_REGEXP_PUNCT_INITQUOTE:
2972 ret = xmlUCSIsCatPi(codepoint);
2973 break;
2974 case XML_REGEXP_PUNCT_FINQUOTE:
2975 ret = xmlUCSIsCatPf(codepoint);
2976 break;
2977 case XML_REGEXP_PUNCT_OTHERS:
2978 ret = xmlUCSIsCatPo(codepoint);
2979 break;
2980 case XML_REGEXP_SEPAR:
2981 ret = xmlUCSIsCatZ(codepoint);
2982 break;
2983 case XML_REGEXP_SEPAR_SPACE:
2984 ret = xmlUCSIsCatZs(codepoint);
2985 break;
2986 case XML_REGEXP_SEPAR_LINE:
2987 ret = xmlUCSIsCatZl(codepoint);
2988 break;
2989 case XML_REGEXP_SEPAR_PARA:
2990 ret = xmlUCSIsCatZp(codepoint);
2991 break;
2992 case XML_REGEXP_SYMBOL:
2993 ret = xmlUCSIsCatS(codepoint);
2994 break;
2995 case XML_REGEXP_SYMBOL_MATH:
2996 ret = xmlUCSIsCatSm(codepoint);
2997 break;
2998 case XML_REGEXP_SYMBOL_CURRENCY:
2999 ret = xmlUCSIsCatSc(codepoint);
3000 break;
3001 case XML_REGEXP_SYMBOL_MODIFIER:
3002 ret = xmlUCSIsCatSk(codepoint);
3003 break;
3004 case XML_REGEXP_SYMBOL_OTHERS:
3005 ret = xmlUCSIsCatSo(codepoint);
3006 break;
3007 case XML_REGEXP_OTHER:
3008 ret = xmlUCSIsCatC(codepoint);
3009 break;
3010 case XML_REGEXP_OTHER_CONTROL:
3011 ret = xmlUCSIsCatCc(codepoint);
3012 break;
3013 case XML_REGEXP_OTHER_FORMAT:
3014 ret = xmlUCSIsCatCf(codepoint);
3015 break;
3016 case XML_REGEXP_OTHER_PRIVATE:
3017 ret = xmlUCSIsCatCo(codepoint);
3018 break;
3019 case XML_REGEXP_OTHER_NA:
3020 /* ret = xmlUCSIsCatCn(codepoint); */
3021 /* Seems it doesn't exist anymore in recent Unicode releases */
3022 ret = 0;
3023 break;
3024 case XML_REGEXP_BLOCK_NAME:
3025 ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
3026 break;
3027 }
3028 if (neg)
3029 return(!ret);
3030 return(ret);
3031}
3032
3033static int
3034xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
3035 int i, ret = 0;
3036 xmlRegRangePtr range;
3037
3038 if ((atom == NULL) || (!IS_CHAR(codepoint)))
3039 return(-1);
3040
3041 switch (atom->type) {
3042 case XML_REGEXP_SUBREG:
3043 case XML_REGEXP_EPSILON:
3044 return(-1);
3045 case XML_REGEXP_CHARVAL:
3046 return(codepoint == atom->codepoint);
3047 case XML_REGEXP_RANGES: {
3048 int accept = 0;
3049
3050 for (i = 0;i < atom->nbRanges;i++) {
3051 range = atom->ranges[i];
3052 if (range->neg == 2) {
3053 ret = xmlRegCheckCharacterRange(range->type, codepoint,
3054 0, range->start, range->end,
3055 range->blockName);
3056 if (ret != 0)
3057 return(0); /* excluded char */
3058 } else if (range->neg) {
3059 ret = xmlRegCheckCharacterRange(range->type, codepoint,
3060 0, range->start, range->end,
3061 range->blockName);
3062 if (ret == 0)
3063 accept = 1;
3064 else
3065 return(0);
3066 } else {
3067 ret = xmlRegCheckCharacterRange(range->type, codepoint,
3068 0, range->start, range->end,
3069 range->blockName);
3070 if (ret != 0)
3071 accept = 1; /* might still be excluded */
3072 }
3073 }
3074 return(accept);
3075 }
3076 case XML_REGEXP_STRING:
3077 printf("TODO: XML_REGEXP_STRING\n");
3078 return(-1);
3079 case XML_REGEXP_ANYCHAR:
3080 case XML_REGEXP_ANYSPACE:
3081 case XML_REGEXP_NOTSPACE:
3082 case XML_REGEXP_INITNAME:
3083 case XML_REGEXP_NOTINITNAME:
3084 case XML_REGEXP_NAMECHAR:
3085 case XML_REGEXP_NOTNAMECHAR:
3086 case XML_REGEXP_DECIMAL:
3087 case XML_REGEXP_NOTDECIMAL:
3088 case XML_REGEXP_REALCHAR:
3089 case XML_REGEXP_NOTREALCHAR:
3090 case XML_REGEXP_LETTER:
3091 case XML_REGEXP_LETTER_UPPERCASE:
3092 case XML_REGEXP_LETTER_LOWERCASE:
3093 case XML_REGEXP_LETTER_TITLECASE:
3094 case XML_REGEXP_LETTER_MODIFIER:
3095 case XML_REGEXP_LETTER_OTHERS:
3096 case XML_REGEXP_MARK:
3097 case XML_REGEXP_MARK_NONSPACING:
3098 case XML_REGEXP_MARK_SPACECOMBINING:
3099 case XML_REGEXP_MARK_ENCLOSING:
3100 case XML_REGEXP_NUMBER:
3101 case XML_REGEXP_NUMBER_DECIMAL:
3102 case XML_REGEXP_NUMBER_LETTER:
3103 case XML_REGEXP_NUMBER_OTHERS:
3104 case XML_REGEXP_PUNCT:
3105 case XML_REGEXP_PUNCT_CONNECTOR:
3106 case XML_REGEXP_PUNCT_DASH:
3107 case XML_REGEXP_PUNCT_OPEN:
3108 case XML_REGEXP_PUNCT_CLOSE:
3109 case XML_REGEXP_PUNCT_INITQUOTE:
3110 case XML_REGEXP_PUNCT_FINQUOTE:
3111 case XML_REGEXP_PUNCT_OTHERS:
3112 case XML_REGEXP_SEPAR:
3113 case XML_REGEXP_SEPAR_SPACE:
3114 case XML_REGEXP_SEPAR_LINE:
3115 case XML_REGEXP_SEPAR_PARA:
3116 case XML_REGEXP_SYMBOL:
3117 case XML_REGEXP_SYMBOL_MATH:
3118 case XML_REGEXP_SYMBOL_CURRENCY:
3119 case XML_REGEXP_SYMBOL_MODIFIER:
3120 case XML_REGEXP_SYMBOL_OTHERS:
3121 case XML_REGEXP_OTHER:
3122 case XML_REGEXP_OTHER_CONTROL:
3123 case XML_REGEXP_OTHER_FORMAT:
3124 case XML_REGEXP_OTHER_PRIVATE:
3125 case XML_REGEXP_OTHER_NA:
3126 case XML_REGEXP_BLOCK_NAME:
3127 ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
3128 (const xmlChar *)atom->valuep);
3129 if (atom->neg)
3130 ret = !ret;
3131 break;
3132 }
3133 return(ret);
3134}
3135
3136/************************************************************************
3137 * *
3138 * Saving and restoring state of an execution context *
3139 * *
3140 ************************************************************************/
3141
3142#ifdef DEBUG_REGEXP_EXEC
3143static void
3144xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
3145 printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
3146 if (exec->inputStack != NULL) {
3147 int i;
3148 printf(": ");
3149 for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
3150 printf("%s ", (const char *)
3151 exec->inputStack[exec->inputStackNr - (i + 1)].value);
3152 } else {
3153 printf(": %s", &(exec->inputString[exec->index]));
3154 }
3155 printf("\n");
3156}
3157#endif
3158
3159static void
3160xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
3161#ifdef DEBUG_REGEXP_EXEC
3162 printf("saving ");
3163 exec->transno++;
3164 xmlFARegDebugExec(exec);
3165 exec->transno--;
3166#endif
3167#ifdef MAX_PUSH
3168 if (exec->nbPush > MAX_PUSH) {
3169 return;
3170 }
3171 exec->nbPush++;
3172#endif
3173
3174 if (exec->maxRollbacks == 0) {
3175 exec->maxRollbacks = 4;
3176 exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
3177 sizeof(xmlRegExecRollback));
3178 if (exec->rollbacks == NULL) {
3179 xmlRegexpErrMemory(NULL, "saving regexp");
3180 exec->maxRollbacks = 0;
3181 return;
3182 }
3183 memset(exec->rollbacks, 0,
3184 exec->maxRollbacks * sizeof(xmlRegExecRollback));
3185 } else if (exec->nbRollbacks >= exec->maxRollbacks) {
3186 xmlRegExecRollback *tmp;
3187 int len = exec->maxRollbacks;
3188
3189 exec->maxRollbacks *= 2;
3190 tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
3191 exec->maxRollbacks * sizeof(xmlRegExecRollback));
3192 if (tmp == NULL) {
3193 xmlRegexpErrMemory(NULL, "saving regexp");
3194 exec->maxRollbacks /= 2;
3195 return;
3196 }
3197 exec->rollbacks = tmp;
3198 tmp = &exec->rollbacks[len];
3199 memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
3200 }
3201 exec->rollbacks[exec->nbRollbacks].state = exec->state;
3202 exec->rollbacks[exec->nbRollbacks].index = exec->index;
3203 exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
3204 if (exec->comp->nbCounters > 0) {
3205 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3206 exec->rollbacks[exec->nbRollbacks].counts = (int *)
3207 xmlMalloc(exec->comp->nbCounters * sizeof(int));
3208 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3209 xmlRegexpErrMemory(NULL, "saving regexp");
3210 exec->status = -5;
3211 return;
3212 }
3213 }
3214 memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
3215 exec->comp->nbCounters * sizeof(int));
3216 }
3217 exec->nbRollbacks++;
3218}
3219
3220static void
3221xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
3222 if (exec->nbRollbacks <= 0) {
3223 exec->status = -1;
3224#ifdef DEBUG_REGEXP_EXEC
3225 printf("rollback failed on empty stack\n");
3226#endif
3227 return;
3228 }
3229 exec->nbRollbacks--;
3230 exec->state = exec->rollbacks[exec->nbRollbacks].state;
3231 exec->index = exec->rollbacks[exec->nbRollbacks].index;
3232 exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
3233 if (exec->comp->nbCounters > 0) {
3234 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3235 fprintf(stderr, "exec save: allocation failed");
3236 exec->status = -6;
3237 return;
3238 }
3239 if (exec->counts) {
3240 memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
3241 exec->comp->nbCounters * sizeof(int));
3242 }
3243 }
3244
3245#ifdef DEBUG_REGEXP_EXEC
3246 printf("restored ");
3247 xmlFARegDebugExec(exec);
3248#endif
3249}
3250
3251/************************************************************************
3252 * *
3253 * Verifier, running an input against a compiled regexp *
3254 * *
3255 ************************************************************************/
3256
3257static int
3258xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
3259 xmlRegExecCtxt execval;
3260 xmlRegExecCtxtPtr exec = &execval;
3261 int ret, codepoint = 0, len, deter;
3262
3263 exec->inputString = content;
3264 exec->index = 0;
3265 exec->nbPush = 0;
3266 exec->determinist = 1;
3267 exec->maxRollbacks = 0;
3268 exec->nbRollbacks = 0;
3269 exec->rollbacks = NULL;
3270 exec->status = 0;
3271 exec->comp = comp;
3272 exec->state = comp->states[0];
3273 exec->transno = 0;
3274 exec->transcount = 0;
3275 exec->inputStack = NULL;
3276 exec->inputStackMax = 0;
3277 if (comp->nbCounters > 0) {
3278 exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
3279 if (exec->counts == NULL) {
3280 xmlRegexpErrMemory(NULL, "running regexp");
3281 return(-1);
3282 }
3283 memset(exec->counts, 0, comp->nbCounters * sizeof(int));
3284 } else
3285 exec->counts = NULL;
3286 while ((exec->status == 0) && (exec->state != NULL) &&
3287 ((exec->inputString[exec->index] != 0) ||
3288 ((exec->state != NULL) &&
3289 (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3290 xmlRegTransPtr trans;
3291 xmlRegAtomPtr atom;
3292
3293 /*
3294 * If end of input on non-terminal state, rollback, however we may
3295 * still have epsilon like transition for counted transitions
3296 * on counters, in that case don't break too early. Additionally,
3297 * if we are working on a range like "AB{0,2}", where B is not present,
3298 * we don't want to break.
3299 */
3300 len = 1;
3301 if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
3302 /*
3303 * if there is a transition, we must check if
3304 * atom allows minOccurs of 0
3305 */
3306 if (exec->transno < exec->state->nbTrans) {
3307 trans = &exec->state->trans[exec->transno];
3308 if (trans->to >=0) {
3309 atom = trans->atom;
3310 if (!((atom->min == 0) && (atom->max > 0)))
3311 goto rollback;
3312 }
3313 } else
3314 goto rollback;
3315 }
3316
3317 exec->transcount = 0;
3318 for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3319 trans = &exec->state->trans[exec->transno];
3320 if (trans->to < 0)
3321 continue;
3322 atom = trans->atom;
3323 ret = 0;
3324 deter = 1;
3325 if (trans->count >= 0) {
3326 int count;
3327 xmlRegCounterPtr counter;
3328
3329 if (exec->counts == NULL) {
3330 exec->status = -1;
3331 goto error;
3332 }
3333 /*
3334 * A counted transition.
3335 */
3336
3337 count = exec->counts[trans->count];
3338 counter = &exec->comp->counters[trans->count];
3339#ifdef DEBUG_REGEXP_EXEC
3340 printf("testing count %d: val %d, min %d, max %d\n",
3341 trans->count, count, counter->min, counter->max);
3342#endif
3343 ret = ((count >= counter->min) && (count <= counter->max));
3344 if ((ret) && (counter->min != counter->max))
3345 deter = 0;
3346 } else if (atom == NULL) {
3347 fprintf(stderr, "epsilon transition left at runtime\n");
3348 exec->status = -2;
3349 break;
3350 } else if (exec->inputString[exec->index] != 0) {
3351 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
3352 ret = xmlRegCheckCharacter(atom, codepoint);
3353 if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
3354 xmlRegStatePtr to = comp->states[trans->to];
3355
3356 /*
3357 * this is a multiple input sequence
3358 * If there is a counter associated increment it now.
3359 * do not increment if the counter is already over the
3360 * maximum limit in which case get to next transition
3361 */
3362 if (trans->counter >= 0) {
3363 xmlRegCounterPtr counter;
3364
3365 if ((exec->counts == NULL) ||
3366 (exec->comp == NULL) ||
3367 (exec->comp->counters == NULL)) {
3368 exec->status = -1;
3369 goto error;
3370 }
3371 counter = &exec->comp->counters[trans->counter];
3372 if (exec->counts[trans->counter] >= counter->max)
3373 continue; /* for loop on transitions */
3374 }
3375 /* Save before incrementing */
3376 if (exec->state->nbTrans > exec->transno + 1) {
3377 xmlFARegExecSave(exec);
3378 }
3379 if (trans->counter >= 0) {
3380#ifdef DEBUG_REGEXP_EXEC
3381 printf("Increasing count %d\n", trans->counter);
3382#endif
3383 exec->counts[trans->counter]++;
3384 }
3385 exec->transcount = 1;
3386 do {
3387 /*
3388 * Try to progress as much as possible on the input
3389 */
3390 if (exec->transcount == atom->max) {
3391 break;
3392 }
3393 exec->index += len;
3394 /*
3395 * End of input: stop here
3396 */
3397 if (exec->inputString[exec->index] == 0) {
3398 exec->index -= len;
3399 break;
3400 }
3401 if (exec->transcount >= atom->min) {
3402 int transno = exec->transno;
3403 xmlRegStatePtr state = exec->state;
3404
3405 /*
3406 * The transition is acceptable save it
3407 */
3408 exec->transno = -1; /* trick */
3409 exec->state = to;
3410 xmlFARegExecSave(exec);
3411 exec->transno = transno;
3412 exec->state = state;
3413 }
3414 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
3415 len);
3416 ret = xmlRegCheckCharacter(atom, codepoint);
3417 exec->transcount++;
3418 } while (ret == 1);
3419 if (exec->transcount < atom->min)
3420 ret = 0;
3421
3422 /*
3423 * If the last check failed but one transition was found
3424 * possible, rollback
3425 */
3426 if (ret < 0)
3427 ret = 0;
3428 if (ret == 0) {
3429 goto rollback;
3430 }
3431 if (trans->counter >= 0) {
3432 if (exec->counts == NULL) {
3433 exec->status = -1;
3434 goto error;
3435 }
3436#ifdef DEBUG_REGEXP_EXEC
3437 printf("Decreasing count %d\n", trans->counter);
3438#endif
3439 exec->counts[trans->counter]--;
3440 }
3441 } else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
3442 /*
3443 * we don't match on the codepoint, but minOccurs of 0
3444 * says that's ok. Setting len to 0 inhibits stepping
3445 * over the codepoint.
3446 */
3447 exec->transcount = 1;
3448 len = 0;
3449 ret = 1;
3450 }
3451 } else if ((atom->min == 0) && (atom->max > 0)) {
3452 /* another spot to match when minOccurs is 0 */
3453 exec->transcount = 1;
3454 len = 0;
3455 ret = 1;
3456 }
3457 if (ret == 1) {
3458 if ((trans->nd == 1) ||
3459 ((trans->count >= 0) && (deter == 0) &&
3460 (exec->state->nbTrans > exec->transno + 1))) {
3461#ifdef DEBUG_REGEXP_EXEC
3462 if (trans->nd == 1)
3463 printf("Saving on nd transition atom %d for %c at %d\n",
3464 trans->atom->no, codepoint, exec->index);
3465 else
3466 printf("Saving on counted transition count %d for %c at %d\n",
3467 trans->count, codepoint, exec->index);
3468#endif
3469 xmlFARegExecSave(exec);
3470 }
3471 if (trans->counter >= 0) {
3472 xmlRegCounterPtr counter;
3473
3474 /* make sure we don't go over the counter maximum value */
3475 if ((exec->counts == NULL) ||
3476 (exec->comp == NULL) ||
3477 (exec->comp->counters == NULL)) {
3478 exec->status = -1;
3479 goto error;
3480 }
3481 counter = &exec->comp->counters[trans->counter];
3482 if (exec->counts[trans->counter] >= counter->max)
3483 continue; /* for loop on transitions */
3484#ifdef DEBUG_REGEXP_EXEC
3485 printf("Increasing count %d\n", trans->counter);
3486#endif
3487 exec->counts[trans->counter]++;
3488 }
3489 if ((trans->count >= 0) &&
3490 (trans->count < REGEXP_ALL_COUNTER)) {
3491 if (exec->counts == NULL) {
3492 exec->status = -1;
3493 goto error;
3494 }
3495#ifdef DEBUG_REGEXP_EXEC
3496 printf("resetting count %d on transition\n",
3497 trans->count);
3498#endif
3499 exec->counts[trans->count] = 0;
3500 }
3501#ifdef DEBUG_REGEXP_EXEC
3502 printf("entering state %d\n", trans->to);
3503#endif
3504 exec->state = comp->states[trans->to];
3505 exec->transno = 0;
3506 if (trans->atom != NULL) {
3507 exec->index += len;
3508 }
3509 goto progress;
3510 } else if (ret < 0) {
3511 exec->status = -4;
3512 break;
3513 }
3514 }
3515 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3516rollback:
3517 /*
3518 * Failed to find a way out
3519 */
3520 exec->determinist = 0;
3521#ifdef DEBUG_REGEXP_EXEC
3522 printf("rollback from state %d on %d:%c\n", exec->state->no,
3523 codepoint,codepoint);
3524#endif
3525 xmlFARegExecRollBack(exec);
3526 }
3527progress:
3528 continue;
3529 }
3530error:
3531 if (exec->rollbacks != NULL) {
3532 if (exec->counts != NULL) {
3533 int i;
3534
3535 for (i = 0;i < exec->maxRollbacks;i++)
3536 if (exec->rollbacks[i].counts != NULL)
3537 xmlFree(exec->rollbacks[i].counts);
3538 }
3539 xmlFree(exec->rollbacks);
3540 }
3541 if (exec->state == NULL)
3542 return(-1);
3543 if (exec->counts != NULL)
3544 xmlFree(exec->counts);
3545 if (exec->status == 0)
3546 return(1);
3547 if (exec->status == -1) {
3548 if (exec->nbPush > MAX_PUSH)
3549 return(-1);
3550 return(0);
3551 }
3552 return(exec->status);
3553}
3554
3555/************************************************************************
3556 * *
3557 * Progressive interface to the verifier one atom at a time *
3558 * *
3559 ************************************************************************/
3560#ifdef DEBUG_ERR
3561static void testerr(xmlRegExecCtxtPtr exec);
3562#endif
3563
3575xmlRegExecCtxtPtr
3576xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
3577 xmlRegExecCtxtPtr exec;
3578
3579 if (comp == NULL)
3580 return(NULL);
3581 if ((comp->compact == NULL) && (comp->states == NULL))
3582 return(NULL);
3583 exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
3584 if (exec == NULL) {
3585 xmlRegexpErrMemory(NULL, "creating execution context");
3586 return(NULL);
3587 }
3588 memset(exec, 0, sizeof(xmlRegExecCtxt));
3589 exec->inputString = NULL;
3590 exec->index = 0;
3591 exec->determinist = 1;
3592 exec->maxRollbacks = 0;
3593 exec->nbRollbacks = 0;
3594 exec->rollbacks = NULL;
3595 exec->status = 0;
3596 exec->comp = comp;
3597 if (comp->compact == NULL)
3598 exec->state = comp->states[0];
3599 exec->transno = 0;
3600 exec->transcount = 0;
3601 exec->callback = callback;
3602 exec->data = data;
3603 if (comp->nbCounters > 0) {
3604 /*
3605 * For error handling, exec->counts is allocated twice the size
3606 * the second half is used to store the data in case of rollback
3607 */
3608 exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
3609 * 2);
3610 if (exec->counts == NULL) {
3611 xmlRegexpErrMemory(NULL, "creating execution context");
3612 xmlFree(exec);
3613 return(NULL);
3614 }
3615 memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
3616 exec->errCounts = &exec->counts[comp->nbCounters];
3617 } else {
3618 exec->counts = NULL;
3619 exec->errCounts = NULL;
3620 }
3621 exec->inputStackMax = 0;
3622 exec->inputStackNr = 0;
3623 exec->inputStack = NULL;
3624 exec->errStateNo = -1;
3625 exec->errString = NULL;
3626 exec->nbPush = 0;
3627 return(exec);
3628}
3629
3636void
3637xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
3638 if (exec == NULL)
3639 return;
3640
3641 if (exec->rollbacks != NULL) {
3642 if (exec->counts != NULL) {
3643 int i;
3644
3645 for (i = 0;i < exec->maxRollbacks;i++)
3646 if (exec->rollbacks[i].counts != NULL)
3647 xmlFree(exec->rollbacks[i].counts);
3648 }
3649 xmlFree(exec->rollbacks);
3650 }
3651 if (exec->counts != NULL)
3652 xmlFree(exec->counts);
3653 if (exec->inputStack != NULL) {
3654 int i;
3655
3656 for (i = 0;i < exec->inputStackNr;i++) {
3657 if (exec->inputStack[i].value != NULL)
3658 xmlFree(exec->inputStack[i].value);
3659 }
3660 xmlFree(exec->inputStack);
3661 }
3662 if (exec->errString != NULL)
3663 xmlFree(exec->errString);
3664 xmlFree(exec);
3665}
3666
3667static void
3668xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
3669 void *data) {
3670#ifdef DEBUG_PUSH
3671 printf("saving value: %d:%s\n", exec->inputStackNr, value);
3672#endif
3673 if (exec->inputStackMax == 0) {
3674 exec->inputStackMax = 4;
3675 exec->inputStack = (xmlRegInputTokenPtr)
3676 xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
3677 if (exec->inputStack == NULL) {
3678 xmlRegexpErrMemory(NULL, "pushing input string");
3679 exec->inputStackMax = 0;
3680 return;
3681 }
3682 } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
3683 xmlRegInputTokenPtr tmp;
3684
3685 exec->inputStackMax *= 2;
3686 tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
3687 exec->inputStackMax * sizeof(xmlRegInputToken));
3688 if (tmp == NULL) {
3689 xmlRegexpErrMemory(NULL, "pushing input string");
3690 exec->inputStackMax /= 2;
3691 return;
3692 }
3693 exec->inputStack = tmp;
3694 }
3695 exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
3696 exec->inputStack[exec->inputStackNr].data = data;
3697 exec->inputStackNr++;
3698 exec->inputStack[exec->inputStackNr].value = NULL;
3699 exec->inputStack[exec->inputStackNr].data = NULL;
3700}
3701
3715static int
3716xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
3717 if (expStr == valStr) return(1);
3718 if (expStr == NULL) return(0);
3719 if (valStr == NULL) return(0);
3720 do {
3721 /*
3722 * Eval if we have a wildcard for the current item.
3723 */
3724 if (*expStr != *valStr) {
3725 /* if one of them starts with a wildcard make valStr be it */
3726 if (*valStr == '*') {
3727 const xmlChar *tmp;
3728
3729 tmp = valStr;
3730 valStr = expStr;
3731 expStr = tmp;
3732 }
3733 if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
3734 do {
3735 if (*valStr == XML_REG_STRING_SEPARATOR)
3736 break;
3737 valStr++;
3738 } while (*valStr != 0);
3739 continue;
3740 } else
3741 return(0);
3742 }
3743 expStr++;
3744 valStr++;
3745 } while (*valStr != 0);
3746 if (*expStr != 0)
3747 return (0);
3748 else
3749 return (1);
3750}
3751
3764static int
3765xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
3766 xmlRegexpPtr comp,
3767 const xmlChar *value,
3768 void *data) {
3769 int state = exec->index;
3770 int i, target;
3771
3772 if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
3773 return(-1);
3774
3775 if (value == NULL) {
3776 /*
3777 * are we at a final state ?
3778 */
3779 if (comp->compact[state * (comp->nbstrings + 1)] ==
3780 XML_REGEXP_FINAL_STATE)
3781 return(1);
3782 return(0);
3783 }
3784
3785#ifdef DEBUG_PUSH
3786 printf("value pushed: %s\n", value);
3787#endif
3788
3789 /*
3790 * Examine all outside transitions from current state
3791 */
3792 for (i = 0;i < comp->nbstrings;i++) {
3793 target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3794 if ((target > 0) && (target <= comp->nbstates)) {
3795 target--; /* to avoid 0 */
3796 if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
3797 exec->index = target;
3798 if ((exec->callback != NULL) && (comp->transdata != NULL)) {
3799 exec->callback(exec->data, value,
3800 comp->transdata[state * comp->nbstrings + i], data);
3801 }
3802#ifdef DEBUG_PUSH
3803 printf("entering state %d\n", target);
3804#endif
3805 if (comp->compact[target * (comp->nbstrings + 1)] ==
3806 XML_REGEXP_SINK_STATE)
3807 goto error;
3808
3809 if (comp->compact[target * (comp->nbstrings + 1)] ==
3810 XML_REGEXP_FINAL_STATE)
3811 return(1);
3812 return(0);
3813 }
3814 }
3815 }
3816 /*
3817 * Failed to find an exit transition out from current state for the
3818 * current token
3819 */
3820#ifdef DEBUG_PUSH
3821 printf("failed to find a transition for %s on state %d\n", value, state);
3822#endif
3823error:
3824 if (exec->errString != NULL)
3825 xmlFree(exec->errString);
3826 exec->errString = xmlStrdup(value);
3827 exec->errStateNo = state;
3828 exec->status = -1;
3829#ifdef DEBUG_ERR
3830 testerr(exec);
3831#endif
3832 return(-1);
3833}
3834
3847static int
3848xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
3849 void *data, int compound) {
3850 xmlRegTransPtr trans;
3851 xmlRegAtomPtr atom;
3852 int ret;
3853 int final = 0;
3854 int progress = 1;
3855
3856 if (exec == NULL)
3857 return(-1);
3858 if (exec->comp == NULL)
3859 return(-1);
3860 if (exec->status != 0)
3861 return(exec->status);
3862
3863 if (exec->comp->compact != NULL)
3864 return(xmlRegCompactPushString(exec, exec->comp, value, data));
3865
3866 if (value == NULL) {
3867 if (exec->state->type == XML_REGEXP_FINAL_STATE)
3868 return(1);
3869 final = 1;
3870 }
3871
3872#ifdef DEBUG_PUSH
3873 printf("value pushed: %s\n", value);
3874#endif
3875 /*
3876 * If we have an active rollback stack push the new value there
3877 * and get back to where we were left
3878 */
3879 if ((value != NULL) && (exec->inputStackNr > 0)) {
3880 xmlFARegExecSaveInputString(exec, value, data);
3881 value = exec->inputStack[exec->index].value;
3882 data = exec->inputStack[exec->index].data;
3883#ifdef DEBUG_PUSH
3884 printf("value loaded: %s\n", value);
3885#endif
3886 }
3887
3888 while ((exec->status == 0) &&
3889 ((value != NULL) ||
3890 ((final == 1) &&
3891 (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3892
3893 /*
3894 * End of input on non-terminal state, rollback, however we may
3895 * still have epsilon like transition for counted transitions
3896 * on counters, in that case don't break too early.
3897 */
3898 if ((value == NULL) && (exec->counts == NULL))
3899 goto rollback;
3900
3901 exec->transcount = 0;
3902 for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3903 trans = &exec->state->trans[exec->transno];
3904 if (trans->to < 0)
3905 continue;
3906 atom = trans->atom;
3907 ret = 0;
3908 if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3909 int i;
3910 int count;
3911 xmlRegTransPtr t;
3912 xmlRegCounterPtr counter;
3913
3914 ret = 0;
3915
3916#ifdef DEBUG_PUSH
3917 printf("testing all lax %d\n", trans->count);
3918#endif
3919 /*
3920 * Check all counted transitions from the current state
3921 */
3922 if ((value == NULL) && (final)) {
3923 ret = 1;
3924 } else if (value != NULL) {
3925 for (i = 0;i < exec->state->nbTrans;i++) {
3926 t = &exec->state->trans[i];
3927 if ((t->counter < 0) || (t == trans))
3928 continue;
3929 counter = &exec->comp->counters[t->counter];
3930 count = exec->counts[t->counter];
3931 if ((count < counter->max) &&
3932 (t->atom != NULL) &&
3933 (xmlStrEqual(value, t->atom->valuep))) {
3934 ret = 0;
3935 break;
3936 }
3937 if ((count >= counter->min) &&
3938 (count < counter->max) &&
3939 (t->atom != NULL) &&
3940 (xmlStrEqual(value, t->atom->valuep))) {
3941 ret = 1;
3942 break;
3943 }
3944 }
3945 }
3946 } else if (trans->count == REGEXP_ALL_COUNTER) {
3947 int i;
3948 int count;
3949 xmlRegTransPtr t;
3950 xmlRegCounterPtr counter;
3951
3952 ret = 1;
3953
3954#ifdef DEBUG_PUSH
3955 printf("testing all %d\n", trans->count);
3956#endif
3957 /*
3958 * Check all counted transitions from the current state
3959 */
3960 for (i = 0;i < exec->state->nbTrans;i++) {
3961 t = &exec->state->trans[i];
3962 if ((t->counter < 0) || (t == trans))
3963 continue;
3964 counter = &exec->comp->counters[t->counter];
3965 count = exec->counts[t->counter];
3966 if ((count < counter->min) || (count > counter->max)) {
3967 ret = 0;
3968 break;
3969 }
3970 }
3971 } else if (trans->count >= 0) {
3972 int count;
3973 xmlRegCounterPtr counter;
3974
3975 /*
3976 * A counted transition.
3977 */
3978
3979 count = exec->counts[trans->count];
3980 counter = &exec->comp->counters[trans->count];
3981#ifdef DEBUG_PUSH
3982 printf("testing count %d: val %d, min %d, max %d\n",
3983 trans->count, count, counter->min, counter->max);
3984#endif
3985 ret = ((count >= counter->min) && (count <= counter->max));
3986 } else if (atom == NULL) {
3987 fprintf(stderr, "epsilon transition left at runtime\n");
3988 exec->status = -2;
3989 break;
3990 } else if (value != NULL) {
3991 ret = xmlRegStrEqualWildcard(atom->valuep, value);
3992 if (atom->neg) {
3993 ret = !ret;
3994 if (!compound)
3995 ret = 0;
3996 }
3997 if ((ret == 1) && (trans->counter >= 0)) {
3998 xmlRegCounterPtr counter;
3999 int count;
4000
4001 count = exec->counts[trans->counter];
4002 counter = &exec->comp->counters[trans->counter];
4003 if (count >= counter->max)
4004 ret = 0;
4005 }
4006
4007 if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
4008 xmlRegStatePtr to = exec->comp->states[trans->to];
4009
4010 /*
4011 * this is a multiple input sequence
4012 */
4013 if (exec->state->nbTrans > exec->transno + 1) {
4014 if (exec->inputStackNr <= 0) {
4015 xmlFARegExecSaveInputString(exec, value, data);
4016 }
4017 xmlFARegExecSave(exec);
4018 }
4019 exec->transcount = 1;
4020 do {
4021 /*
4022 * Try to progress as much as possible on the input
4023 */
4024 if (exec->transcount == atom->max) {
4025 break;
4026 }
4027 exec->index++;
4028 value = exec->inputStack[exec->index].value;
4029 data = exec->inputStack[exec->index].data;
4030#ifdef DEBUG_PUSH
4031 printf("value loaded: %s\n", value);
4032#endif
4033
4034 /*
4035 * End of input: stop here
4036 */
4037 if (value == NULL) {
4038 exec->index --;
4039 break;
4040 }
4041 if (exec->transcount >= atom->min) {
4042 int transno = exec->transno;
4043 xmlRegStatePtr state = exec->state;
4044
4045 /*
4046 * The transition is acceptable save it
4047 */
4048 exec->transno = -1; /* trick */
4049 exec->state = to;
4050 if (exec->inputStackNr <= 0) {
4051 xmlFARegExecSaveInputString(exec, value, data);
4052 }
4053 xmlFARegExecSave(exec);
4054 exec->transno = transno;
4055 exec->state = state;
4056 }
4057 ret = xmlStrEqual(value, atom->valuep);
4058 exec->transcount++;
4059 } while (ret == 1);
4060 if (exec->transcount < atom->min)
4061 ret = 0;
4062
4063 /*
4064 * If the last check failed but one transition was found
4065 * possible, rollback
4066 */
4067 if (ret < 0)
4068 ret = 0;
4069 if (ret == 0) {
4070 goto rollback;
4071 }
4072 }
4073 }
4074 if (ret == 1) {
4075 if ((exec->callback != NULL) && (atom != NULL) &&
4076 (data != NULL)) {
4077 exec->callback(exec->data, atom->valuep,
4078 atom->data, data);
4079 }
4080 if (exec->state->nbTrans > exec->transno + 1) {
4081 if (exec->inputStackNr <= 0) {
4082 xmlFARegExecSaveInputString(exec, value, data);
4083 }
4084 xmlFARegExecSave(exec);
4085 }
4086 if (trans->counter >= 0) {
4087#ifdef DEBUG_PUSH
4088 printf("Increasing count %d\n", trans->counter);
4089#endif
4090 exec->counts[trans->counter]++;
4091 }
4092 if ((trans->count >= 0) &&
4093 (trans->count < REGEXP_ALL_COUNTER)) {
4094#ifdef DEBUG_REGEXP_EXEC
4095 printf("resetting count %d on transition\n",
4096 trans->count);
4097#endif
4098 exec->counts[trans->count] = 0;
4099 }
4100#ifdef DEBUG_PUSH
4101 printf("entering state %d\n", trans->to);
4102#endif
4103 if ((exec->comp->states[trans->to] != NULL) &&
4104 (exec->comp->states[trans->to]->type ==
4105 XML_REGEXP_SINK_STATE)) {
4106 /*
4107 * entering a sink state, save the current state as error
4108 * state.
4109 */
4110 if (exec->errString != NULL)
4111 xmlFree(exec->errString);
4112 exec->errString = xmlStrdup(value);
4113 exec->errState = exec->state;
4114 memcpy(exec->errCounts, exec->counts,
4115 exec->comp->nbCounters * sizeof(int));
4116 }
4117 exec->state = exec->comp->states[trans->to];
4118 exec->transno = 0;
4119 if (trans->atom != NULL) {
4120 if (exec->inputStack != NULL) {
4121 exec->index++;
4122 if (exec->index < exec->inputStackNr) {
4123 value = exec->inputStack[exec->index].value;
4124 data = exec->inputStack[exec->index].data;
4125#ifdef DEBUG_PUSH
4126 printf("value loaded: %s\n", value);
4127#endif
4128 } else {
4129 value = NULL;
4130 data = NULL;
4131#ifdef DEBUG_PUSH
4132 printf("end of input\n");
4133#endif
4134 }
4135 } else {
4136 value = NULL;
4137 data = NULL;
4138#ifdef DEBUG_PUSH
4139 printf("end of input\n");
4140#endif
4141 }
4142 }
4143 goto progress;
4144 } else if (ret < 0) {
4145 exec->status = -4;
4146 break;
4147 }
4148 }
4149 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4150rollback:
4151 /*
4152 * if we didn't yet rollback on the current input
4153 * store the current state as the error state.
4154 */
4155 if ((progress) && (exec->state != NULL) &&
4156 (exec->state->type != XML_REGEXP_SINK_STATE)) {
4157 progress = 0;
4158 if (exec->errString != NULL)
4159 xmlFree(exec->errString);
4160 exec->errString = xmlStrdup(value);
4161 exec->errState = exec->state;
4162 if (exec->comp->nbCounters)
4163 memcpy(exec->errCounts, exec->counts,
4164 exec->comp->nbCounters * sizeof(int));
4165 }
4166
4167 /*
4168 * Failed to find a way out
4169 */
4170 exec->determinist = 0;
4171 xmlFARegExecRollBack(exec);
4172 if ((exec->inputStack != NULL ) && (exec->status == 0)) {
4173 value = exec->inputStack[exec->index].value;
4174 data = exec->inputStack[exec->index].data;
4175#ifdef DEBUG_PUSH
4176 printf("value loaded: %s\n", value);
4177#endif
4178 }
4179 }
4180 continue;
4181progress:
4182 progress = 1;
4183 continue;
4184 }
4185 if (exec->status == 0) {
4186 return(exec->state->type == XML_REGEXP_FINAL_STATE);
4187 }
4188#ifdef DEBUG_ERR
4189 if (exec->status < 0) {
4190 testerr(exec);
4191 }
4192#endif
4193 return(exec->status);
4194}
4195
4207int
4208xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
4209 void *data) {
4210 return(xmlRegExecPushStringInternal(exec, value, data, 0));
4211}
4212
4225int
4226xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
4227 const xmlChar *value2, void *data) {
4228 xmlChar buf[150];
4229 int lenn, lenp, ret;
4230 xmlChar *str;
4231
4232 if (exec == NULL)
4233 return(-1);
4234 if (exec->comp == NULL)
4235 return(-1);
4236 if (exec->status != 0)
4237 return(exec->status);
4238
4239 if (value2 == NULL)
4240 return(xmlRegExecPushString(exec, value, data));
4241
4242 lenn = strlen((char *) value2);
4243 lenp = strlen((char *) value);
4244
4245 if (150 < lenn + lenp + 2) {
4246 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
4247 if (str == NULL) {
4248 exec->status = -1;
4249 return(-1);
4250 }
4251 } else {
4252 str = buf;
4253 }
4254 memcpy(&str[0], value, lenp);
4255 str[lenp] = XML_REG_STRING_SEPARATOR;
4256 memcpy(&str[lenp + 1], value2, lenn);
4257 str[lenn + lenp + 1] = 0;
4258
4259 if (exec->comp->compact != NULL)
4260 ret = xmlRegCompactPushString(exec, exec->comp, str, data);
4261 else
4262 ret = xmlRegExecPushStringInternal(exec, str, data, 1);
4263
4264 if (str != buf)
4265 xmlFree(str);
4266 return(ret);
4267}
4268
4283static int
4284xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
4285 int *nbval, int *nbneg,
4286 xmlChar **values, int *terminal) {
4287 int maxval;
4288 int nb = 0;
4289
4290 if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
4291 (values == NULL) || (*nbval <= 0))
4292 return(-1);
4293
4294 maxval = *nbval;
4295 *nbval = 0;
4296 *nbneg = 0;
4297 if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
4298 xmlRegexpPtr comp;
4299 int target, i, state;
4300
4301 comp = exec->comp;
4302
4303 if (err) {
4304 if (exec->errStateNo == -1) return(-1);
4305 state = exec->errStateNo;
4306 } else {
4307 state = exec->index;
4308 }
4309 if (terminal != NULL) {
4310 if (comp->compact[state * (comp->nbstrings + 1)] ==
4311 XML_REGEXP_FINAL_STATE)
4312 *terminal = 1;
4313 else
4314 *terminal = 0;
4315 }
4316 for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
4317 target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
4318 if ((target > 0) && (target <= comp->nbstates) &&
4319 (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
4320 XML_REGEXP_SINK_STATE)) {
4321 values[nb++] = comp->stringMap[i];
4322 (*nbval)++;
4323 }
4324 }
4325 for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
4326 target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
4327 if ((target > 0) && (target <= comp->nbstates) &&
4328 (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
4329 XML_REGEXP_SINK_STATE)) {
4330 values[nb++] = comp->stringMap[i];
4331 (*nbneg)++;
4332 }
4333 }
4334 } else {
4335 int transno;
4336 xmlRegTransPtr trans;
4337 xmlRegAtomPtr atom;
4338 xmlRegStatePtr state;
4339
4340 if (terminal != NULL) {
4341 if (exec->state->type == XML_REGEXP_FINAL_STATE)
4342 *terminal = 1;
4343 else
4344 *terminal = 0;
4345 }
4346
4347 if (err) {
4348 if (exec->errState == NULL) return(-1);
4349 state = exec->errState;
4350 } else {
4351 if (exec->state == NULL) return(-1);
4352 state = exec->state;
4353 }
4354 for (transno = 0;
4355 (transno < state->nbTrans) && (nb < maxval);
4356 transno++) {
4357 trans = &state->trans[transno];
4358 if (trans->to < 0)
4359 continue;
4360 atom = trans->atom;
4361 if ((atom == NULL) || (atom->valuep == NULL))
4362 continue;
4363 if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4364 /* this should not be reached but ... */
4365 TODO;
4366 } else if (trans->count == REGEXP_ALL_COUNTER) {
4367 /* this should not be reached but ... */
4368 TODO;
4369 } else if (trans->counter >= 0) {
4370 xmlRegCounterPtr counter = NULL;
4371 int count;
4372
4373 if (err)
4374 count = exec->errCounts[trans->counter];
4375 else
4376 count = exec->counts[trans->counter];
4377 if (exec->comp != NULL)
4378 counter = &exec->comp->counters[trans->counter];
4379 if ((counter == NULL) || (count < counter->max)) {
4380 if (atom->neg)
4381 values[nb++] = (xmlChar *) atom->valuep2;
4382 else
4383 values[nb++] = (xmlChar *) atom->valuep;
4384 (*nbval)++;
4385 }
4386 } else {
4387 if ((exec->comp != NULL) && (exec->comp->states[trans->to] != NULL) &&
4388 (exec->comp->states[trans->to]->type !=
4389 XML_REGEXP_SINK_STATE)) {
4390 if (atom->neg)
4391 values[nb++] = (xmlChar *) atom->valuep2;
4392 else
4393 values[nb++] = (xmlChar *) atom->valuep;
4394 (*nbval)++;
4395 }
4396 }
4397 }
4398 for (transno = 0;
4399 (transno < state->nbTrans) && (nb < maxval);
4400 transno++) {
4401 trans = &state->trans[transno];
4402 if (trans->to < 0)
4403 continue;
4404 atom = trans->atom;
4405 if ((atom == NULL) || (atom->valuep == NULL))
4406 continue;
4407 if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4408 continue;
4409 } else if (trans->count == REGEXP_ALL_COUNTER) {
4410 continue;
4411 } else if (trans->counter >= 0) {
4412 continue;
4413 } else {
4414 if ((exec->comp->states[trans->to] != NULL) &&
4415 (exec->comp->states[trans->to]->type ==
4416 XML_REGEXP_SINK_STATE)) {
4417 if (atom->neg)
4418 values[nb++] = (xmlChar *) atom->valuep2;
4419 else
4420 values[nb++] = (xmlChar *) atom->valuep;
4421 (*nbneg)++;
4422 }
4423 }
4424 }
4425 }
4426 return(0);
4427}
4428
4446int
4447xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
4448 xmlChar **values, int *terminal) {
4449 return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
4450}
4451
4471int
4472xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
4473 int *nbval, int *nbneg, xmlChar **values, int *terminal) {
4474 if (exec == NULL)
4475 return(-1);
4476 if (string != NULL) {
4477 if (exec->status != 0)
4478 *string = exec->errString;
4479 else
4480 *string = NULL;
4481 }
4482 return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
4483}
4484
4485#ifdef DEBUG_ERR
4486static void testerr(xmlRegExecCtxtPtr exec) {
4487 const xmlChar *string;
4488 xmlChar *values[5];
4489 int nb = 5;
4490 int nbneg;
4491 int terminal;
4492 xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
4493}
4494#endif
4495
4496#if 0
4497static int
4498xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
4499 xmlRegTransPtr trans;
4500 xmlRegAtomPtr atom;
4501 int ret;
4502 int codepoint, len;
4503
4504 if (exec == NULL)
4505 return(-1);
4506 if (exec->status != 0)
4507 return(exec->status);
4508
4509 while ((exec->status == 0) &&
4510 ((exec->inputString[exec->index] != 0) ||
4511 (exec->state->type != XML_REGEXP_FINAL_STATE))) {
4512
4513 /*
4514 * End of input on non-terminal state, rollback, however we may
4515 * still have epsilon like transition for counted transitions
4516 * on counters, in that case don't break too early.
4517 */
4518 if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
4519 goto rollback;
4520
4521 exec->transcount = 0;
4522 for (;exec->transno < exec->state->nbTrans;exec->transno++) {
4523 trans = &exec->state->trans[exec->transno];
4524 if (trans->to < 0)
4525 continue;
4526 atom = trans->atom;
4527 ret = 0;
4528 if (trans->count >= 0) {
4529 int count;
4530 xmlRegCounterPtr counter;
4531
4532 /*
4533 * A counted transition.
4534 */
4535
4536 count = exec->counts[trans->count];
4537 counter = &exec->comp->counters[trans->count];
4538#ifdef DEBUG_REGEXP_EXEC
4539 printf("testing count %d: val %d, min %d, max %d\n",
4540 trans->count, count, counter->min, counter->max);
4541#endif
4542 ret = ((count >= counter->min) && (count <= counter->max));
4543 } else if (atom == NULL) {
4544 fprintf(stderr, "epsilon transition left at runtime\n");
4545 exec->status = -2;
4546 break;
4547 } else if (exec->inputString[exec->index] != 0) {
4548 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
4549 ret = xmlRegCheckCharacter(atom, codepoint);
4550 if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
4551 xmlRegStatePtr to = exec->comp->states[trans->to];
4552
4553 /*
4554 * this is a multiple input sequence
4555 */
4556 if (exec->state->nbTrans > exec->transno + 1) {
4557 xmlFARegExecSave(exec);
4558 }
4559 exec->transcount = 1;
4560 do {
4561 /*
4562 * Try to progress as much as possible on the input
4563 */
4564 if (exec->transcount == atom->max) {
4565 break;
4566 }
4567 exec->index += len;
4568 /*
4569 * End of input: stop here
4570 */
4571 if (exec->inputString[exec->index] == 0) {
4572 exec->index -= len;
4573 break;
4574 }
4575 if (exec->transcount >= atom->min) {
4576 int transno = exec->transno;
4577 xmlRegStatePtr state = exec->state;
4578
4579 /*
4580 * The transition is acceptable save it
4581 */
4582 exec->transno = -1; /* trick */
4583 exec->state = to;
4584 xmlFARegExecSave(exec);
4585 exec->transno = transno;
4586 exec->state = state;
4587 }
4588 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
4589 len);
4590 ret = xmlRegCheckCharacter(atom, codepoint);
4591 exec->transcount++;
4592 } while (ret == 1);
4593 if (exec->transcount < atom->min)
4594 ret = 0;
4595
4596 /*
4597 * If the last check failed but one transition was found
4598 * possible, rollback
4599 */
4600 if (ret < 0)
4601 ret = 0;
4602 if (ret == 0) {
4603 goto rollback;
4604 }
4605 }
4606 }
4607 if (ret == 1) {
4608 if (exec->state->nbTrans > exec->transno + 1) {
4609 xmlFARegExecSave(exec);
4610 }
4611 /*
4612 * restart count for expressions like this ((abc){2})*
4613 */
4614 if (trans->count >= 0) {
4615#ifdef DEBUG_REGEXP_EXEC
4616 printf("Reset count %d\n", trans->count);
4617#endif
4618 exec->counts[trans->count] = 0;
4619 }
4620 if (trans->counter >= 0) {
4621#ifdef DEBUG_REGEXP_EXEC
4622 printf("Increasing count %d\n", trans->counter);
4623#endif
4624 exec->counts[trans->counter]++;
4625 }
4626#ifdef DEBUG_REGEXP_EXEC
4627 printf("entering state %d\n", trans->to);
4628#endif
4629 exec->state = exec->comp->states[trans->to];
4630 exec->transno = 0;
4631 if (trans->atom != NULL) {
4632 exec->index += len;
4633 }
4634 goto progress;
4635 } else if (ret < 0) {
4636 exec->status = -4;
4637 break;
4638 }
4639 }
4640 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4641rollback:
4642 /*
4643 * Failed to find a way out
4644 */
4645 exec->determinist = 0;
4646 xmlFARegExecRollBack(exec);
4647 }
4648progress:
4649 continue;
4650 }
4651}
4652#endif
4653/************************************************************************
4654 * *
4655 * Parser for the Schemas Datatype Regular Expressions *
4656 * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs *
4657 * *
4658 ************************************************************************/
4659
4666static int
4667xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
4668 int cur;
4669 int len;
4670
4671 cur = CUR_SCHAR(ctxt->cur, len);
4672 if ((cur == '.') || (cur == '\\') || (cur == '?') ||
4673 (cur == '*') || (cur == '+') || (cur == '(') ||
4674 (cur == ')') || (cur == '|') || (cur == 0x5B) ||
4675 (cur == 0x5D) || (cur == 0))
4676 return(-1);
4677 return(cur);
4678}
4679
4696static void
4697xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
4698 int cur;
4699 xmlRegAtomType type = (xmlRegAtomType) 0;
4700 xmlChar *blockName = NULL;
4701
4702 cur = CUR;
4703 if (cur == 'L') {
4704 NEXT;
4705 cur = CUR;
4706 if (cur == 'u') {
4707 NEXT;
4708 type = XML_REGEXP_LETTER_UPPERCASE;
4709 } else if (cur == 'l') {
4710 NEXT;
4711 type = XML_REGEXP_LETTER_LOWERCASE;
4712 } else if (cur == 't') {
4713 NEXT;
4714 type = XML_REGEXP_LETTER_TITLECASE;
4715 } else if (cur == 'm') {
4716 NEXT;
4717 type = XML_REGEXP_LETTER_MODIFIER;
4718 } else if (cur == 'o') {
4719 NEXT;
4720 type = XML_REGEXP_LETTER_OTHERS;
4721 } else {
4722 type = XML_REGEXP_LETTER;
4723 }
4724 } else if (cur == 'M') {
4725 NEXT;
4726 cur = CUR;
4727 if (cur == 'n') {
4728 NEXT;
4729 /* nonspacing */
4730 type = XML_REGEXP_MARK_NONSPACING;
4731 } else if (cur == 'c') {
4732 NEXT;
4733 /* spacing combining */
4734 type = XML_REGEXP_MARK_SPACECOMBINING;
4735 } else if (cur == 'e') {
4736 NEXT;
4737 /* enclosing */
4738 type = XML_REGEXP_MARK_ENCLOSING;
4739 } else {
4740 /* all marks */
4741 type = XML_REGEXP_MARK;
4742 }
4743 } else if (cur == 'N') {
4744 NEXT;
4745 cur = CUR;
4746 if (cur == 'd') {
4747 NEXT;
4748 /* digital */
4749 type = XML_REGEXP_NUMBER_DECIMAL;
4750 } else if (cur == 'l') {
4751 NEXT;
4752 /* letter */
4753 type = XML_REGEXP_NUMBER_LETTER;
4754 } else if (cur == 'o') {
4755 NEXT;
4756 /* other */
4757 type = XML_REGEXP_NUMBER_OTHERS;
4758 } else {
4759 /* all numbers */
4760 type = XML_REGEXP_NUMBER;
4761 }
4762 } else if (cur == 'P') {
4763 NEXT;
4764 cur = CUR;
4765 if (cur == 'c') {
4766 NEXT;
4767 /* connector */
4768 type = XML_REGEXP_PUNCT_CONNECTOR;
4769 } else if (cur == 'd') {
4770 NEXT;
4771 /* dash */
4772 type = XML_REGEXP_PUNCT_DASH;
4773 } else if (cur == 's') {
4774 NEXT;
4775 /* open */
4776 type = XML_REGEXP_PUNCT_OPEN;
4777 } else if (cur == 'e') {
4778 NEXT;
4779 /* close */
4780 type = XML_REGEXP_PUNCT_CLOSE;
4781 } else if (cur == 'i') {
4782 NEXT;
4783 /* initial quote */
4784 type = XML_REGEXP_PUNCT_INITQUOTE;
4785 } else if (cur == 'f') {
4786 NEXT;
4787 /* final quote */
4788 type = XML_REGEXP_PUNCT_FINQUOTE;
4789 } else if (cur == 'o') {
4790 NEXT;
4791 /* other */
4792 type = XML_REGEXP_PUNCT_OTHERS;
4793 } else {
4794 /* all punctuation */
4795 type = XML_REGEXP_PUNCT;
4796 }
4797 } else if (cur == 'Z') {
4798 NEXT;
4799 cur = CUR;
4800 if (cur == 's') {
4801 NEXT;
4802 /* space */
4803 type = XML_REGEXP_SEPAR_SPACE;
4804 } else if (cur == 'l') {
4805 NEXT;
4806 /* line */
4807 type = XML_REGEXP_SEPAR_LINE;
4808 } else if (cur == 'p') {
4809 NEXT;
4810 /* paragraph */
4811 type = XML_REGEXP_SEPAR_PARA;
4812 } else {
4813 /* all separators */
4814 type = XML_REGEXP_SEPAR;
4815 }
4816 } else if (cur == 'S') {
4817 NEXT;
4818 cur = CUR;
4819 if (cur == 'm') {
4820 NEXT;
4821 type = XML_REGEXP_SYMBOL_MATH;
4822 /* math */
4823 } else if (cur == 'c') {
4824 NEXT;
4825 type = XML_REGEXP_SYMBOL_CURRENCY;
4826 /* currency */
4827 } else if (cur == 'k') {
4828 NEXT;
4829 type = XML_REGEXP_SYMBOL_MODIFIER;
4830 /* modifiers */
4831 } else if (cur == 'o') {
4832 NEXT;
4833 type = XML_REGEXP_SYMBOL_OTHERS;
4834 /* other */
4835 } else {
4836 /* all symbols */
4837 type = XML_REGEXP_SYMBOL;
4838 }
4839 } else if (cur == 'C') {
4840 NEXT;
4841 cur = CUR;
4842 if (cur == 'c') {
4843 NEXT;
4844 /* control */
4845 type = XML_REGEXP_OTHER_CONTROL;
4846 } else if (cur == 'f') {
4847 NEXT;
4848 /* format */
4849 type = XML_REGEXP_OTHER_FORMAT;
4850 } else if (cur == 'o') {
4851 NEXT;
4852 /* private use */
4853 type = XML_REGEXP_OTHER_PRIVATE;
4854 } else if (cur == 'n') {
4855 NEXT;
4856 /* not assigned */
4857 type = XML_REGEXP_OTHER_NA;
4858 } else {
4859 /* all others */
4860 type = XML_REGEXP_OTHER;
4861 }
4862 } else if (cur == 'I') {
4863 const xmlChar *start;
4864 NEXT;
4865 cur = CUR;
4866 if (cur != 's') {
4867 ERROR("IsXXXX expected");
4868 return;
4869 }
4870 NEXT;
4871 start = ctxt->cur;
4872 cur = CUR;
4873 if (((cur >= 'a') && (cur <= 'z')) ||
4874 ((cur >= 'A') && (cur <= 'Z')) ||
4875 ((cur >= '0') && (cur <= '9')) ||
4876 (cur == 0x2D)) {
4877 NEXT;
4878 cur = CUR;
4879 while (((cur >= 'a') && (cur <= 'z')) ||
4880 ((cur >= 'A') && (cur <= 'Z')) ||
4881 ((cur >= '0') && (cur <= '9')) ||
4882 (cur == 0x2D)) {
4883 NEXT;
4884 cur = CUR;
4885 }
4886 }
4887 type = XML_REGEXP_BLOCK_NAME;
4888 blockName = xmlStrndup(start, ctxt->cur - start);
4889 } else {
4890 ERROR("Unknown char property");
4891 return;
4892 }
4893 if (ctxt->atom == NULL) {
4894 ctxt->atom = xmlRegNewAtom(ctxt, type);
4895 if (ctxt->atom != NULL)
4896 ctxt->atom->valuep = blockName;
4897 } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4898 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4899 type, 0, 0, blockName);
4900 }
4901}
4902
4903static int parse_escaped_codeunit(xmlRegParserCtxtPtr ctxt)
4904{
4905 int val = 0, i, cur;
4906 for (i = 0; i < 4; i++) {
4907 NEXT;
4908 val *= 16;
4909 cur = CUR;
4910 if (cur >= '0' && cur <= '9') {
4911 val += cur - '0';
4912 } else if (cur >= 'A' && cur <= 'F') {
4913 val += cur - 'A' + 10;
4914 } else if (cur >= 'a' && cur <= 'f') {
4915 val += cur - 'a' + 10;
4916 } else {
4917 ERROR("Expecting hex digit");
4918 return -1;
4919 }
4920 }
4921 return val;
4922}
4923
4924static int parse_escaped_codepoint(xmlRegParserCtxtPtr ctxt)
4925{
4926 int val = parse_escaped_codeunit(ctxt);
4927 if (0xD800 <= val && val <= 0xDBFF) {
4928 NEXT;
4929 if (CUR == '\\') {
4930 NEXT;
4931 if (CUR == 'u') {
4932 int low = parse_escaped_codeunit(ctxt);
4933 if (0xDC00 <= low && low <= 0xDFFF) {
4934 return (val - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
4935 }
4936 }
4937 }
4938 ERROR("Invalid low surrogate pair code unit");
4939 val = -1;
4940 }
4941 return val;
4942}
4943
4954static void
4955xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
4956 int cur;
4957
4958 if (CUR == '.') {
4959 if (ctxt->atom == NULL) {
4960 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
4961 } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4962 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4963 XML_REGEXP_ANYCHAR, 0, 0, NULL);
4964 }
4965 NEXT;
4966 return;
4967 }
4968 if (CUR != '\\') {
4969 ERROR("Escaped sequence: expecting \\");
4970 return;
4971 }
4972 NEXT;
4973 cur = CUR;
4974 if (cur == 'p') {
4975 NEXT;
4976 if (CUR != '{') {
4977 ERROR("Expecting '{'");
4978 return;
4979 }
4980 NEXT;
4981 xmlFAParseCharProp(ctxt);
4982 if (CUR != '}') {
4983 ERROR("Expecting '}'");
4984 return;
4985 }
4986 NEXT;
4987 } else if (cur == 'P') {
4988 NEXT;
4989 if (CUR != '{') {
4990 ERROR("Expecting '{'");
4991 return;
4992 }
4993 NEXT;
4994 xmlFAParseCharProp(ctxt);
4995 if (ctxt->atom != NULL)
4996 ctxt->atom->neg = 1;
4997 if (CUR != '}') {
4998 ERROR("Expecting '}'");
4999 return;
5000 }
5001 NEXT;
5002 } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
5003 (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
5004 (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
5005 (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
5006 (cur == 0x5E) ||
5007
5008 /* Non-standard escape sequences:
5009 * Java 1.8|.NET Core 3.1|MSXML 6 */
5010 (cur == '!') || /* + | + | + */
5011 (cur == '"') || /* + | + | + */
5012 (cur == '#') || /* + | + | + */
5013 (cur == '$') || /* + | + | + */
5014 (cur == '%') || /* + | + | + */
5015 (cur == ',') || /* + | + | + */
5016 (cur == '/') || /* + | + | + */
5017 (cur == ':') || /* + | + | + */
5018 (cur == ';') || /* + | + | + */
5019 (cur == '=') || /* + | + | + */
5020 (cur == '>') || /* | + | + */
5021 (cur == '@') || /* + | + | + */
5022 (cur == '`') || /* + | + | + */
5023 (cur == '~') || /* + | + | + */
5024 (cur == 'u')) { /* | + | + */
5025 if (ctxt->atom == NULL) {
5026 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
5027 if (ctxt->atom != NULL) {
5028 switch (cur) {
5029 case 'n':
5030 ctxt->atom->codepoint = '\n';
5031 break;
5032 case 'r':
5033 ctxt->atom->codepoint = '\r';
5034 break;
5035 case 't':
5036 ctxt->atom->codepoint = '\t';
5037 break;
5038 case 'u':
5039 cur = parse_escaped_codepoint(ctxt);
5040 if (cur < 0) {
5041 return;
5042 }
5043 ctxt->atom->codepoint = cur;
5044 break;
5045 default:
5046 ctxt->atom->codepoint = cur;
5047 }
5048 }
5049 } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
5050 switch (cur) {
5051 case 'n':
5052 cur = '\n';
5053 break;
5054 case 'r':
5055 cur = '\r';
5056 break;
5057 case 't':
5058 cur = '\t';
5059 break;
5060 }
5061 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5062 XML_REGEXP_CHARVAL, cur, cur, NULL);
5063 }
5064 NEXT;
5065 } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
5066 (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
5067 (cur == 'w') || (cur == 'W')) {
5068 xmlRegAtomType type = XML_REGEXP_ANYSPACE;
5069
5070 switch (cur) {
5071 case 's':
5072 type = XML_REGEXP_ANYSPACE;
5073 break;
5074 case 'S':
5075 type = XML_REGEXP_NOTSPACE;
5076 break;
5077 case 'i':
5078 type = XML_REGEXP_INITNAME;
5079 break;
5080 case 'I':
5081 type = XML_REGEXP_NOTINITNAME;
5082 break;
5083 case 'c':
5084 type = XML_REGEXP_NAMECHAR;
5085 break;
5086 case 'C':
5087 type = XML_REGEXP_NOTNAMECHAR;
5088 break;
5089 case 'd':
5090 type = XML_REGEXP_DECIMAL;
5091 break;
5092 case 'D':
5093 type = XML_REGEXP_NOTDECIMAL;
5094 break;
5095 case 'w':
5096 type = XML_REGEXP_REALCHAR;
5097 break;
5098 case 'W':
5099 type = XML_REGEXP_NOTREALCHAR;
5100 break;
5101 }
5102 NEXT;
5103 if (ctxt->atom == NULL) {
5104 ctxt->atom = xmlRegNewAtom(ctxt, type);
5105 } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
5106 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5107 type, 0, 0, NULL);
5108 }
5109 } else {
5110 ERROR("Wrong escape sequence, misuse of character '\\'");
5111 }
5112}
5113
5124static void
5125xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
5126 int cur, len;
5127 int start = -1;
5128 int end = -1;
5129
5130 if (CUR == '\0') {
5131 ERROR("Expecting ']'");
5132 return;
5133 }
5134
5135 cur = CUR;
5136 if (cur == '\\') {
5137 NEXT;
5138 cur = CUR;
5139 switch (cur) {
5140 case 'n': start = 0xA; break;
5141 case 'r': start = 0xD; break;
5142 case 't': start = 0x9; break;
5143 case '\\': case '|': case '.': case '-': case '^': case '?':
5144 case '*': case '+': case '{': case '}': case '(': case ')':
5145 case '[': case ']':
5146 start = cur; break;
5147 default:
5148 ERROR("Invalid escape value");
5149 return;
5150 }
5151 end = start;
5152 len = 1;
5153 } else if ((cur != 0x5B) && (cur != 0x5D)) {
5154 end = start = CUR_SCHAR(ctxt->cur, len);
5155 } else {
5156 ERROR("Expecting a char range");
5157 return;
5158 }
5159 /*
5160 * Since we are "inside" a range, we can assume ctxt->cur is past
5161 * the start of ctxt->string, and PREV should be safe
5162 */
5163 if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
5164 NEXTL(len);
5165 return;
5166 }
5167 NEXTL(len);
5168 cur = CUR;
5169 if ((cur != '-') || (NXT(1) == '[') || (NXT(1) == ']')) {
5170 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5171 XML_REGEXP_CHARVAL, start, end, NULL);
5172 return;
5173 }
5174 NEXT;
5175 cur = CUR;
5176 if (cur == '\\') {
5177 NEXT;
5178 cur = CUR;
5179 switch (cur) {
5180 case 'n': end = 0xA; break;
5181 case 'r': end = 0xD; break;
5182 case 't': end = 0x9; break;
5183 case '\\': case '|': case '.': case '-': case '^': case '?':
5184 case '*': case '+': case '{': case '}': case '(': case ')':
5185 case '[': case ']':
5186 end = cur; break;
5187 default:
5188 ERROR("Invalid escape value");
5189 return;
5190 }
5191 len = 1;
5192 } else if ((cur != '\0') && (cur != 0x5B) && (cur != 0x5D)) {
5193 end = CUR_SCHAR(ctxt->cur, len);
5194 } else {
5195 ERROR("Expecting the end of a char range");
5196 return;
5197 }
5198
5199 /* TODO check that the values are acceptable character ranges for XML */
5200 if (end < start) {
5201 ERROR("End of range is before start of range");
5202 } else {
5203 NEXTL(len);
5204 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5205 XML_REGEXP_CHARVAL, start, end, NULL);
5206 }
5207 return;
5208}
5209
5216static void
5217xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
5218 do {
5219 if (CUR == '\\') {
5220 xmlFAParseCharClassEsc(ctxt);
5221 } else {
5222 xmlFAParseCharRange(ctxt);
5223 }
5224 } while ((CUR != ']') && (CUR != '-') &&
5225 (CUR != 0) && (ctxt->error == 0));
5226}
5227
5237static void
5238xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
5239 int neg = ctxt->neg;
5240
5241 if (CUR == '^') {
5242 NEXT;
5243 ctxt->neg = !ctxt->neg;
5244 xmlFAParsePosCharGroup(ctxt);
5245 ctxt->neg = neg;
5246 }
5247 while ((CUR != ']') && (ctxt->error == 0)) {
5248 if ((CUR == '-') && (NXT(1) == '[')) {
5249 NEXT; /* eat the '-' */
5250 NEXT; /* eat the '[' */
5251 ctxt->neg = 2;
5252 xmlFAParseCharGroup(ctxt);
5253 ctxt->neg = neg;
5254 if (CUR == ']') {
5255 NEXT;
5256 } else {
5257 ERROR("charClassExpr: ']' expected");
5258 }
5259 break;
5260 } else {
5261 xmlFAParsePosCharGroup(ctxt);
5262 }
5263 }
5264}
5265
5273static void
5274xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
5275 if (CUR == '[') {
5276 NEXT;
5277 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
5278 if (ctxt->atom == NULL)
5279 return;
5280 xmlFAParseCharGroup(ctxt);
5281 if (CUR == ']') {
5282 NEXT;
5283 } else {
5284 ERROR("xmlFAParseCharClass: ']' expected");
5285 }
5286 } else {
5287 xmlFAParseCharClassEsc(ctxt);
5288 }
5289}
5290
5299static int
5300xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
5301 int ret = 0;
5302 int ok = 0;
5303 int overflow = 0;
5304
5305 while ((CUR >= '0') && (CUR <= '9')) {
5306 if (ret > INT_MAX / 10) {
5307 overflow = 1;
5308 } else {
5309 int digit = CUR - '0';
5310
5311 ret *= 10;
5312 if (ret > INT_MAX - digit)
5313 overflow = 1;
5314 else
5315 ret += digit;
5316 }
5317 ok = 1;
5318 NEXT;
5319 }
5320 if ((ok != 1) || (overflow == 1)) {
5321 return(-1);
5322 }
5323 return(ret);
5324}
5325
5336static int
5337xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
5338 int cur;
5339
5340 cur = CUR;
5341 if ((cur == '?') || (cur == '*') || (cur == '+')) {
5342 if (ctxt->atom != NULL) {
5343 if (cur == '?')
5344 ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
5345 else if (cur == '*')
5346 ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
5347 else if (cur == '+')
5348 ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
5349 }
5350 NEXT;
5351 return(1);
5352 }
5353 if (cur == '{') {
5354 int min = 0, max = 0;
5355
5356 NEXT;
5357 cur = xmlFAParseQuantExact(ctxt);
5358 if (cur >= 0)
5359 min = cur;
5360 else {
5361 ERROR("Improper quantifier");
5362 }
5363 if (CUR == ',') {
5364 NEXT;
5365 if (CUR == '}')
5366 max = INT_MAX;
5367 else {
5368 cur = xmlFAParseQuantExact(ctxt);
5369 if (cur >= 0)
5370 max = cur;
5371 else {
5372 ERROR("Improper quantifier");
5373 }
5374 }
5375 }
5376 if (CUR == '}') {
5377 NEXT;
5378 } else {
5379 ERROR("Unterminated quantifier");
5380 }
5381 if (max == 0)
5382 max = min;
5383 if (ctxt->atom != NULL) {
5384 ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
5385 ctxt->atom->min = min;
5386 ctxt->atom->max = max;
5387 }
5388 return(1);
5389 }
5390 return(0);
5391}
5392
5399static int
5400xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
5401 int codepoint, len;
5402
5403 codepoint = xmlFAIsChar(ctxt);
5404 if (codepoint > 0) {
5405 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
5406 if (ctxt->atom == NULL)
5407 return(-1);
5408 codepoint = CUR_SCHAR(ctxt->cur, len);
5409 ctxt->atom->codepoint = codepoint;
5410 NEXTL(len);
5411 return(1);
5412 } else if (CUR == '|') {
5413 return(0);
5414 } else if (CUR == 0) {
5415 return(0);
5416 } else if (CUR == ')') {
5417 return(0);
5418 } else if (CUR == '(') {
5419 xmlRegStatePtr start, oldend, start0;
5420
5421 NEXT;
5422 if (ctxt->depth >= 50) {
5423 ERROR("xmlFAParseAtom: maximum nesting depth exceeded");
5424 return(-1);
5425 }
5426 /*
5427 * this extra Epsilon transition is needed if we count with 0 allowed
5428 * unfortunately this can't be known at that point
5429 */
5430 xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
5431 start0 = ctxt->state;
5432 xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
5433 start = ctxt->state;
5434 oldend = ctxt->end;
5435 ctxt->end = NULL;
5436 ctxt->atom = NULL;
5437 ctxt->depth++;
5438 xmlFAParseRegExp(ctxt, 0);
5439 ctxt->depth--;
5440 if (CUR == ')') {
5441 NEXT;
5442 } else {
5443 ERROR("xmlFAParseAtom: expecting ')'");
5444 }
5445 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
5446 if (ctxt->atom == NULL)
5447 return(-1);
5448 ctxt->atom->start = start;
5449 ctxt->atom->start0 = start0;
5450 ctxt->atom->stop = ctxt->state;
5451 ctxt->end = oldend;
5452 return(1);
5453 } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
5454 xmlFAParseCharClass(ctxt);
5455 return(1);
5456 }
5457 return(0);
5458}
5459
5466static int
5467xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
5468 int ret;
5469
5470 ctxt->atom = NULL;
5471 ret = xmlFAParseAtom(ctxt);
5472 if (ret == 0)
5473 return(0);
5474 if (ctxt->atom == NULL) {
5475 ERROR("internal: no atom generated");
5476 }
5477 xmlFAParseQuantifier(ctxt);
5478 return(1);
5479}
5480
5491static int
5492xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
5493 xmlRegStatePtr previous;
5494 int ret;
5495
5496 previous = ctxt->state;
5497 ret = xmlFAParsePiece(ctxt);
5498 if (ret == 0) {
5499 /* Empty branch */
5500 xmlFAGenerateEpsilonTransition(ctxt, previous, to);
5501 } else {
5502 if (xmlFAGenerateTransitions(ctxt, previous,
5503 (CUR=='|' || CUR==')' || CUR==0) ? to : NULL, ctxt->atom) < 0)
5504 return(-1);
5505 previous = ctxt->state;
5506 ctxt->atom = NULL;
5507 }
5508 while ((ret != 0) && (ctxt->error == 0)) {
5509 ret = xmlFAParsePiece(ctxt);
5510 if (ret != 0) {
5511 if (xmlFAGenerateTransitions(ctxt, previous,
5512 (CUR=='|' || CUR==')' || CUR==0) ? to : NULL,
5513 ctxt->atom) < 0)
5514 return(-1);
5515 previous = ctxt->state;
5516 ctxt->atom = NULL;
5517 }
5518 }
5519 return(0);
5520}
5521
5529static void
5530xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
5531 xmlRegStatePtr start, end;
5532
5533 /* if not top start should have been generated by an epsilon trans */
5534 start = ctxt->state;
5535 ctxt->end = NULL;
5536 xmlFAParseBranch(ctxt, NULL);
5537 if (top) {
5538#ifdef DEBUG_REGEXP_GRAPH
5539 printf("State %d is final\n", ctxt->state->no);
5540#endif
5541 ctxt->state->type = XML_REGEXP_FINAL_STATE;
5542 }
5543 if (CUR != '|') {
5544 ctxt->end = ctxt->state;
5545 return;
5546 }
5547 end = ctxt->state;
5548 while ((CUR == '|') && (ctxt->error == 0)) {
5549 NEXT;
5550 ctxt->state = start;
5551 ctxt->end = NULL;
5552 xmlFAParseBranch(ctxt, end);
5553 }
5554 if (!top) {
5555 ctxt->state = end;
5556 ctxt->end = end;
5557 }
5558}
5559
5560/************************************************************************
5561 * *
5562 * The basic API *
5563 * *
5564 ************************************************************************/
5565
5573void
5574xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
5575 int i;
5576
5577 if (output == NULL)
5578 return;
5579 fprintf(output, " regexp: ");
5580 if (regexp == NULL) {
5581 fprintf(output, "NULL\n");
5582 return;
5583 }
5584 fprintf(output, "'%s' ", regexp->string);
5585 fprintf(output, "\n");
5586 fprintf(output, "%d atoms:\n", regexp->nbAtoms);
5587 for (i = 0;i < regexp->nbAtoms; i++) {
5588 fprintf(output, " %02d ", i);
5589 xmlRegPrintAtom(output, regexp->atoms[i]);
5590 }
5591 fprintf(output, "%d states:", regexp->nbStates);
5592 fprintf(output, "\n");
5593 for (i = 0;i < regexp->nbStates; i++) {
5594 xmlRegPrintState(output, regexp->states[i]);
5595 }
5596 fprintf(output, "%d counters:\n", regexp->nbCounters);
5597 for (i = 0;i < regexp->nbCounters; i++) {
5598 fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
5599 regexp->counters[i].max);
5600 }
5601}
5602
5613xmlRegexpPtr
5614xmlRegexpCompile(const xmlChar *regexp) {
5615 xmlRegexpPtr ret;
5616 xmlRegParserCtxtPtr ctxt;
5617
5618 ctxt = xmlRegNewParserCtxt(regexp);
5619 if (ctxt == NULL)
5620 return(NULL);
5621
5622 /* initialize the parser */
5623 ctxt->end = NULL;
5624 ctxt->start = ctxt->state = xmlRegNewState(ctxt);
5625 xmlRegStatePush(ctxt, ctxt->start);
5626
5627 /* parse the expression building an automata */
5628 xmlFAParseRegExp(ctxt, 1);
5629 if (CUR != 0) {
5630 ERROR("xmlFAParseRegExp: extra characters");
5631 }
5632 if (ctxt->error != 0) {
5633 xmlRegFreeParserCtxt(ctxt);
5634 return(NULL);
5635 }
5636 ctxt->end = ctxt->state;
5637 ctxt->start->type = XML_REGEXP_START_STATE;
5638 ctxt->end->type = XML_REGEXP_FINAL_STATE;
5639
5640 /* remove the Epsilon except for counted transitions */
5641 xmlFAEliminateEpsilonTransitions(ctxt);
5642
5643
5644 if (ctxt->error != 0) {
5645 xmlRegFreeParserCtxt(ctxt);
5646 return(NULL);
5647 }
5648 ret = xmlRegEpxFromParse(ctxt);
5649 xmlRegFreeParserCtxt(ctxt);
5650 return(ret);
5651}
5652
5662int
5663xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
5664 if ((comp == NULL) || (content == NULL))
5665 return(-1);
5666 return(xmlFARegExec(comp, content));
5667}
5668
5677int
5678xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
5679 xmlAutomataPtr am;
5680 int ret;
5681
5682 if (comp == NULL)
5683 return(-1);
5684 if (comp->determinist != -1)
5685 return(comp->determinist);
5686
5687 am = xmlNewAutomata();
5688 if (am == NULL)
5689 return(-1);
5690 if (am->states != NULL) {
5691 int i;
5692
5693 for (i = 0;i < am->nbStates;i++)
5694 xmlRegFreeState(am->states[i]);
5695 xmlFree(am->states);
5696 }
5697 am->nbAtoms = comp->nbAtoms;
5698 am->atoms = comp->atoms;
5699 am->nbStates = comp->nbStates;
5700 am->states = comp->states;
5701 am->determinist = -1;
5702 am->flags = comp->flags;
5703 ret = xmlFAComputesDeterminism(am);
5704 am->atoms = NULL;
5705 am->states = NULL;
5706 xmlFreeAutomata(am);
5707 comp->determinist = ret;
5708 return(ret);
5709}
5710
5717void
5718xmlRegFreeRegexp(xmlRegexpPtr regexp) {
5719 int i;
5720 if (regexp == NULL)
5721 return;
5722
5723 if (regexp->string != NULL)
5724 xmlFree(regexp->string);
5725 if (regexp->states != NULL) {
5726 for (i = 0;i < regexp->nbStates;i++)
5727 xmlRegFreeState(regexp->states[i]);
5728 xmlFree(regexp->states);
5729 }
5730 if (regexp->atoms != NULL) {
5731 for (i = 0;i < regexp->nbAtoms;i++)
5732 xmlRegFreeAtom(regexp->atoms[i]);
5733 xmlFree(regexp->atoms);
5734 }
5735 if (regexp->counters != NULL)
5736 xmlFree(regexp->counters);
5737 if (regexp->compact != NULL)
5738 xmlFree(regexp->compact);
5739 if (regexp->transdata != NULL)
5740 xmlFree(regexp->transdata);
5741 if (regexp->stringMap != NULL) {
5742 for (i = 0; i < regexp->nbstrings;i++)
5743 xmlFree(regexp->stringMap[i]);
5744 xmlFree(regexp->stringMap);
5745 }
5746
5747 xmlFree(regexp);
5748}
5749
5750#ifdef LIBXML_AUTOMATA_ENABLED
5751/************************************************************************
5752 * *
5753 * The Automata interface *
5754 * *
5755 ************************************************************************/
5756
5764xmlAutomataPtr
5765xmlNewAutomata(void) {
5766 xmlAutomataPtr ctxt;
5767
5768 ctxt = xmlRegNewParserCtxt(NULL);
5769 if (ctxt == NULL)
5770 return(NULL);
5771
5772 /* initialize the parser */
5773 ctxt->end = NULL;
5774 ctxt->start = ctxt->state = xmlRegNewState(ctxt);
5775 if (ctxt->start == NULL) {
5776 xmlFreeAutomata(ctxt);
5777 return(NULL);
5778 }
5779 ctxt->start->type = XML_REGEXP_START_STATE;
5780 if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
5781 xmlRegFreeState(ctxt->start);
5782 xmlFreeAutomata(ctxt);
5783 return(NULL);
5784 }
5785 ctxt->flags = 0;
5786
5787 return(ctxt);
5788}
5789
5796void
5797xmlFreeAutomata(xmlAutomataPtr am) {
5798 if (am == NULL)
5799 return;
5800 xmlRegFreeParserCtxt(am);
5801}
5802
5810void
5811xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
5812 if (am == NULL)
5813 return;
5814 am->flags |= flags;
5815}
5816
5825xmlAutomataStatePtr
5826xmlAutomataGetInitState(xmlAutomataPtr am) {
5827 if (am == NULL)
5828 return(NULL);
5829 return(am->start);
5830}
5831
5841int
5842xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
5843 if ((am == NULL) || (state == NULL))
5844 return(-1);
5845 state->type = XML_REGEXP_FINAL_STATE;
5846 return(0);
5847}
5848
5863xmlAutomataStatePtr
5864xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
5865 xmlAutomataStatePtr to, const xmlChar *token,
5866 void *data) {
5867 xmlRegAtomPtr atom;
5868
5869 if ((am == NULL) || (from == NULL) || (token == NULL))
5870 return(NULL);
5871 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5872 if (atom == NULL)
5873 return(NULL);
5874 atom->data = data;
5875 atom->valuep = xmlStrdup(token);
5876
5877 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5878 xmlRegFreeAtom(atom);
5879 return(NULL);
5880 }
5881 if (to == NULL)
5882 return(am->state);
5883 return(to);
5884}
5885
5901xmlAutomataStatePtr
5902xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5903 xmlAutomataStatePtr to, const xmlChar *token,
5904 const xmlChar *token2, void *data) {
5905 xmlRegAtomPtr atom;
5906
5907 if ((am == NULL) || (from == NULL) || (token == NULL))
5908 return(NULL);
5909 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5910 if (atom == NULL)
5911 return(NULL);
5912 atom->data = data;
5913 if ((token2 == NULL) || (*token2 == 0)) {
5914 atom->valuep = xmlStrdup(token);
5915 } else {
5916 int lenn, lenp;
5917 xmlChar *str;
5918
5919 lenn = strlen((char *) token2);
5920 lenp = strlen((char *) token);
5921
5922 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5923 if (str == NULL) {
5924 xmlRegFreeAtom(atom);
5925 return(NULL);
5926 }
5927 memcpy(&str[0], token, lenp);
5928 str[lenp] = '|';
5929 memcpy(&str[lenp + 1], token2, lenn);
5930 str[lenn + lenp + 1] = 0;
5931
5932 atom->valuep = str;
5933 }
5934
5935 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5936 xmlRegFreeAtom(atom);
5937 return(NULL);
5938 }
5939 if (to == NULL)
5940 return(am->state);
5941 return(to);
5942}
5943
5961xmlAutomataStatePtr
5962xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5963 xmlAutomataStatePtr to, const xmlChar *token,
5964 const xmlChar *token2, void *data) {
5965 xmlRegAtomPtr atom;
5966 xmlChar err_msg[200];
5967
5968 if ((am == NULL) || (from == NULL) || (token == NULL))
5969 return(NULL);
5970 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5971 if (atom == NULL)
5972 return(NULL);
5973 atom->data = data;
5974 atom->neg = 1;
5975 if ((token2 == NULL) || (*token2 == 0)) {
5976 atom->valuep = xmlStrdup(token);
5977 } else {
5978 int lenn, lenp;
5979 xmlChar *str;
5980
5981 lenn = strlen((char *) token2);
5982 lenp = strlen((char *) token);
5983
5984 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5985 if (str == NULL) {
5986 xmlRegFreeAtom(atom);
5987 return(NULL);
5988 }
5989 memcpy(&str[0], token, lenp);
5990 str[lenp] = '|';
5991 memcpy(&str[lenp + 1], token2, lenn);
5992 str[lenn + lenp + 1] = 0;
5993
5994 atom->valuep = str;
5995 }
5996 snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
5997 err_msg[199] = 0;
5998 atom->valuep2 = xmlStrdup(err_msg);
5999
6000 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
6001 xmlRegFreeAtom(atom);
6002 return(NULL);
6003 }
6004 am->negs++;
6005 if (to == NULL)
6006 return(am->state);
6007 return(to);
6008}
6009
6028xmlAutomataStatePtr
6029xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
6030 xmlAutomataStatePtr to, const xmlChar *token,
6031 const xmlChar *token2,
6032 int min, int max, void *data) {
6033 xmlRegAtomPtr atom;
6034 int counter;
6035
6036 if ((am == NULL) || (from == NULL) || (token == NULL))
6037 return(NULL);
6038 if (min < 0)
6039 return(NULL);
6040 if ((max < min) || (max < 1))
6041 return(NULL);
6042 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6043 if (atom == NULL)
6044 return(NULL);
6045 if ((token2 == NULL) || (*token2 == 0)) {
6046 atom->valuep = xmlStrdup(token);
6047 } else {
6048 int lenn, lenp;
6049 xmlChar *str;
6050
6051 lenn = strlen((char *) token2);
6052 lenp = strlen((char *) token);
6053
6054 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
6055 if (str == NULL) {
6056 xmlRegFreeAtom(atom);
6057 return(NULL);
6058 }
6059 memcpy(&str[0], token, lenp);
6060 str[lenp] = '|';
6061 memcpy(&str[lenp + 1], token2, lenn);
6062 str[lenn + lenp + 1] = 0;
6063
6064 atom->valuep = str;
6065 }
6066 atom->data = data;
6067 if (min == 0)
6068 atom->min = 1;
6069 else
6070 atom->min = min;
6071 atom->max = max;
6072
6073 /*
6074 * associate a counter to the transition.
6075 */
6076 counter = xmlRegGetCounter(am);
6077 am->counters[counter].min = min;
6078 am->counters[counter].max = max;
6079
6080 /* xmlFAGenerateTransitions(am, from, to, atom); */
6081 if (to == NULL) {
6082 to = xmlRegNewState(am);
6083 xmlRegStatePush(am, to);
6084 }
6085 xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6086 xmlRegAtomPush(am, atom);
6087 am->state = to;
6088
6089 if (to == NULL)
6090 to = am->state;
6091 if (to == NULL)
6092 return(NULL);
6093 if (min == 0)
6094 xmlFAGenerateEpsilonTransition(am, from, to);
6095 return(to);
6096}
6097
6115xmlAutomataStatePtr
6116xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6117 xmlAutomataStatePtr to, const xmlChar *token,
6118 int min, int max, void *data) {
6119 xmlRegAtomPtr atom;
6120 int counter;
6121
6122 if ((am == NULL) || (from == NULL) || (token == NULL))
6123 return(NULL);
6124 if (min < 0)
6125 return(NULL);
6126 if ((max < min) || (max < 1))
6127 return(NULL);
6128 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6129 if (atom == NULL)
6130 return(NULL);
6131 atom->valuep = xmlStrdup(token);
6132 atom->data = data;
6133 if (min == 0)
6134 atom->min = 1;
6135 else
6136 atom->min = min;
6137 atom->max = max;
6138
6139 /*
6140 * associate a counter to the transition.
6141 */
6142 counter = xmlRegGetCounter(am);
6143 am->counters[counter].min = min;
6144 am->counters[counter].max = max;
6145
6146 /* xmlFAGenerateTransitions(am, from, to, atom); */
6147 if (to == NULL) {
6148 to = xmlRegNewState(am);
6149 xmlRegStatePush(am, to);
6150 }
6151 xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6152 xmlRegAtomPush(am, atom);
6153 am->state = to;
6154
6155 if (to == NULL)
6156 to = am->state;
6157 if (to == NULL)
6158 return(NULL);
6159 if (min == 0)
6160 xmlFAGenerateEpsilonTransition(am, from, to);
6161 return(to);
6162}
6163
6183xmlAutomataStatePtr
6184xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
6185 xmlAutomataStatePtr to, const xmlChar *token,
6186 const xmlChar *token2,
6187 int min, int max, void *data) {
6188 xmlRegAtomPtr atom;
6189 int counter;
6190
6191 if ((am == NULL) || (from == NULL) || (token == NULL))
6192 return(NULL);
6193 if (min < 1)
6194 return(NULL);
6195 if (max < min)
6196 return(NULL);
6197 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6198 if (atom == NULL)
6199 return(NULL);
6200 if ((token2 == NULL) || (*token2 == 0)) {
6201 atom->valuep = xmlStrdup(token);
6202 } else {
6203 int lenn, lenp;
6204 xmlChar *str;
6205
6206 lenn = strlen((char *) token2);
6207 lenp = strlen((char *) token);
6208
6209 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
6210 if (str == NULL) {
6211 xmlRegFreeAtom(atom);
6212 return(NULL);
6213 }
6214 memcpy(&str[0], token, lenp);
6215 str[lenp] = '|';
6216 memcpy(&str[lenp + 1], token2, lenn);
6217 str[lenn + lenp + 1] = 0;
6218
6219 atom->valuep = str;
6220 }
6221 atom->data = data;
6222 atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6223 atom->min = min;
6224 atom->max = max;
6225 /*
6226 * associate a counter to the transition.
6227 */
6228 counter = xmlRegGetCounter(am);
6229 am->counters[counter].min = 1;
6230 am->counters[counter].max = 1;
6231
6232 /* xmlFAGenerateTransitions(am, from, to, atom); */
6233 if (to == NULL) {
6234 to = xmlRegNewState(am);
6235 xmlRegStatePush(am, to);
6236 }
6237 xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6238 xmlRegAtomPush(am, atom);
6239 am->state = to;
6240 return(to);
6241}
6242
6243
6244
6263xmlAutomataStatePtr
6264xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6265 xmlAutomataStatePtr to, const xmlChar *token,
6266 int min, int max, void *data) {
6267 xmlRegAtomPtr atom;
6268 int counter;
6269
6270 if ((am == NULL) || (from == NULL) || (token == NULL))
6271 return(NULL);
6272 if (min < 1)
6273 return(NULL);
6274 if (max < min)
6275 return(NULL);
6276 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6277 if (atom == NULL)
6278 return(NULL);
6279 atom->valuep = xmlStrdup(token);
6280 atom->data = data;
6281 atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6282 atom->min = min;
6283 atom->max = max;
6284 /*
6285 * associate a counter to the transition.
6286 */
6287 counter = xmlRegGetCounter(am);
6288 am->counters[counter].min = 1;
6289 am->counters[counter].max = 1;
6290
6291 /* xmlFAGenerateTransitions(am, from, to, atom); */
6292 if (to == NULL) {
6293 to = xmlRegNewState(am);
6294 xmlRegStatePush(am, to);
6295 }
6296 xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6297 xmlRegAtomPush(am, atom);
6298 am->state = to;
6299 return(to);
6300}
6301
6310xmlAutomataStatePtr
6311xmlAutomataNewState(xmlAutomataPtr am) {
6312 xmlAutomataStatePtr to;
6313
6314 if (am == NULL)
6315 return(NULL);
6316 to = xmlRegNewState(am);
6317 xmlRegStatePush(am, to);
6318 return(to);
6319}
6320
6333xmlAutomataStatePtr
6334xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
6335 xmlAutomataStatePtr to) {
6336 if ((am == NULL) || (from == NULL))
6337 return(NULL);
6338 xmlFAGenerateEpsilonTransition(am, from, to);
6339 if (to == NULL)
6340 return(am->state);
6341 return(to);
6342}
6343
6358xmlAutomataStatePtr
6359xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6360 xmlAutomataStatePtr to, int lax) {
6361 if ((am == NULL) || (from == NULL))
6362 return(NULL);
6363 xmlFAGenerateAllTransition(am, from, to, lax);
6364 if (to == NULL)
6365 return(am->state);
6366 return(to);
6367}
6368
6379int
6380xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
6381 int ret;
6382
6383 if (am == NULL)
6384 return(-1);
6385
6386 ret = xmlRegGetCounter(am);
6387 if (ret < 0)
6388 return(-1);
6389 am->counters[ret].min = min;
6390 am->counters[ret].max = max;
6391 return(ret);
6392}
6393
6407xmlAutomataStatePtr
6408xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6409 xmlAutomataStatePtr to, int counter) {
6410 if ((am == NULL) || (from == NULL) || (counter < 0))
6411 return(NULL);
6412 xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
6413 if (to == NULL)
6414 return(am->state);
6415 return(to);
6416}
6417
6431xmlAutomataStatePtr
6432xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6433 xmlAutomataStatePtr to, int counter) {
6434 if ((am == NULL) || (from == NULL) || (counter < 0))
6435 return(NULL);
6436 xmlFAGenerateCountedTransition(am, from, to, counter);
6437 if (to == NULL)
6438 return(am->state);
6439 return(to);
6440}
6441
6451xmlRegexpPtr
6452xmlAutomataCompile(xmlAutomataPtr am) {
6453 xmlRegexpPtr ret;
6454
6455 if ((am == NULL) || (am->error != 0)) return(NULL);
6456 xmlFAEliminateEpsilonTransitions(am);
6457 /* xmlFAComputesDeterminism(am); */
6458 ret = xmlRegEpxFromParse(am);
6459
6460 return(ret);
6461}
6462
6471int
6472xmlAutomataIsDeterminist(xmlAutomataPtr am) {
6473 int ret;
6474
6475 if (am == NULL)
6476 return(-1);
6477
6478 ret = xmlFAComputesDeterminism(am);
6479 return(ret);
6480}
6481#endif /* LIBXML_AUTOMATA_ENABLED */
6482
6483#ifdef LIBXML_EXPR_ENABLED
6484/************************************************************************
6485 * *
6486 * Formal Expression handling code *
6487 * *
6488 ************************************************************************/
6489/************************************************************************
6490 * *
6491 * Expression handling context *
6492 * *
6493 ************************************************************************/
6494
6495struct _xmlExpCtxt {
6496 xmlDictPtr dict;
6497 xmlExpNodePtr *table;
6498 int size;
6499 int nbElems;
6500 int nb_nodes;
6501 int maxNodes;
6502 const char *expr;
6503 const char *cur;
6504 int nb_cons;
6505 int tabSize;
6506};
6507
6517xmlExpCtxtPtr
6518xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
6519 xmlExpCtxtPtr ret;
6520 int size = 256;
6521
6522 if (maxNodes <= 4096)
6523 maxNodes = 4096;
6524
6525 ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
6526 if (ret == NULL)
6527 return(NULL);
6528 memset(ret, 0, sizeof(xmlExpCtxt));
6529 ret->size = size;
6530 ret->nbElems = 0;
6531 ret->maxNodes = maxNodes;
6532 ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
6533 if (ret->table == NULL) {
6534 xmlFree(ret);
6535 return(NULL);
6536 }
6537 memset(ret->table, 0, size * sizeof(xmlExpNodePtr));
6538 if (dict == NULL) {
6539 ret->dict = xmlDictCreate();
6540 if (ret->dict == NULL) {
6541 xmlFree(ret->table);
6542 xmlFree(ret);
6543 return(NULL);
6544 }
6545 } else {
6546 ret->dict = dict;
6547 xmlDictReference(ret->dict);
6548 }
6549 return(ret);
6550}
6551
6558void
6559xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
6560 if (ctxt == NULL)
6561 return;
6562 xmlDictFree(ctxt->dict);
6563 if (ctxt->table != NULL)
6564 xmlFree(ctxt->table);
6565 xmlFree(ctxt);
6566}
6567
6568/************************************************************************
6569 * *
6570 * Structure associated to an expression node *
6571 * *
6572 ************************************************************************/
6573#define MAX_NODES 10000
6574
6575/* #define DEBUG_DERIV */
6576
6577/*
6578 * TODO:
6579 * - Wildcards
6580 * - public API for creation
6581 *
6582 * Started
6583 * - regression testing
6584 *
6585 * Done
6586 * - split into module and test tool
6587 * - memleaks
6588 */
6589
6590typedef enum {
6591 XML_EXP_NILABLE = (1 << 0)
6592} xmlExpNodeInfo;
6593
6594#define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE)
6595
6596struct _xmlExpNode {
6597 unsigned char type;/* xmlExpNodeType */
6598 unsigned char info;/* OR of xmlExpNodeInfo */
6599 unsigned short key; /* the hash key */
6600 unsigned int ref; /* The number of references */
6601 int c_max; /* the maximum length it can consume */
6602 xmlExpNodePtr exp_left;
6603 xmlExpNodePtr next;/* the next node in the hash table or free list */
6604 union {
6605 struct {
6606 int f_min;
6607 int f_max;
6608 } count;
6609 struct {
6610 xmlExpNodePtr f_right;
6611 } children;
6612 const xmlChar *f_str;
6613 } field;
6614};
6615
6616#define exp_min field.count.f_min
6617#define exp_max field.count.f_max
6618/* #define exp_left field.children.f_left */
6619#define exp_right field.children.f_right
6620#define exp_str field.f_str
6621
6622static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type);
6623static xmlExpNode forbiddenExpNode = {
6624 XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}}
6625};
6626xmlExpNodePtr forbiddenExp = &forbiddenExpNode;
6627static xmlExpNode emptyExpNode = {
6628 XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}}
6629};
6630xmlExpNodePtr emptyExp = &emptyExpNode;
6631
6632/************************************************************************
6633 * *
6634 * The custom hash table for unicity and canonicalization *
6635 * of sub-expressions pointers *
6636 * *
6637 ************************************************************************/
6638/*
6639 * xmlExpHashNameComputeKey:
6640 * Calculate the hash key for a token
6641 */
6642static unsigned short
6643xmlExpHashNameComputeKey(const xmlChar *name) {
6644 unsigned short value = 0L;
6645 char ch;
6646
6647 if (name != NULL) {
6648 value += 30 * (*name);
6649 while ((ch = *name++) != 0) {
6650 value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
6651 }
6652 }
6653 return (value);
6654}
6655
6656/*
6657 * xmlExpHashComputeKey:
6658 * Calculate the hash key for a compound expression
6659 */
6660static unsigned short
6661xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
6662 xmlExpNodePtr right) {
6663 unsigned long value;
6664 unsigned short ret;
6665
6666 switch (type) {
6667 case XML_EXP_SEQ:
6668 value = left->key;
6669 value += right->key;
6670 value *= 3;
6671 ret = (unsigned short) value;
6672 break;
6673 case XML_EXP_OR:
6674 value = left->key;
6675 value += right->key;
6676 value *= 7;
6677 ret = (unsigned short) value;
6678 break;
6679 case XML_EXP_COUNT:
6680 value = left->key;
6681 value += right->key;
6682 ret = (unsigned short) value;
6683 break;
6684 default:
6685 ret = 0;
6686 }
6687 return(ret);
6688}
6689
6690
6691static xmlExpNodePtr
6692xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
6693 xmlExpNodePtr ret;
6694
6695 if (ctxt->nb_nodes >= MAX_NODES)
6696 return(NULL);
6697 ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
6698 if (ret == NULL)
6699 return(NULL);
6700 memset(ret, 0, sizeof(xmlExpNode));
6701 ret->type = type;
6702 ret->next = NULL;
6703 ctxt->nb_nodes++;
6704 ctxt->nb_cons++;
6705 return(ret);
6706}
6707
6718static xmlExpNodePtr
6719xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
6720 xmlExpNodePtr left, xmlExpNodePtr right,
6721 const xmlChar *name, int min, int max) {
6722 unsigned short kbase, key;
6723 xmlExpNodePtr entry;
6724 xmlExpNodePtr insert;
6725
6726 if (ctxt == NULL)
6727 return(NULL);
6728
6729 /*
6730 * Check for duplicate and insertion location.
6731 */
6732 if (type == XML_EXP_ATOM) {
6733 kbase = xmlExpHashNameComputeKey(name);
6734 } else if (type == XML_EXP_COUNT) {
6735 /* COUNT reduction rule 1 */
6736 /* a{1} -> a */
6737 if (min == max) {
6738 if (min == 1) {
6739 return(left);
6740 }
6741 if (min == 0) {
6742 xmlExpFree(ctxt, left);
6743 return(emptyExp);
6744 }
6745 }
6746 if (min < 0) {
6747 xmlExpFree(ctxt, left);
6748 return(forbiddenExp);
6749 }
6750 if (max == -1)
6751 kbase = min + 79;
6752 else
6753 kbase = max - min;
6754 kbase += left->key;
6755 } else if (type == XML_EXP_OR) {
6756 /* Forbid reduction rules */
6757 if (left->type == XML_EXP_FORBID) {
6758 xmlExpFree(ctxt, left);
6759 return(right);
6760 }
6761 if (right->type == XML_EXP_FORBID) {
6762 xmlExpFree(ctxt, right);
6763 return(left);
6764 }
6765
6766 /* OR reduction rule 1 */
6767 /* a | a reduced to a */
6768 if (left == right) {
6769 xmlExpFree(ctxt, right);
6770 return(left);
6771 }
6772 /* OR canonicalization rule 1 */
6773 /* linearize (a | b) | c into a | (b | c) */
6774 if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
6775 xmlExpNodePtr tmp = left;
6776 left = right;
6777 right = tmp;
6778 }
6779 /* OR reduction rule 2 */
6780 /* a | (a | b) and b | (a | b) are reduced to a | b */
6781 if (right->type == XML_EXP_OR) {
6782 if ((left == right->exp_left) ||
6783 (left == right->exp_right)) {
6784 xmlExpFree(ctxt, left);
6785 return(right);
6786 }
6787 }
6788 /* OR canonicalization rule 2 */
6789 /* linearize (a | b) | c into a | (b | c) */
6790 if (left->type == XML_EXP_OR) {
6791 xmlExpNodePtr tmp;
6792
6793 /* OR canonicalization rule 2 */
6794 if ((left->exp_right->type != XML_EXP_OR) &&
6795 (left->exp_right->key < left->exp_left->key)) {
6796 tmp = left->exp_right;
6797 left->exp_right = left->exp_left;
6798 left->exp_left = tmp;
6799 }
6800 left->exp_right->ref++;
6801 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
6802 NULL, 0, 0);
6803 left->exp_left->ref++;
6804 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
6805 NULL, 0, 0);
6806
6807 xmlExpFree(ctxt, left);
6808 return(tmp);
6809 }
6810 if (right->type == XML_EXP_OR) {
6811 /* Ordering in the tree */
6812 /* C | (A | B) -> A | (B | C) */
6813 if (left->key > right->exp_right->key) {
6814 xmlExpNodePtr tmp;
6815 right->exp_right->ref++;
6816 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
6817 left, NULL, 0, 0);
6818 right->exp_left->ref++;
6819 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6820 tmp, NULL, 0, 0);
6821 xmlExpFree(ctxt, right);
6822 return(tmp);
6823 }
6824 /* Ordering in the tree */
6825 /* B | (A | C) -> A | (B | C) */
6826 if (left->key > right->exp_left->key) {
6827 xmlExpNodePtr tmp;
6828 right->exp_right->ref++;
6829 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
6830 right->exp_right, NULL, 0, 0);
6831 right->exp_left->ref++;
6832 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6833 tmp, NULL, 0, 0);
6834 xmlExpFree(ctxt, right);
6835 return(tmp);
6836 }
6837 }
6838 /* we know both types are != XML_EXP_OR here */
6839 else if (left->key > right->key) {
6840 xmlExpNodePtr tmp = left;
6841 left = right;
6842 right = tmp;
6843 }
6844 kbase = xmlExpHashComputeKey(type, left, right);
6845 } else if (type == XML_EXP_SEQ) {
6846 /* Forbid reduction rules */
6847 if (left->type == XML_EXP_FORBID) {
6848 xmlExpFree(ctxt, right);
6849 return(left);
6850 }
6851 if (right->type == XML_EXP_FORBID) {
6852 xmlExpFree(ctxt, left);
6853 return(right);
6854 }
6855 /* Empty reduction rules */
6856 if (right->type == XML_EXP_EMPTY) {
6857 return(left);
6858 }
6859 if (left->type == XML_EXP_EMPTY) {
6860 return(right);
6861 }
6862 kbase = xmlExpHashComputeKey(type, left, right);
6863 } else
6864 return(NULL);
6865
6866 key = kbase % ctxt->size;
6867 if (ctxt->table[key] != NULL) {
6868 for (insert = ctxt->table[key]; insert != NULL;
6869 insert = insert->next) {
6870 if ((insert->key == kbase) &&
6871 (insert->type == type)) {
6872 if (type == XML_EXP_ATOM) {
6873 if (name == insert->exp_str) {
6874 insert->ref++;
6875 return(insert);
6876 }
6877 } else if (type == XML_EXP_COUNT) {
6878 if ((insert->exp_min == min) && (insert->exp_max == max) &&
6879 (insert->exp_left == left)) {
6880 insert->ref++;
6881 left->ref--;
6882 return(insert);
6883 }
6884 } else if ((insert->exp_left == left) &&
6885 (insert->exp_right == right)) {
6886 insert->ref++;
6887 left->ref--;
6888 right->ref--;
6889 return(insert);
6890 }
6891 }
6892 }
6893 }
6894
6895 entry = xmlExpNewNode(ctxt, type);
6896 if (entry == NULL)
6897 return(NULL);
6898 entry->key = kbase;
6899 if (type == XML_EXP_ATOM) {
6900 entry->exp_str = name;
6901 entry->c_max = 1;
6902 } else if (type == XML_EXP_COUNT) {
6903 entry->exp_min = min;
6904 entry->exp_max = max;
6905 entry->exp_left = left;
6906 if ((min == 0) || (IS_NILLABLE(left)))
6907 entry->info |= XML_EXP_NILABLE;
6908 if (max < 0)
6909 entry->c_max = -1;
6910 else
6911 entry->c_max = max * entry->exp_left->c_max;
6912 } else {
6913 entry->exp_left = left;
6914 entry->exp_right = right;
6915 if (type == XML_EXP_OR) {
6916 if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
6917 entry->info |= XML_EXP_NILABLE;
6918 if ((entry->exp_left->c_max == -1) ||
6919 (entry->exp_right->c_max == -1))
6920 entry->c_max = -1;
6921 else if (entry->exp_left->c_max > entry->exp_right->c_max)
6922 entry->c_max = entry->exp_left->c_max;
6923 else
6924 entry->c_max = entry->exp_right->c_max;
6925 } else {
6926 if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
6927 entry->info |= XML_EXP_NILABLE;
6928 if ((entry->exp_left->c_max == -1) ||
6929 (entry->exp_right->c_max == -1))
6930 entry->c_max = -1;
6931 else
6932 entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
6933 }
6934 }
6935 entry->ref = 1;
6936 if (ctxt->table[key] != NULL)
6937 entry->next = ctxt->table[key];
6938
6939 ctxt->table[key] = entry;
6940 ctxt->nbElems++;
6941
6942 return(entry);
6943}
6944
6952void
6953xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
6954 if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
6955 return;
6956 exp->ref--;
6957 if (exp->ref == 0) {
6958 unsigned short key;
6959
6960 /* Unlink it first from the hash table */
6961 key = exp->key % ctxt->size;
6962 if (ctxt->table[key] == exp) {
6963 ctxt->table[key] = exp->next;
6964 } else {
6965 xmlExpNodePtr tmp;
6966
6967 tmp = ctxt->table[key];
6968 while (tmp != NULL) {
6969 if (tmp->next == exp) {
6970 tmp->next = exp->next;
6971 break;
6972 }
6973 tmp = tmp->next;
6974 }
6975 }
6976
6977 if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) {
6978 xmlExpFree(ctxt, exp->exp_left);
6979 xmlExpFree(ctxt, exp->exp_right);
6980 } else if (exp->type == XML_EXP_COUNT) {
6981 xmlExpFree(ctxt, exp->exp_left);
6982 }
6983 xmlFree(exp);
6984 ctxt->nb_nodes--;
6985 }
6986}
6987
6994void
6995xmlExpRef(xmlExpNodePtr exp) {
6996 if (exp != NULL)
6997 exp->ref++;
6998}
6999
7010xmlExpNodePtr
7011xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
7012 if ((ctxt == NULL) || (name == NULL))
7013 return(NULL);
7014 name = xmlDictLookup(ctxt->dict, name, len);
7015 if (name == NULL)
7016 return(NULL);
7017 return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0));
7018}
7019
7033xmlExpNodePtr
7034xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
7035 if (ctxt == NULL)
7036 return(NULL);
7037 if ((left == NULL) || (right == NULL)) {
7038 xmlExpFree(ctxt, left);
7039 xmlExpFree(ctxt, right);
7040 return(NULL);
7041 }
7042 return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));
7043}
7044
7058xmlExpNodePtr
7059xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
7060 if (ctxt == NULL)
7061 return(NULL);
7062 if ((left == NULL) || (right == NULL)) {
7063 xmlExpFree(ctxt, left);
7064 xmlExpFree(ctxt, right);
7065 return(NULL);
7066 }
7067 return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));
7068}
7069
7084xmlExpNodePtr
7085xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
7086 if (ctxt == NULL)
7087 return(NULL);
7088 if ((subset == NULL) || (min < 0) || (max < -1) ||
7089 ((max >= 0) && (min > max))) {
7090 xmlExpFree(ctxt, subset);
7091 return(NULL);
7092 }
7093 return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
7094 NULL, NULL, min, max));
7095}
7096
7097/************************************************************************
7098 * *
7099 * Public API for operations on expressions *
7100 * *
7101 ************************************************************************/
7102
7103static int
7104xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7105 const xmlChar**list, int len, int nb) {
7106 int tmp, tmp2;
7107tail:
7108 switch (exp->type) {
7109 case XML_EXP_EMPTY:
7110 return(0);
7111 case XML_EXP_ATOM:
7112 for (tmp = 0;tmp < nb;tmp++)
7113 if (list[tmp] == exp->exp_str)
7114 return(0);
7115 if (nb >= len)
7116 return(-2);
7117 list[nb] = exp->exp_str;
7118 return(1);
7119 case XML_EXP_COUNT:
7120 exp = exp->exp_left;
7121 goto tail;
7122 case XML_EXP_SEQ:
7123 case XML_EXP_OR:
7124 tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb);
7125 if (tmp < 0)
7126 return(tmp);
7127 tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len,
7128 nb + tmp);
7129 if (tmp2 < 0)
7130 return(tmp2);
7131 return(tmp + tmp2);
7132 }
7133 return(-1);
7134}
7135
7148int
7149xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7150 const xmlChar**langList, int len) {
7151 if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
7152 return(-1);
7153 return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0));
7154}
7155
7156static int
7157xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7158 const xmlChar**list, int len, int nb) {
7159 int tmp, tmp2;
7160tail:
7161 switch (exp->type) {
7162 case XML_EXP_FORBID:
7163 return(0);
7164 case XML_EXP_EMPTY:
7165 return(0);
7166 case XML_EXP_ATOM:
7167 for (tmp = 0;tmp < nb;tmp++)
7168 if (list[tmp] == exp->exp_str)
7169 return(0);
7170 if (nb >= len)
7171 return(-2);
7172 list[nb] = exp->exp_str;
7173 return(1);
7174 case XML_EXP_COUNT:
7175 exp = exp->exp_left;
7176 goto tail;
7177 case XML_EXP_SEQ:
7178 tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
7179 if (tmp < 0)
7180 return(tmp);
7181 if (IS_NILLABLE(exp->exp_left)) {
7182 tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
7183 nb + tmp);
7184 if (tmp2 < 0)
7185 return(tmp2);
7186 tmp += tmp2;
7187 }
7188 return(tmp);
7189 case XML_EXP_OR:
7190 tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
7191 if (tmp < 0)
7192 return(tmp);
7193 tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
7194 nb + tmp);
7195 if (tmp2 < 0)
7196 return(tmp2);
7197 return(tmp + tmp2);
7198 }
7199 return(-1);
7200}
7201
7216int
7217xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7218 const xmlChar**tokList, int len) {
7219 if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
7220 return(-1);
7221 return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0));
7222}
7223
7232int
7233xmlExpIsNillable(xmlExpNodePtr exp) {
7234 if (exp == NULL)
7235 return(-1);
7236 return(IS_NILLABLE(exp) != 0);
7237}
7238
7239static xmlExpNodePtr
7240xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
7241{
7242 xmlExpNodePtr ret;
7243
7244 switch (exp->type) {
7245 case XML_EXP_EMPTY:
7246 return(forbiddenExp);
7247 case XML_EXP_FORBID:
7248 return(forbiddenExp);
7249 case XML_EXP_ATOM:
7250 if (exp->exp_str == str) {
7251#ifdef DEBUG_DERIV
7252 printf("deriv atom: equal => Empty\n");
7253#endif
7254 ret = emptyExp;
7255 } else {
7256#ifdef DEBUG_DERIV
7257 printf("deriv atom: mismatch => forbid\n");
7258#endif
7259 /* TODO wildcards here */
7260 ret = forbiddenExp;
7261 }
7262 return(ret);
7263 case XML_EXP_OR: {
7264 xmlExpNodePtr tmp;
7265
7266#ifdef DEBUG_DERIV
7267 printf("deriv or: => or(derivs)\n");
7268#endif
7269 tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7270 if (tmp == NULL) {
7271 return(NULL);
7272 }
7273 ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
7274 if (ret == NULL) {
7275 xmlExpFree(ctxt, tmp);
7276 return(NULL);
7277 }
7278 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
7279 NULL, 0, 0);
7280 return(ret);
7281 }
7282 case XML_EXP_SEQ:
7283#ifdef DEBUG_DERIV
7284 printf("deriv seq: starting with left\n");
7285#endif
7286 ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7287 if (ret == NULL) {
7288 return(NULL);
7289 } else if (ret == forbiddenExp) {
7290 if (IS_NILLABLE(exp->exp_left)) {
7291#ifdef DEBUG_DERIV
7292 printf("deriv seq: left failed but nillable\n");
7293#endif
7294 ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
7295 }
7296 } else {
7297#ifdef DEBUG_DERIV
7298 printf("deriv seq: left match => sequence\n");
7299#endif
7300 exp->exp_right->ref++;
7301 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
7302 NULL, 0, 0);
7303 }
7304 return(ret);
7305 case XML_EXP_COUNT: {
7306 int min, max;
7307 xmlExpNodePtr tmp;
7308
7309 if (exp->exp_max == 0)
7310 return(forbiddenExp);
7311 ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7312 if (ret == NULL)
7313 return(NULL);
7314 if (ret == forbiddenExp) {
7315#ifdef DEBUG_DERIV
7316 printf("deriv count: pattern mismatch => forbid\n");
7317#endif
7318 return(ret);
7319 }
7320 if (exp->exp_max == 1)
7321 return(ret);
7322 if (exp->exp_max < 0) /* unbounded */
7323 max = -1;
7324 else
7325 max = exp->exp_max - 1;
7326 if (exp->exp_min > 0)
7327 min = exp->exp_min - 1;
7328 else
7329 min = 0;
7330 exp->exp_left->ref++;
7331 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
7332 NULL, min, max);
7333 if (ret == emptyExp) {
7334#ifdef DEBUG_DERIV
7335 printf("deriv count: match to empty => new count\n");
7336#endif
7337 return(tmp);
7338 }
7339#ifdef DEBUG_DERIV
7340 printf("deriv count: match => sequence with new count\n");
7341#endif
7342 return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
7343 NULL, 0, 0));
7344 }
7345 }
7346 return(NULL);
7347}
7348
7361xmlExpNodePtr
7362xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7363 const xmlChar *str, int len) {
7364 const xmlChar *input;
7365
7366 if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) {
7367 return(NULL);
7368 }
7369 /*
7370 * check the string is in the dictionary, if yes use an interned
7371 * copy, otherwise we know it's not an acceptable input
7372 */
7373 input = xmlDictExists(ctxt->dict, str, len);
7374 if (input == NULL) {
7375 return(forbiddenExp);
7376 }
7377 return(xmlExpStringDeriveInt(ctxt, exp, input));
7378}
7379
7380static int
7381xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
7382 int ret = 1;
7383
7384 if (sub->c_max == -1) {
7385 if (exp->c_max != -1)
7386 ret = 0;
7387 } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) {
7388 ret = 0;
7389 }
7390#if 0
7391 if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp)))
7392 ret = 0;
7393#endif
7394 return(ret);
7395}
7396
7397static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7398 xmlExpNodePtr sub);
7414static int
7415xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
7416 xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
7417 int i;
7418 xmlExpNodePtr tmp, tmp2;
7419
7420 if (mult != NULL) *mult = NULL;
7421 if (remain != NULL) *remain = NULL;
7422 if (exp->c_max == -1) return(0);
7423 if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub))) return(0);
7424
7425 for (i = 1;i <= exp->c_max;i++) {
7426 sub->ref++;
7427 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
7428 sub, NULL, NULL, i, i);
7429 if (tmp == NULL) {
7430 return(-1);
7431 }
7432 if (!xmlExpCheckCard(tmp, exp)) {
7433 xmlExpFree(ctxt, tmp);
7434 continue;
7435 }
7436 tmp2 = xmlExpExpDeriveInt(ctxt, tmp, exp);
7437 if (tmp2 == NULL) {
7438 xmlExpFree(ctxt, tmp);
7439 return(-1);
7440 }
7441 if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) {
7442 if (remain != NULL)
7443 *remain = tmp2;
7444 else
7445 xmlExpFree(ctxt, tmp2);
7446 if (mult != NULL)
7447 *mult = tmp;
7448 else
7449 xmlExpFree(ctxt, tmp);
7450#ifdef DEBUG_DERIV
7451 printf("Divide succeeded %d\n", i);
7452#endif
7453 return(i);
7454 }
7455 xmlExpFree(ctxt, tmp);
7456 xmlExpFree(ctxt, tmp2);
7457 }
7458#ifdef DEBUG_DERIV
7459 printf("Divide failed\n");
7460#endif
7461 return(0);
7462}
7463
7475static xmlExpNodePtr
7476xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7477 xmlExpNodePtr ret, tmp, tmp2, tmp3;
7478 const xmlChar **tab;
7479 int len, i;
7480
7481 /*
7482 * In case of equality and if the expression can only consume a finite
7483 * amount, then the derivation is empty
7484 */
7485 if ((exp == sub) && (exp->c_max >= 0)) {
7486#ifdef DEBUG_DERIV
7487 printf("Equal(exp, sub) and finite -> Empty\n");
7488#endif
7489 return(emptyExp);
7490 }
7491 /*
7492 * decompose sub sequence first
7493 */
7494 if (sub->type == XML_EXP_EMPTY) {
7495#ifdef DEBUG_DERIV
7496 printf("Empty(sub) -> Empty\n");
7497#endif
7498 exp->ref++;
7499 return(exp);
7500 }
7501 if (sub->type == XML_EXP_SEQ) {
7502#ifdef DEBUG_DERIV
7503 printf("Seq(sub) -> decompose\n");
7504#endif
7505 tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
7506 if (tmp == NULL)
7507 return(NULL);
7508 if (tmp == forbiddenExp)
7509 return(tmp);
7510 ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
7511 xmlExpFree(ctxt, tmp);
7512 return(ret);
7513 }
7514 if (sub->type == XML_EXP_OR) {
7515#ifdef DEBUG_DERIV
7516 printf("Or(sub) -> decompose\n");
7517#endif
7518 tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
7519 if (tmp == forbiddenExp)
7520 return(tmp);
7521 if (tmp == NULL)
7522 return(NULL);
7523 ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
7524 if ((ret == NULL) || (ret == forbiddenExp)) {
7525 xmlExpFree(ctxt, tmp);
7526 return(ret);
7527 }
7528 return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
7529 }
7530 if (!xmlExpCheckCard(exp, sub)) {
7531#ifdef DEBUG_DERIV
7532 printf("CheckCard(exp, sub) failed -> Forbid\n");
7533#endif
7534 return(forbiddenExp);
7535 }
7536 switch (exp->type) {
7537 case XML_EXP_EMPTY:
7538 if (sub == emptyExp)
7539 return(emptyExp);
7540#ifdef DEBUG_DERIV
7541 printf("Empty(exp) -> Forbid\n");
7542#endif
7543 return(forbiddenExp);
7544 case XML_EXP_FORBID:
7545#ifdef DEBUG_DERIV
7546 printf("Forbid(exp) -> Forbid\n");
7547#endif
7548 return(forbiddenExp);
7549 case XML_EXP_ATOM:
7550 if (sub->type == XML_EXP_ATOM) {
7551 /* TODO: handle wildcards */
7552 if (exp->exp_str == sub->exp_str) {
7553#ifdef DEBUG_DERIV
7554 printf("Atom match -> Empty\n");
7555#endif
7556 return(emptyExp);
7557 }
7558#ifdef DEBUG_DERIV
7559 printf("Atom mismatch -> Forbid\n");
7560#endif
7561 return(forbiddenExp);
7562 }
7563 if ((sub->type == XML_EXP_COUNT) &&
7564 (sub->exp_max == 1) &&
7565 (sub->exp_left->type == XML_EXP_ATOM)) {
7566 /* TODO: handle wildcards */
7567 if (exp->exp_str == sub->exp_left->exp_str) {
7568#ifdef DEBUG_DERIV
7569 printf("Atom match -> Empty\n");
7570#endif
7571 return(emptyExp);
7572 }
7573#ifdef DEBUG_DERIV
7574 printf("Atom mismatch -> Forbid\n");
7575#endif
7576 return(forbiddenExp);
7577 }
7578#ifdef DEBUG_DERIV
7579 printf("Complex exp vs Atom -> Forbid\n");
7580#endif
7581 return(forbiddenExp);
7582 case XML_EXP_SEQ:
7583 /* try to get the sequence consumed only if possible */
7584 if (xmlExpCheckCard(exp->exp_left, sub)) {
7585 /* See if the sequence can be consumed directly */
7586#ifdef DEBUG_DERIV
7587 printf("Seq trying left only\n");
7588#endif
7589 ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7590 if ((ret != forbiddenExp) && (ret != NULL)) {
7591#ifdef DEBUG_DERIV
7592 printf("Seq trying left only worked\n");
7593#endif
7594 /*
7595 * TODO: assumption here that we are determinist
7596 * i.e. we won't get to a nillable exp left
7597 * subset which could be matched by the right
7598 * part too.
7599 * e.g.: (a | b)+,(a | c) and 'a+,a'
7600 */
7601 exp->exp_right->ref++;
7602 return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
7603 exp->exp_right, NULL, 0, 0));
7604 }
7605#ifdef DEBUG_DERIV
7606 } else {
7607 printf("Seq: left too short\n");
7608#endif
7609 }
7610 /* Try instead to decompose */
7611 if (sub->type == XML_EXP_COUNT) {
7612 int min, max;
7613
7614#ifdef DEBUG_DERIV
7615 printf("Seq: sub is a count\n");
7616#endif
7617 ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
7618 if (ret == NULL)
7619 return(NULL);
7620 if (ret != forbiddenExp) {
7621#ifdef DEBUG_DERIV
7622 printf("Seq , Count match on left\n");
7623#endif
7624 if (sub->exp_max < 0)
7625 max = -1;
7626 else
7627 max = sub->exp_max -1;
7628 if (sub->exp_min > 0)
7629 min = sub->exp_min -1;
7630 else
7631 min = 0;
7632 exp->exp_right->ref++;
7633 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
7634 exp->exp_right, NULL, 0, 0);
7635 if (tmp == NULL)
7636 return(NULL);
7637
7638 sub->exp_left->ref++;
7639 tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
7640 sub->exp_left, NULL, NULL, min, max);
7641 if (tmp2 == NULL) {
7642 xmlExpFree(ctxt, tmp);
7643 return(NULL);
7644 }
7645 ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7646 xmlExpFree(ctxt, tmp);
7647 xmlExpFree(ctxt, tmp2);
7648 return(ret);
7649 }
7650 }
7651 /* we made no progress on structured operations */
7652 break;
7653 case XML_EXP_OR:
7654#ifdef DEBUG_DERIV
7655 printf("Or , trying both side\n");
7656#endif
7657 ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7658 if (ret == NULL)
7659 return(NULL);
7660 tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
7661 if (tmp == NULL) {
7662 xmlExpFree(ctxt, ret);
7663 return(NULL);
7664 }
7665 return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
7666 case XML_EXP_COUNT: {
7667 int min, max;
7668
7669 if (sub->type == XML_EXP_COUNT) {
7670 /*
7671 * Try to see if the loop is completely subsumed
7672 */
7673 tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
7674 if (tmp == NULL)
7675 return(NULL);
7676 if (tmp == forbiddenExp) {
7677 int mult;
7678
7679#ifdef DEBUG_DERIV
7680 printf("Count, Count inner don't subsume\n");
7681#endif
7682 mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
7683 NULL, &tmp);
7684 if (mult <= 0) {
7685#ifdef DEBUG_DERIV
7686 printf("Count, Count not multiple => forbidden\n");
7687#endif
7688 return(forbiddenExp);
7689 }
7690 if (sub->exp_max == -1) {
7691 max = -1;
7692 if (exp->exp_max == -1) {
7693 if (exp->exp_min <= sub->exp_min * mult)
7694 min = 0;
7695 else
7696 min = exp->exp_min - sub->exp_min * mult;
7697 } else {
7698#ifdef DEBUG_DERIV
7699 printf("Count, Count finite can't subsume infinite\n");
7700#endif
7701 xmlExpFree(ctxt, tmp);
7702 return(forbiddenExp);
7703 }
7704 } else {
7705 if (exp->exp_max == -1) {
7706#ifdef DEBUG_DERIV
7707 printf("Infinite loop consume mult finite loop\n");
7708#endif
7709 if (exp->exp_min > sub->exp_min * mult) {
7710 max = -1;
7711 min = exp->exp_min - sub->exp_min * mult;
7712 } else {
7713 max = -1;
7714 min = 0;
7715 }
7716 } else {
7717 if (exp->exp_max < sub->exp_max * mult) {
7718#ifdef DEBUG_DERIV
7719 printf("loops max mult mismatch => forbidden\n");
7720#endif
7721 xmlExpFree(ctxt, tmp);
7722 return(forbiddenExp);
7723 }
7724 if (sub->exp_max * mult > exp->exp_min)
7725 min = 0;
7726 else
7727 min = exp->exp_min - sub->exp_max * mult;
7728 max = exp->exp_max - sub->exp_max * mult;
7729 }
7730 }
7731 } else if (!IS_NILLABLE(tmp)) {
7732 /*
7733 * TODO: loop here to try to grow if working on finite
7734 * blocks.
7735 */
7736#ifdef DEBUG_DERIV
7737 printf("Count, Count remain not nillable => forbidden\n");
7738#endif
7739 xmlExpFree(ctxt, tmp);
7740 return(forbiddenExp);
7741 } else if (sub->exp_max == -1) {
7742 if (exp->exp_max == -1) {
7743 if (exp->exp_min <= sub->exp_min) {
7744#ifdef DEBUG_DERIV
7745 printf("Infinite loops Okay => COUNT(0,Inf)\n");
7746#endif
7747 max = -1;
7748 min = 0;
7749 } else {
7750#ifdef DEBUG_DERIV
7751 printf("Infinite loops min => Count(X,Inf)\n");
7752#endif
7753 max = -1;
7754 min = exp->exp_min - sub->exp_min;
7755 }
7756 } else if (exp->exp_min > sub->exp_min) {
7757#ifdef DEBUG_DERIV
7758 printf("loops min mismatch 1 => forbidden ???\n");
7759#endif
7760 xmlExpFree(ctxt, tmp);
7761 return(forbiddenExp);
7762 } else {
7763 max = -1;
7764 min = 0;
7765 }
7766 } else {
7767 if (exp->exp_max == -1) {
7768#ifdef DEBUG_DERIV
7769 printf("Infinite loop consume finite loop\n");
7770#endif
7771 if (exp->exp_min > sub->exp_min) {
7772 max = -1;
7773 min = exp->exp_min - sub->exp_min;
7774 } else {
7775 max = -1;
7776 min = 0;
7777 }
7778 } else {
7779 if (exp->exp_max < sub->exp_max) {
7780#ifdef DEBUG_DERIV
7781 printf("loops max mismatch => forbidden\n");
7782#endif
7783 xmlExpFree(ctxt, tmp);
7784 return(forbiddenExp);
7785 }
7786 if (sub->exp_max > exp->exp_min)
7787 min = 0;
7788 else
7789 min = exp->exp_min - sub->exp_max;
7790 max = exp->exp_max - sub->exp_max;
7791 }
7792 }
7793#ifdef DEBUG_DERIV
7794 printf("loops match => SEQ(COUNT())\n");
7795#endif
7796 exp->exp_left->ref++;
7797 tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7798 NULL, NULL, min, max);
7799 if (tmp2 == NULL) {
7800 return(NULL);
7801 }
7802 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7803 NULL, 0, 0);
7804 return(ret);
7805 }
7806 tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7807 if (tmp == NULL)
7808 return(NULL);
7809 if (tmp == forbiddenExp) {
7810#ifdef DEBUG_DERIV
7811 printf("loop mismatch => forbidden\n");
7812#endif
7813 return(forbiddenExp);
7814 }
7815 if (exp->exp_min > 0)
7816 min = exp->exp_min - 1;
7817 else
7818 min = 0;
7819 if (exp->exp_max < 0)
7820 max = -1;
7821 else
7822 max = exp->exp_max - 1;
7823
7824#ifdef DEBUG_DERIV
7825 printf("loop match => SEQ(COUNT())\n");
7826#endif
7827 exp->exp_left->ref++;
7828 tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7829 NULL, NULL, min, max);
7830 if (tmp2 == NULL)
7831 return(NULL);
7832 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7833 NULL, 0, 0);
7834 return(ret);
7835 }
7836 }
7837
7838#ifdef DEBUG_DERIV
7839 printf("Fallback to derivative\n");
7840#endif
7841 if (IS_NILLABLE(sub)) {
7842 if (!(IS_NILLABLE(exp)))
7843 return(forbiddenExp);
7844 else
7845 ret = emptyExp;
7846 } else
7847 ret = NULL;
7848 /*
7849 * here the structured derivation made no progress so
7850 * we use the default token based derivation to force one more step
7851 */
7852 if (ctxt->tabSize == 0)
7853 ctxt->tabSize = 40;
7854
7855 tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
7856 sizeof(const xmlChar *));
7857 if (tab == NULL) {
7858 return(NULL);
7859 }
7860
7861 /*
7862 * collect all the strings accepted by the subexpression on input
7863 */
7864 len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7865 while (len < 0) {
7866 const xmlChar **temp;
7867 temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 *
7868 sizeof(const xmlChar *));
7869 if (temp == NULL) {
7870 xmlFree((xmlChar **) tab);
7871 return(NULL);
7872 }
7873 tab = temp;
7874 ctxt->tabSize *= 2;
7875 len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7876 }
7877 for (i = 0;i < len;i++) {
7878 tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
7879 if ((tmp == NULL) || (tmp == forbiddenExp)) {
7880 xmlExpFree(ctxt, ret);
7881 xmlFree((xmlChar **) tab);
7882 return(tmp);
7883 }
7884 tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
7885 if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
7886 xmlExpFree(ctxt, tmp);
7887 xmlExpFree(ctxt, ret);
7888 xmlFree((xmlChar **) tab);
7889 return(tmp);
7890 }
7891 tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7892 xmlExpFree(ctxt, tmp);
7893 xmlExpFree(ctxt, tmp2);
7894
7895 if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
7896 xmlExpFree(ctxt, ret);
7897 xmlFree((xmlChar **) tab);
7898 return(tmp3);
7899 }
7900
7901 if (ret == NULL)
7902 ret = tmp3;
7903 else {
7904 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
7905 if (ret == NULL) {
7906 xmlFree((xmlChar **) tab);
7907 return(NULL);
7908 }
7909 }
7910 }
7911 xmlFree((xmlChar **) tab);
7912 return(ret);
7913}
7914
7929xmlExpNodePtr
7930xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7931 if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7932 return(NULL);
7933
7934 /*
7935 * O(1) speedups
7936 */
7937 if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7938#ifdef DEBUG_DERIV
7939 printf("Sub nillable and not exp : can't subsume\n");
7940#endif
7941 return(forbiddenExp);
7942 }
7943 if (xmlExpCheckCard(exp, sub) == 0) {
7944#ifdef DEBUG_DERIV
7945 printf("sub generate longer sequences than exp : can't subsume\n");
7946#endif
7947 return(forbiddenExp);
7948 }
7949 return(xmlExpExpDeriveInt(ctxt, exp, sub));
7950}
7951
7963int
7964xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7965 xmlExpNodePtr tmp;
7966
7967 if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7968 return(-1);
7969
7970 /*
7971 * TODO: speedup by checking the language of sub is a subset of the
7972 * language of exp
7973 */
7974 /*
7975 * O(1) speedups
7976 */
7977 if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7978#ifdef DEBUG_DERIV
7979 printf("Sub nillable and not exp : can't subsume\n");
7980#endif
7981 return(0);
7982 }
7983 if (xmlExpCheckCard(exp, sub) == 0) {
7984#ifdef DEBUG_DERIV
7985 printf("sub generate longer sequences than exp : can't subsume\n");
7986#endif
7987 return(0);
7988 }
7989 tmp = xmlExpExpDeriveInt(ctxt, exp, sub);
7990#ifdef DEBUG_DERIV
7991 printf("Result derivation :\n");
7992 PRINT_EXP(tmp);
7993#endif
7994 if (tmp == NULL)
7995 return(-1);
7996 if (tmp == forbiddenExp)
7997 return(0);
7998 if (tmp == emptyExp)
7999 return(1);
8000 if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
8001 xmlExpFree(ctxt, tmp);
8002 return(1);
8003 }
8004 xmlExpFree(ctxt, tmp);
8005 return(0);
8006}
8007
8008/************************************************************************
8009 * *
8010 * Parsing expression *
8011 * *
8012 ************************************************************************/
8013
8014static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
8015
8016#undef CUR
8017#define CUR (*ctxt->cur)
8018#undef NEXT
8019#define NEXT ctxt->cur++;
8020#undef IS_BLANK
8021#define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
8022#define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
8023
8024static int
8025xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
8026 int ret = 0;
8027
8029 if (CUR == '*') {
8030 NEXT
8031 return(-1);
8032 }
8033 if ((CUR < '0') || (CUR > '9'))
8034 return(-1);
8035 while ((CUR >= '0') && (CUR <= '9')) {
8036 ret = ret * 10 + (CUR - '0');
8037 NEXT
8038 }
8039 return(ret);
8040}
8041
8042static xmlExpNodePtr
8043xmlExpParseOr(xmlExpCtxtPtr ctxt) {
8044 const char *base;
8045 xmlExpNodePtr ret;
8046 const xmlChar *val;
8047
8049 base = ctxt->cur;
8050 if (*ctxt->cur == '(') {
8051 NEXT
8052 ret = xmlExpParseExpr(ctxt);
8054 if (*ctxt->cur != ')') {
8055 fprintf(stderr, "unbalanced '(' : %s\n", base);
8056 xmlExpFree(ctxt, ret);
8057 return(NULL);
8058 }
8059 NEXT;
8061 goto parse_quantifier;
8062 }
8063 while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
8064 (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
8065 (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
8066 NEXT;
8067 val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
8068 if (val == NULL)
8069 return(NULL);
8070 ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
8071 if (ret == NULL)
8072 return(NULL);
8074parse_quantifier:
8075 if (CUR == '{') {
8076 int min, max;
8077
8078 NEXT
8079 min = xmlExpParseNumber(ctxt);
8080 if (min < 0) {
8081 xmlExpFree(ctxt, ret);
8082 return(NULL);
8083 }
8085 if (CUR == ',') {
8086 NEXT
8087 max = xmlExpParseNumber(ctxt);
8089 } else
8090 max = min;
8091 if (CUR != '}') {
8092 xmlExpFree(ctxt, ret);
8093 return(NULL);
8094 }
8095 NEXT
8096 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
8097 min, max);
8099 } else if (CUR == '?') {
8100 NEXT
8101 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
8102 0, 1);
8104 } else if (CUR == '+') {
8105 NEXT
8106 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
8107 1, -1);
8109 } else if (CUR == '*') {
8110 NEXT
8111 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
8112 0, -1);
8114 }
8115 return(ret);
8116}
8117
8118
8119static xmlExpNodePtr
8120xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
8121 xmlExpNodePtr ret, right;
8122
8123 ret = xmlExpParseOr(ctxt);
8125 while (CUR == '|') {
8126 NEXT
8127 right = xmlExpParseOr(ctxt);
8128 if (right == NULL) {
8129 xmlExpFree(ctxt, ret);
8130 return(NULL);
8131 }
8132 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
8133 if (ret == NULL)
8134 return(NULL);
8135 }
8136 return(ret);
8137}
8138
8139static xmlExpNodePtr
8140xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
8141 xmlExpNodePtr ret, right;
8142
8143 ret = xmlExpParseSeq(ctxt);
8145 while (CUR == ',') {
8146 NEXT
8147 right = xmlExpParseSeq(ctxt);
8148 if (right == NULL) {
8149 xmlExpFree(ctxt, ret);
8150 return(NULL);
8151 }
8152 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
8153 if (ret == NULL)
8154 return(NULL);
8155 }
8156 return(ret);
8157}
8158
8176xmlExpNodePtr
8177xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
8178 xmlExpNodePtr ret;
8179
8180 ctxt->expr = expr;
8181 ctxt->cur = expr;
8182
8183 ret = xmlExpParseExpr(ctxt);
8185 if (*ctxt->cur != 0) {
8186 xmlExpFree(ctxt, ret);
8187 return(NULL);
8188 }
8189 return(ret);
8190}
8191
8192static void
8193xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
8194 xmlExpNodePtr c;
8195
8196 if (expr == NULL) return;
8197 if (glob) xmlBufferWriteChar(buf, "(");
8198 switch (expr->type) {
8199 case XML_EXP_EMPTY:
8200 xmlBufferWriteChar(buf, "empty");
8201 break;
8202 case XML_EXP_FORBID:
8203 xmlBufferWriteChar(buf, "forbidden");
8204 break;
8205 case XML_EXP_ATOM:
8206 xmlBufferWriteCHAR(buf, expr->exp_str);
8207 break;
8208 case XML_EXP_SEQ:
8209 c = expr->exp_left;
8210 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8211 xmlExpDumpInt(buf, c, 1);
8212 else
8213 xmlExpDumpInt(buf, c, 0);
8214 xmlBufferWriteChar(buf, " , ");
8215 c = expr->exp_right;
8216 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8217 xmlExpDumpInt(buf, c, 1);
8218 else
8219 xmlExpDumpInt(buf, c, 0);
8220 break;
8221 case XML_EXP_OR:
8222 c = expr->exp_left;
8223 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8224 xmlExpDumpInt(buf, c, 1);
8225 else
8226 xmlExpDumpInt(buf, c, 0);
8227 xmlBufferWriteChar(buf, " | ");
8228 c = expr->exp_right;
8229 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8230 xmlExpDumpInt(buf, c, 1);
8231 else
8232 xmlExpDumpInt(buf, c, 0);
8233 break;
8234 case XML_EXP_COUNT: {
8235 char rep[40];
8236
8237 c = expr->exp_left;
8238 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8239 xmlExpDumpInt(buf, c, 1);
8240 else
8241 xmlExpDumpInt(buf, c, 0);
8242 if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
8243 rep[0] = '?';
8244 rep[1] = 0;
8245 } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
8246 rep[0] = '*';
8247 rep[1] = 0;
8248 } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
8249 rep[0] = '+';
8250 rep[1] = 0;
8251 } else if (expr->exp_max == expr->exp_min) {
8252 snprintf(rep, 39, "{%d}", expr->exp_min);
8253 } else if (expr->exp_max < 0) {
8254 snprintf(rep, 39, "{%d,inf}", expr->exp_min);
8255 } else {
8256 snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
8257 }
8258 rep[39] = 0;
8259 xmlBufferWriteChar(buf, rep);
8260 break;
8261 }
8262 default:
8263 fprintf(stderr, "Error in tree\n");
8264 }
8265 if (glob)
8266 xmlBufferWriteChar(buf, ")");
8267}
8275void
8276xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
8277 if ((buf == NULL) || (expr == NULL))
8278 return;
8279 xmlExpDumpInt(buf, expr, 0);
8280}
8281
8290int
8291xmlExpMaxToken(xmlExpNodePtr expr) {
8292 if (expr == NULL)
8293 return(-1);
8294 return(expr->c_max);
8295}
8296
8305int
8306xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
8307 if (ctxt == NULL)
8308 return(-1);
8309 return(ctxt->nb_nodes);
8310}
8311
8320int
8321xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
8322 if (ctxt == NULL)
8323 return(-1);
8324 return(ctxt->nb_cons);
8325}
8326
8327#endif /* LIBXML_EXPR_ENABLED */
8328
8329#endif /* LIBXML_REGEXP_ENABLED */
#define TODO
Definition: SAX2.c:44
ACPI_SIZE strlen(const char *String)
Definition: utclib.c:269
struct outqueuenode * tail
Definition: adnsresfilter.c:66
static int state
Definition: maze.c:121
#define ok(value,...)
Definition: atltest.h:57
#define index(s, c)
Definition: various.h:29
INT copy(TCHAR source[MAX_PATH], TCHAR dest[MAX_PATH], INT append, DWORD lpdwFlags, BOOL bTouch)
Definition: copy.c:51
_In_ fcb _In_ chunk _In_ uint64_t _In_ uint64_t _In_ bool _In_opt_ void _In_opt_ PIRP _In_ LIST_ENTRY * rollback
Definition: btrfs_drv.h:1365
cd_progress_ptr progress
Definition: cdjpeg.h:152
Definition: list.h:37
#define NULL
Definition: types.h:112
#define IS_BLANK(c)
Definition: attributes.c:26
#define SKIP_BLANKS
Definition: pattern.c:1175
#define NXT(val)
Definition: pattern.c:1172
#define CUR
Definition: pattern.c:1170
unsigned int idx
Definition: utils.c:41
content
Definition: atl_ax.c:994
static WCHAR no[MAX_STRING_RESOURCE_LEN]
Definition: object.c:2340
unsigned short(__cdecl typeof(TIFFCurrentDirectory))(struct tiff *)
Definition: typeof.h:94
#define ERROR(name)
Definition: error_private.h:53
char ** glob(const char *v)
Definition: fake.c:36
#define printf
Definition: freeldr.h:97
#define NEXT(n, i)
FxCollectionEntry * cur
GLuint start
Definition: gl.h:1545
GLint GLint GLsizei GLsizei GLsizei depth
Definition: gl.h:1546
GLuint GLuint GLsizei GLenum type
Definition: gl.h:1545
GLuint GLuint end
Definition: gl.h:1545
GLuint GLuint GLsizei count
Definition: gl.h:1545
GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * data
Definition: gl.h:1950
GLdouble GLdouble t
Definition: gl.h:2047
GLsizeiptr size
Definition: glext.h:5919
GLuint res
Definition: glext.h:9613
const GLubyte * c
Definition: glext.h:8905
GLdouble GLdouble GLdouble GLdouble top
Definition: glext.h:10859
GLdouble GLdouble right
Definition: glext.h:10859
GLenum GLint * range
Definition: glext.h:7539
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glext.h:7751
GLint left
Definition: glext.h:7726
GLboolean GLenum GLenum GLvoid * values
Definition: glext.h:5666
GLbitfield flags
Definition: glext.h:7161
GLint GLint GLsizei GLuint * counters
Definition: glext.h:11114
GLuint GLfloat * val
Definition: glext.h:7180
GLenum GLsizei len
Definition: glext.h:6722
GLenum GLenum GLenum input
Definition: glext.h:9031
GLenum target
Definition: glext.h:7315
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat token
Definition: glfuncs.h:210
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
Definition: glfuncs.h:250
@ extra
Definition: id3.c:95
#define SIZE_MAX
Definition: limits.h:75
#define INT_MAX
Definition: limits.h:40
#define stdout
Definition: stdio.h:99
#define stderr
Definition: stdio.h:100
_Check_return_opt_ _CRTIMP int __cdecl fprintf(_Inout_ FILE *_File, _In_z_ _Printf_format_string_ const char *_Format,...)
uint32_t entry
Definition: isohybrid.c:63
#define c
Definition: ke_i.h:80
#define error(str)
Definition: mkdosfs.c:1605
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
#define for
Definition: utility.h:88
char string[160]
Definition: util.h:11
static IPrintDialogCallback callback
Definition: printdlg.c:326
static DNS_RECORDW r1
Definition: record.c:37
static DNS_RECORDW r2
Definition: record.c:38
static UINT UINT last
Definition: font.c:45
DWORD exp
Definition: msg.c:16058
#define min(a, b)
Definition: monoChain.cc:55
#define L(x)
Definition: ntvdm.h:50
#define IS_COMBINING(c)
#define IS_DIGIT(c)
#define IS_CHAR(c)
#define IS_EXTENDER(c)
#define IS_LETTER(c)
#define long
Definition: qsort.c:33
static unsigned __int64 next
Definition: rand_nt.c:6
#define err(...)
const WCHAR * str
static calc_node_t temp
Definition: rpn_ieee.c:38
XMLPUBFUN const xmlChar *XMLCALL xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len)
Definition: dict.c:867
XMLPUBFUN const xmlChar *XMLCALL xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len)
Definition: dict.c:1007
XMLPUBFUN xmlDictPtr XMLCALL xmlDictCreate(void)
Definition: dict.c:577
XMLPUBFUN void XMLCALL xmlDictFree(xmlDictPtr dict)
Definition: dict.c:802
XMLPUBFUN int XMLCALL xmlDictReference(xmlDictPtr dict)
Definition: dict.c:647
XMLPUBVAR xmlMallocFunc xmlMallocAtomic
Definition: globals.h:249
XMLPUBVAR xmlMallocFunc xmlMalloc
Definition: globals.h:248
XMLPUBVAR xmlFreeFunc xmlFree
Definition: globals.h:251
XMLPUBVAR xmlReallocFunc xmlRealloc
Definition: globals.h:250
XMLPUBFUN void XMLCALL xmlBufferWriteChar(xmlBufferPtr buf, const char *string)
XMLPUBFUN void XMLCALL xmlBufferWriteCHAR(xmlBufferPtr buf, const xmlChar *string)
#define NEXTL(l)
Definition: parser.c:2156
#define CUR_SCHAR(s, l)
Definition: parser.c:2164
#define memset(x, y, z)
Definition: compat.h:39
SOCKET WSAAPI accept(IN SOCKET s, OUT LPSOCKADDR addr, OUT INT FAR *addrlen)
Definition: socklife.c:23
CardRegion * from
Definition: spigame.cpp:19
Definition: dict.c:111
DWORD status
Definition: pdh_main.c:106
Definition: query.h:87
int type
Definition: query.h:88
Definition: parser.c:44
Definition: copy.c:22
Definition: name.c:39
Definition: send.c:48
Definition: ps.c:97
#define max(a, b)
Definition: svc.c:63
Definition: pdh_main.c:94
int ret
#define snprintf
Definition: wintirpc.h:48
@ XML_ERR_FATAL
Definition: xmlerror.h:28
@ XML_FROM_REGEXP
Definition: xmlerror.h:51
@ XML_REGEXP_COMPILE_ERROR
Definition: xmlerror.h:417
@ XML_ERR_NO_MEMORY
Definition: xmlerror.h:102
static int insert
Definition: xmllint.c:138
XMLPUBFUN const xmlChar *XMLCALL xmlStrchr(const xmlChar *str, xmlChar val)
Definition: xmlstring.c:325
XMLPUBFUN xmlChar *XMLCALL xmlStrndup(const xmlChar *cur, int len)
Definition: xmlstring.c:42
XMLPUBFUN xmlChar *XMLCALL xmlStrdup(const xmlChar *cur)
Definition: xmlstring.c:67
#define BAD_CAST
Definition: xmlstring.h:35
XMLPUBFUN int XMLCALL xmlStrEqual(const xmlChar *str1, const xmlChar *str2)
Definition: xmlstring.c:160
unsigned char xmlChar
Definition: xmlstring.h:28