ReactOS 0.4.16-dev-2232-gc2aaa52
xmlregexp.c
Go to the documentation of this file.
1/*
2 * regexp.c: generic and extensible Regular Expression engine
3 *
4 * Basically designed with the purpose of compiling regexps for
5 * the variety of validation/schemas mechanisms now available in
6 * XML related specifications these include:
7 * - XML-1.0 DTD validation
8 * - XML Schemas structure part 1
9 * - XML Schemas Datatypes part 2 especially Appendix F
10 * - RELAX-NG/TREX i.e. the counter proposal
11 *
12 * See Copyright for the status of this software.
13 *
14 * Daniel Veillard <veillard@redhat.com>
15 */
16
17#define IN_LIBXML
18#include "libxml.h"
19
20#ifdef LIBXML_REGEXP_ENABLED
21
22#include <stdio.h>
23#include <string.h>
24#include <limits.h>
25
26#include <libxml/tree.h>
28#include <libxml/xmlregexp.h>
29#include <libxml/xmlautomata.h>
30#include <libxml/xmlunicode.h>
31
32#include "private/error.h"
33#include "private/regexp.h"
34
35#ifndef SIZE_MAX
36#define SIZE_MAX ((size_t) -1)
37#endif
38
39#define MAX_PUSH 10000000
40
41/*
42 * -2 and -3 are used by xmlValidateElementType for other things.
43 */
44#define XML_REGEXP_OK 0
45#define XML_REGEXP_NOT_FOUND (-1)
46#define XML_REGEXP_INTERNAL_ERROR (-4)
47#define XML_REGEXP_OUT_OF_MEMORY (-5)
48#define XML_REGEXP_INTERNAL_LIMIT (-6)
49#define XML_REGEXP_INVALID_UTF8 (-7)
50
51#ifdef ERROR
52#undef ERROR
53#endif
54#define ERROR(str) \
55 ctxt->error = XML_REGEXP_COMPILE_ERROR; \
56 xmlRegexpErrCompile(ctxt, str);
57#define NEXT ctxt->cur++
58#define CUR (*(ctxt->cur))
59#define NXT(index) (ctxt->cur[index])
60
61#define NEXTL(l) ctxt->cur += l;
62#define XML_REG_STRING_SEPARATOR '|'
63/*
64 * Need PREV to check on a '-' within a Character Group. May only be used
65 * when it's guaranteed that cur is not at the beginning of ctxt->string!
66 */
67#define PREV (ctxt->cur[-1])
68
74#define TODO \
75 xmlGenericError(xmlGenericErrorContext, \
76 "Unimplemented block at %s:%d\n", \
77 __FILE__, __LINE__);
78
79/************************************************************************
80 * *
81 * Datatypes and structures *
82 * *
83 ************************************************************************/
84
85/*
86 * Note: the order of the enums below is significant, do not shuffle
87 */
88typedef enum {
89 XML_REGEXP_EPSILON = 1,
90 XML_REGEXP_CHARVAL,
91 XML_REGEXP_RANGES,
92 XML_REGEXP_SUBREG, /* used for () sub regexps */
93 XML_REGEXP_STRING,
94 XML_REGEXP_ANYCHAR, /* . */
95 XML_REGEXP_ANYSPACE, /* \s */
96 XML_REGEXP_NOTSPACE, /* \S */
97 XML_REGEXP_INITNAME, /* \l */
98 XML_REGEXP_NOTINITNAME, /* \L */
99 XML_REGEXP_NAMECHAR, /* \c */
100 XML_REGEXP_NOTNAMECHAR, /* \C */
101 XML_REGEXP_DECIMAL, /* \d */
102 XML_REGEXP_NOTDECIMAL, /* \D */
103 XML_REGEXP_REALCHAR, /* \w */
104 XML_REGEXP_NOTREALCHAR, /* \W */
105 XML_REGEXP_LETTER = 100,
106 XML_REGEXP_LETTER_UPPERCASE,
107 XML_REGEXP_LETTER_LOWERCASE,
108 XML_REGEXP_LETTER_TITLECASE,
109 XML_REGEXP_LETTER_MODIFIER,
110 XML_REGEXP_LETTER_OTHERS,
111 XML_REGEXP_MARK,
112 XML_REGEXP_MARK_NONSPACING,
113 XML_REGEXP_MARK_SPACECOMBINING,
114 XML_REGEXP_MARK_ENCLOSING,
115 XML_REGEXP_NUMBER,
116 XML_REGEXP_NUMBER_DECIMAL,
117 XML_REGEXP_NUMBER_LETTER,
118 XML_REGEXP_NUMBER_OTHERS,
119 XML_REGEXP_PUNCT,
120 XML_REGEXP_PUNCT_CONNECTOR,
121 XML_REGEXP_PUNCT_DASH,
122 XML_REGEXP_PUNCT_OPEN,
123 XML_REGEXP_PUNCT_CLOSE,
124 XML_REGEXP_PUNCT_INITQUOTE,
125 XML_REGEXP_PUNCT_FINQUOTE,
126 XML_REGEXP_PUNCT_OTHERS,
127 XML_REGEXP_SEPAR,
128 XML_REGEXP_SEPAR_SPACE,
129 XML_REGEXP_SEPAR_LINE,
130 XML_REGEXP_SEPAR_PARA,
131 XML_REGEXP_SYMBOL,
132 XML_REGEXP_SYMBOL_MATH,
133 XML_REGEXP_SYMBOL_CURRENCY,
134 XML_REGEXP_SYMBOL_MODIFIER,
135 XML_REGEXP_SYMBOL_OTHERS,
136 XML_REGEXP_OTHER,
137 XML_REGEXP_OTHER_CONTROL,
138 XML_REGEXP_OTHER_FORMAT,
139 XML_REGEXP_OTHER_PRIVATE,
140 XML_REGEXP_OTHER_NA,
141 XML_REGEXP_BLOCK_NAME
142} xmlRegAtomType;
143
144typedef enum {
145 XML_REGEXP_QUANT_EPSILON = 1,
146 XML_REGEXP_QUANT_ONCE,
147 XML_REGEXP_QUANT_OPT,
148 XML_REGEXP_QUANT_MULT,
149 XML_REGEXP_QUANT_PLUS,
150 XML_REGEXP_QUANT_ONCEONLY,
151 XML_REGEXP_QUANT_ALL,
152 XML_REGEXP_QUANT_RANGE
153} xmlRegQuantType;
154
155typedef enum {
156 XML_REGEXP_START_STATE = 1,
157 XML_REGEXP_FINAL_STATE,
158 XML_REGEXP_TRANS_STATE,
159 XML_REGEXP_SINK_STATE,
160 XML_REGEXP_UNREACH_STATE
161} xmlRegStateType;
162
163typedef enum {
164 XML_REGEXP_MARK_NORMAL = 0,
165 XML_REGEXP_MARK_START,
166 XML_REGEXP_MARK_VISITED
167} xmlRegMarkedType;
168
169typedef struct _xmlRegRange xmlRegRange;
170typedef xmlRegRange *xmlRegRangePtr;
171
172struct _xmlRegRange {
173 int neg; /* 0 normal, 1 not, 2 exclude */
174 xmlRegAtomType type;
175 int start;
176 int end;
177 xmlChar *blockName;
178};
179
180typedef struct _xmlRegAtom xmlRegAtom;
181typedef xmlRegAtom *xmlRegAtomPtr;
182
183typedef struct _xmlAutomataState xmlRegState;
184typedef xmlRegState *xmlRegStatePtr;
185
186struct _xmlRegAtom {
187 int no;
188 xmlRegAtomType type;
189 xmlRegQuantType quant;
190 int min;
191 int max;
192
193 void *valuep;
194 void *valuep2;
195 int neg;
196 int codepoint;
197 xmlRegStatePtr start;
198 xmlRegStatePtr start0;
199 xmlRegStatePtr stop;
200 int maxRanges;
201 int nbRanges;
202 xmlRegRangePtr *ranges;
203 void *data;
204};
205
206typedef struct _xmlRegCounter xmlRegCounter;
207typedef xmlRegCounter *xmlRegCounterPtr;
208
209struct _xmlRegCounter {
210 int min;
211 int max;
212};
213
214typedef struct _xmlRegTrans xmlRegTrans;
215typedef xmlRegTrans *xmlRegTransPtr;
216
217struct _xmlRegTrans {
218 xmlRegAtomPtr atom;
219 int to;
220 int counter;
221 int count;
222 int nd;
223};
224
225struct _xmlAutomataState {
226 xmlRegStateType type;
227 xmlRegMarkedType mark;
228 xmlRegMarkedType markd;
229 xmlRegMarkedType reached;
230 int no;
231 int maxTrans;
232 int nbTrans;
233 xmlRegTrans *trans;
234 /* knowing states pointing to us can speed things up */
235 int maxTransTo;
236 int nbTransTo;
237 int *transTo;
238};
239
240typedef struct _xmlAutomata xmlRegParserCtxt;
241typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
242
243#define AM_AUTOMATA_RNG 1
244
245struct _xmlAutomata {
247 xmlChar *cur;
248
249 int error;
250 int neg;
251
252 xmlRegStatePtr start;
253 xmlRegStatePtr end;
254 xmlRegStatePtr state;
255
256 xmlRegAtomPtr atom;
257
258 int maxAtoms;
259 int nbAtoms;
260 xmlRegAtomPtr *atoms;
261
262 int maxStates;
263 int nbStates;
264 xmlRegStatePtr *states;
265
266 int maxCounters;
267 int nbCounters;
268 xmlRegCounter *counters;
269
270 int determinist;
271 int negs;
272 int flags;
273
274 int depth;
275};
276
277struct _xmlRegexp {
279 int nbStates;
280 xmlRegStatePtr *states;
281 int nbAtoms;
282 xmlRegAtomPtr *atoms;
283 int nbCounters;
284 xmlRegCounter *counters;
285 int determinist;
286 int flags;
287 /*
288 * That's the compact form for determinists automatas
289 */
290 int nbstates;
291 int *compact;
292 void **transdata;
293 int nbstrings;
294 xmlChar **stringMap;
295};
296
297typedef struct _xmlRegExecRollback xmlRegExecRollback;
298typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
299
300struct _xmlRegExecRollback {
301 xmlRegStatePtr state;/* the current state */
302 int index; /* the index in the input stack */
303 int nextbranch; /* the next transition to explore in that state */
304 int *counts; /* save the automata state if it has some */
305};
306
307typedef struct _xmlRegInputToken xmlRegInputToken;
308typedef xmlRegInputToken *xmlRegInputTokenPtr;
309
310struct _xmlRegInputToken {
311 xmlChar *value;
312 void *data;
313};
314
315struct _xmlRegExecCtxt {
316 int status; /* execution status != 0 indicate an error */
317 int determinist; /* did we find an indeterministic behaviour */
318 xmlRegexpPtr comp; /* the compiled regexp */
319 xmlRegExecCallbacks callback;
320 void *data;
321
322 xmlRegStatePtr state;/* the current state */
323 int transno; /* the current transition on that state */
324 int transcount; /* the number of chars in char counted transitions */
325
326 /*
327 * A stack of rollback states
328 */
329 int maxRollbacks;
330 int nbRollbacks;
331 xmlRegExecRollback *rollbacks;
332
333 /*
334 * The state of the automata if any
335 */
336 int *counts;
337
338 /*
339 * The input stack
340 */
341 int inputStackMax;
342 int inputStackNr;
343 int index;
344 int *charStack;
345 const xmlChar *inputString; /* when operating on characters */
346 xmlRegInputTokenPtr inputStack;/* when operating on strings */
347
348 /*
349 * error handling
350 */
351 int errStateNo; /* the error state number */
352 xmlRegStatePtr errState; /* the error state */
353 xmlChar *errString; /* the string raising the error */
354 int *errCounts; /* counters at the error state */
355 int nbPush;
356};
357
358#define REGEXP_ALL_COUNTER 0x123456
359#define REGEXP_ALL_LAX_COUNTER 0x123457
360
361static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
362static void xmlRegFreeState(xmlRegStatePtr state);
363static void xmlRegFreeAtom(xmlRegAtomPtr atom);
364static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
365static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
366static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
367 int neg, int start, int end, const xmlChar *blockName);
368
369/************************************************************************
370 * *
371 * Regexp memory error handler *
372 * *
373 ************************************************************************/
380static void
381xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
382{
383 const char *regexp = NULL;
384 if (ctxt != NULL) {
385 regexp = (const char *) ctxt->string;
386 ctxt->error = XML_ERR_NO_MEMORY;
387 }
390 regexp, NULL, 0, 0,
391 "Memory allocation failed : %s\n", extra);
392}
393
400static void
401xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
402{
403 const char *regexp = NULL;
404 int idx = 0;
405
406 if (ctxt != NULL) {
407 regexp = (const char *) ctxt->string;
408 idx = ctxt->cur - ctxt->string;
409 ctxt->error = XML_REGEXP_COMPILE_ERROR;
410 }
413 regexp, NULL, idx, 0,
414 "failed to compile: %s\n", extra);
415}
416
417/************************************************************************
418 * *
419 * Allocation/Deallocation *
420 * *
421 ************************************************************************/
422
423static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
424
435static void*
436xmlRegCalloc2(size_t dim1, size_t dim2, size_t elemSize) {
437 size_t totalSize;
438 void *ret;
439
440 /* Check for overflow */
441 if ((dim2 == 0) || (elemSize == 0) ||
442 (dim1 > SIZE_MAX / dim2 / elemSize))
443 return (NULL);
444 totalSize = dim1 * dim2 * elemSize;
445 ret = xmlMalloc(totalSize);
446 if (ret != NULL)
447 memset(ret, 0, totalSize);
448 return (ret);
449}
450
459static xmlRegexpPtr
460xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
461 xmlRegexpPtr ret;
462
463 ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
464 if (ret == NULL) {
465 xmlRegexpErrMemory(ctxt, "compiling regexp");
466 return(NULL);
467 }
468 memset(ret, 0, sizeof(xmlRegexp));
469 ret->string = ctxt->string;
470 ret->nbStates = ctxt->nbStates;
471 ret->states = ctxt->states;
472 ret->nbAtoms = ctxt->nbAtoms;
473 ret->atoms = ctxt->atoms;
474 ret->nbCounters = ctxt->nbCounters;
475 ret->counters = ctxt->counters;
476 ret->determinist = ctxt->determinist;
477 ret->flags = ctxt->flags;
478 if (ret->determinist == -1) {
479 if (xmlRegexpIsDeterminist(ret) < 0) {
480 xmlRegexpErrMemory(ctxt, "checking determinism");
481 xmlFree(ret);
482 return(NULL);
483 }
484 }
485
486 if ((ret->determinist != 0) &&
487 (ret->nbCounters == 0) &&
488 (ctxt->negs == 0) &&
489 (ret->atoms != NULL) &&
490 (ret->atoms[0] != NULL) &&
491 (ret->atoms[0]->type == XML_REGEXP_STRING)) {
492 int i, j, nbstates = 0, nbatoms = 0;
493 int *stateRemap;
494 int *stringRemap;
495 int *transitions;
496 void **transdata;
497 xmlChar **stringMap;
498 xmlChar *value;
499
500 /*
501 * Switch to a compact representation
502 * 1/ counting the effective number of states left
503 * 2/ counting the unique number of atoms, and check that
504 * they are all of the string type
505 * 3/ build a table state x atom for the transitions
506 */
507
508 stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
509 if (stateRemap == NULL) {
510 xmlRegexpErrMemory(ctxt, "compiling regexp");
511 xmlFree(ret);
512 return(NULL);
513 }
514 for (i = 0;i < ret->nbStates;i++) {
515 if (ret->states[i] != NULL) {
516 stateRemap[i] = nbstates;
517 nbstates++;
518 } else {
519 stateRemap[i] = -1;
520 }
521 }
522 stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
523 if (stringMap == NULL) {
524 xmlRegexpErrMemory(ctxt, "compiling regexp");
525 xmlFree(stateRemap);
526 xmlFree(ret);
527 return(NULL);
528 }
529 stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
530 if (stringRemap == NULL) {
531 xmlRegexpErrMemory(ctxt, "compiling regexp");
532 xmlFree(stringMap);
533 xmlFree(stateRemap);
534 xmlFree(ret);
535 return(NULL);
536 }
537 for (i = 0;i < ret->nbAtoms;i++) {
538 if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
539 (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
540 value = ret->atoms[i]->valuep;
541 for (j = 0;j < nbatoms;j++) {
542 if (xmlStrEqual(stringMap[j], value)) {
543 stringRemap[i] = j;
544 break;
545 }
546 }
547 if (j >= nbatoms) {
548 stringRemap[i] = nbatoms;
549 stringMap[nbatoms] = xmlStrdup(value);
550 if (stringMap[nbatoms] == NULL) {
551 for (i = 0;i < nbatoms;i++)
552 xmlFree(stringMap[i]);
553 xmlFree(stringRemap);
554 xmlFree(stringMap);
555 xmlFree(stateRemap);
556 xmlFree(ret);
557 return(NULL);
558 }
559 nbatoms++;
560 }
561 } else {
562 xmlFree(stateRemap);
563 xmlFree(stringRemap);
564 for (i = 0;i < nbatoms;i++)
565 xmlFree(stringMap[i]);
566 xmlFree(stringMap);
567 xmlFree(ret);
568 return(NULL);
569 }
570 }
571 transitions = (int *) xmlRegCalloc2(nbstates + 1, nbatoms + 1,
572 sizeof(int));
573 if (transitions == NULL) {
574 xmlFree(stateRemap);
575 xmlFree(stringRemap);
576 for (i = 0;i < nbatoms;i++)
577 xmlFree(stringMap[i]);
578 xmlFree(stringMap);
579 xmlFree(ret);
580 return(NULL);
581 }
582
583 /*
584 * Allocate the transition table. The first entry for each
585 * state corresponds to the state type.
586 */
587 transdata = NULL;
588
589 for (i = 0;i < ret->nbStates;i++) {
590 int stateno, atomno, targetno, prev;
591 xmlRegStatePtr state;
592 xmlRegTransPtr trans;
593
594 stateno = stateRemap[i];
595 if (stateno == -1)
596 continue;
597 state = ret->states[i];
598
599 transitions[stateno * (nbatoms + 1)] = state->type;
600
601 for (j = 0;j < state->nbTrans;j++) {
602 trans = &(state->trans[j]);
603 if ((trans->to < 0) || (trans->atom == NULL))
604 continue;
605 atomno = stringRemap[trans->atom->no];
606 if ((trans->atom->data != NULL) && (transdata == NULL)) {
607 transdata = (void **) xmlRegCalloc2(nbstates, nbatoms,
608 sizeof(void *));
609 if (transdata == NULL) {
610 xmlRegexpErrMemory(ctxt, "compiling regexp");
611 break;
612 }
613 }
614 targetno = stateRemap[trans->to];
615 /*
616 * if the same atom can generate transitions to 2 different
617 * states then it means the automata is not deterministic and
618 * the compact form can't be used !
619 */
620 prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
621 if (prev != 0) {
622 if (prev != targetno + 1) {
623 ret->determinist = 0;
624 if (transdata != NULL)
625 xmlFree(transdata);
626 xmlFree(transitions);
627 xmlFree(stateRemap);
628 xmlFree(stringRemap);
629 for (i = 0;i < nbatoms;i++)
630 xmlFree(stringMap[i]);
631 xmlFree(stringMap);
632 goto not_determ;
633 }
634 } else {
635#if 0
636 printf("State %d trans %d: atom %d to %d : %d to %d\n",
637 i, j, trans->atom->no, trans->to, atomno, targetno);
638#endif
639 transitions[stateno * (nbatoms + 1) + atomno + 1] =
640 targetno + 1; /* to avoid 0 */
641 if (transdata != NULL)
642 transdata[stateno * nbatoms + atomno] =
643 trans->atom->data;
644 }
645 }
646 }
647 ret->determinist = 1;
648 /*
649 * Cleanup of the old data
650 */
651 if (ret->states != NULL) {
652 for (i = 0;i < ret->nbStates;i++)
653 xmlRegFreeState(ret->states[i]);
654 xmlFree(ret->states);
655 }
656 ret->states = NULL;
657 ret->nbStates = 0;
658 if (ret->atoms != NULL) {
659 for (i = 0;i < ret->nbAtoms;i++)
660 xmlRegFreeAtom(ret->atoms[i]);
661 xmlFree(ret->atoms);
662 }
663 ret->atoms = NULL;
664 ret->nbAtoms = 0;
665
666 ret->compact = transitions;
667 ret->transdata = transdata;
668 ret->stringMap = stringMap;
669 ret->nbstrings = nbatoms;
670 ret->nbstates = nbstates;
671 xmlFree(stateRemap);
672 xmlFree(stringRemap);
673 }
674not_determ:
675 ctxt->string = NULL;
676 ctxt->nbStates = 0;
677 ctxt->states = NULL;
678 ctxt->nbAtoms = 0;
679 ctxt->atoms = NULL;
680 ctxt->nbCounters = 0;
681 ctxt->counters = NULL;
682 return(ret);
683}
684
693static xmlRegParserCtxtPtr
694xmlRegNewParserCtxt(const xmlChar *string) {
695 xmlRegParserCtxtPtr ret;
696
697 ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
698 if (ret == NULL)
699 return(NULL);
700 memset(ret, 0, sizeof(xmlRegParserCtxt));
701 if (string != NULL)
702 ret->string = xmlStrdup(string);
703 ret->cur = ret->string;
704 ret->neg = 0;
705 ret->negs = 0;
706 ret->error = 0;
707 ret->determinist = -1;
708 return(ret);
709}
710
723static xmlRegRangePtr
724xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
725 int neg, xmlRegAtomType type, int start, int end) {
726 xmlRegRangePtr ret;
727
728 ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
729 if (ret == NULL) {
730 xmlRegexpErrMemory(ctxt, "allocating range");
731 return(NULL);
732 }
733 ret->neg = neg;
734 ret->type = type;
735 ret->start = start;
736 ret->end = end;
737 return(ret);
738}
739
746static void
747xmlRegFreeRange(xmlRegRangePtr range) {
748 if (range == NULL)
749 return;
750
751 if (range->blockName != NULL)
752 xmlFree(range->blockName);
753 xmlFree(range);
754}
755
764static xmlRegRangePtr
765xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
766 xmlRegRangePtr ret;
767
768 if (range == NULL)
769 return(NULL);
770
771 ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
772 range->end);
773 if (ret == NULL)
774 return(NULL);
775 if (range->blockName != NULL) {
776 ret->blockName = xmlStrdup(range->blockName);
777 if (ret->blockName == NULL) {
778 xmlRegexpErrMemory(ctxt, "allocating range");
779 xmlRegFreeRange(ret);
780 return(NULL);
781 }
782 }
783 return(ret);
784}
785
795static xmlRegAtomPtr
796xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
797 xmlRegAtomPtr ret;
798
799 ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
800 if (ret == NULL) {
801 xmlRegexpErrMemory(ctxt, "allocating atom");
802 return(NULL);
803 }
804 memset(ret, 0, sizeof(xmlRegAtom));
805 ret->type = type;
806 ret->quant = XML_REGEXP_QUANT_ONCE;
807 ret->min = 0;
808 ret->max = 0;
809 return(ret);
810}
811
818static void
819xmlRegFreeAtom(xmlRegAtomPtr atom) {
820 int i;
821
822 if (atom == NULL)
823 return;
824
825 for (i = 0;i < atom->nbRanges;i++)
826 xmlRegFreeRange(atom->ranges[i]);
827 if (atom->ranges != NULL)
828 xmlFree(atom->ranges);
829 if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
830 xmlFree(atom->valuep);
831 if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
832 xmlFree(atom->valuep2);
833 if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
834 xmlFree(atom->valuep);
835 xmlFree(atom);
836}
837
847static xmlRegAtomPtr
848xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
849 xmlRegAtomPtr ret;
850
851 ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
852 if (ret == NULL) {
853 xmlRegexpErrMemory(ctxt, "copying atom");
854 return(NULL);
855 }
856 memset(ret, 0, sizeof(xmlRegAtom));
857 ret->type = atom->type;
858 ret->quant = atom->quant;
859 ret->min = atom->min;
860 ret->max = atom->max;
861 if (atom->nbRanges > 0) {
862 int i;
863
864 ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
865 atom->nbRanges);
866 if (ret->ranges == NULL) {
867 xmlRegexpErrMemory(ctxt, "copying atom");
868 goto error;
869 }
870 for (i = 0;i < atom->nbRanges;i++) {
871 ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
872 if (ret->ranges[i] == NULL)
873 goto error;
874 ret->nbRanges = i + 1;
875 }
876 }
877 return(ret);
878
879error:
880 xmlRegFreeAtom(ret);
881 return(NULL);
882}
883
884static xmlRegStatePtr
885xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
886 xmlRegStatePtr ret;
887
888 ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
889 if (ret == NULL) {
890 xmlRegexpErrMemory(ctxt, "allocating state");
891 return(NULL);
892 }
893 memset(ret, 0, sizeof(xmlRegState));
894 ret->type = XML_REGEXP_TRANS_STATE;
895 ret->mark = XML_REGEXP_MARK_NORMAL;
896 return(ret);
897}
898
905static void
906xmlRegFreeState(xmlRegStatePtr state) {
907 if (state == NULL)
908 return;
909
910 if (state->trans != NULL)
911 xmlFree(state->trans);
912 if (state->transTo != NULL)
913 xmlFree(state->transTo);
914 xmlFree(state);
915}
916
923static void
924xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
925 int i;
926 if (ctxt == NULL)
927 return;
928
929 if (ctxt->string != NULL)
930 xmlFree(ctxt->string);
931 if (ctxt->states != NULL) {
932 for (i = 0;i < ctxt->nbStates;i++)
933 xmlRegFreeState(ctxt->states[i]);
934 xmlFree(ctxt->states);
935 }
936 if (ctxt->atoms != NULL) {
937 for (i = 0;i < ctxt->nbAtoms;i++)
938 xmlRegFreeAtom(ctxt->atoms[i]);
939 xmlFree(ctxt->atoms);
940 }
941 if (ctxt->counters != NULL)
942 xmlFree(ctxt->counters);
943 xmlFree(ctxt);
944}
945
946/************************************************************************
947 * *
948 * Display of Data structures *
949 * *
950 ************************************************************************/
951
952static void
953xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
954 switch (type) {
955 case XML_REGEXP_EPSILON:
956 fprintf(output, "epsilon "); break;
957 case XML_REGEXP_CHARVAL:
958 fprintf(output, "charval "); break;
959 case XML_REGEXP_RANGES:
960 fprintf(output, "ranges "); break;
961 case XML_REGEXP_SUBREG:
962 fprintf(output, "subexpr "); break;
963 case XML_REGEXP_STRING:
964 fprintf(output, "string "); break;
965 case XML_REGEXP_ANYCHAR:
966 fprintf(output, "anychar "); break;
967 case XML_REGEXP_ANYSPACE:
968 fprintf(output, "anyspace "); break;
969 case XML_REGEXP_NOTSPACE:
970 fprintf(output, "notspace "); break;
971 case XML_REGEXP_INITNAME:
972 fprintf(output, "initname "); break;
973 case XML_REGEXP_NOTINITNAME:
974 fprintf(output, "notinitname "); break;
975 case XML_REGEXP_NAMECHAR:
976 fprintf(output, "namechar "); break;
977 case XML_REGEXP_NOTNAMECHAR:
978 fprintf(output, "notnamechar "); break;
979 case XML_REGEXP_DECIMAL:
980 fprintf(output, "decimal "); break;
981 case XML_REGEXP_NOTDECIMAL:
982 fprintf(output, "notdecimal "); break;
983 case XML_REGEXP_REALCHAR:
984 fprintf(output, "realchar "); break;
985 case XML_REGEXP_NOTREALCHAR:
986 fprintf(output, "notrealchar "); break;
987 case XML_REGEXP_LETTER:
988 fprintf(output, "LETTER "); break;
989 case XML_REGEXP_LETTER_UPPERCASE:
990 fprintf(output, "LETTER_UPPERCASE "); break;
991 case XML_REGEXP_LETTER_LOWERCASE:
992 fprintf(output, "LETTER_LOWERCASE "); break;
993 case XML_REGEXP_LETTER_TITLECASE:
994 fprintf(output, "LETTER_TITLECASE "); break;
995 case XML_REGEXP_LETTER_MODIFIER:
996 fprintf(output, "LETTER_MODIFIER "); break;
997 case XML_REGEXP_LETTER_OTHERS:
998 fprintf(output, "LETTER_OTHERS "); break;
999 case XML_REGEXP_MARK:
1000 fprintf(output, "MARK "); break;
1001 case XML_REGEXP_MARK_NONSPACING:
1002 fprintf(output, "MARK_NONSPACING "); break;
1003 case XML_REGEXP_MARK_SPACECOMBINING:
1004 fprintf(output, "MARK_SPACECOMBINING "); break;
1005 case XML_REGEXP_MARK_ENCLOSING:
1006 fprintf(output, "MARK_ENCLOSING "); break;
1007 case XML_REGEXP_NUMBER:
1008 fprintf(output, "NUMBER "); break;
1009 case XML_REGEXP_NUMBER_DECIMAL:
1010 fprintf(output, "NUMBER_DECIMAL "); break;
1011 case XML_REGEXP_NUMBER_LETTER:
1012 fprintf(output, "NUMBER_LETTER "); break;
1013 case XML_REGEXP_NUMBER_OTHERS:
1014 fprintf(output, "NUMBER_OTHERS "); break;
1015 case XML_REGEXP_PUNCT:
1016 fprintf(output, "PUNCT "); break;
1017 case XML_REGEXP_PUNCT_CONNECTOR:
1018 fprintf(output, "PUNCT_CONNECTOR "); break;
1019 case XML_REGEXP_PUNCT_DASH:
1020 fprintf(output, "PUNCT_DASH "); break;
1021 case XML_REGEXP_PUNCT_OPEN:
1022 fprintf(output, "PUNCT_OPEN "); break;
1023 case XML_REGEXP_PUNCT_CLOSE:
1024 fprintf(output, "PUNCT_CLOSE "); break;
1025 case XML_REGEXP_PUNCT_INITQUOTE:
1026 fprintf(output, "PUNCT_INITQUOTE "); break;
1027 case XML_REGEXP_PUNCT_FINQUOTE:
1028 fprintf(output, "PUNCT_FINQUOTE "); break;
1029 case XML_REGEXP_PUNCT_OTHERS:
1030 fprintf(output, "PUNCT_OTHERS "); break;
1031 case XML_REGEXP_SEPAR:
1032 fprintf(output, "SEPAR "); break;
1033 case XML_REGEXP_SEPAR_SPACE:
1034 fprintf(output, "SEPAR_SPACE "); break;
1035 case XML_REGEXP_SEPAR_LINE:
1036 fprintf(output, "SEPAR_LINE "); break;
1037 case XML_REGEXP_SEPAR_PARA:
1038 fprintf(output, "SEPAR_PARA "); break;
1039 case XML_REGEXP_SYMBOL:
1040 fprintf(output, "SYMBOL "); break;
1041 case XML_REGEXP_SYMBOL_MATH:
1042 fprintf(output, "SYMBOL_MATH "); break;
1043 case XML_REGEXP_SYMBOL_CURRENCY:
1044 fprintf(output, "SYMBOL_CURRENCY "); break;
1045 case XML_REGEXP_SYMBOL_MODIFIER:
1046 fprintf(output, "SYMBOL_MODIFIER "); break;
1047 case XML_REGEXP_SYMBOL_OTHERS:
1048 fprintf(output, "SYMBOL_OTHERS "); break;
1049 case XML_REGEXP_OTHER:
1050 fprintf(output, "OTHER "); break;
1051 case XML_REGEXP_OTHER_CONTROL:
1052 fprintf(output, "OTHER_CONTROL "); break;
1053 case XML_REGEXP_OTHER_FORMAT:
1054 fprintf(output, "OTHER_FORMAT "); break;
1055 case XML_REGEXP_OTHER_PRIVATE:
1056 fprintf(output, "OTHER_PRIVATE "); break;
1057 case XML_REGEXP_OTHER_NA:
1058 fprintf(output, "OTHER_NA "); break;
1059 case XML_REGEXP_BLOCK_NAME:
1060 fprintf(output, "BLOCK "); break;
1061 }
1062}
1063
1064static void
1065xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
1066 switch (type) {
1067 case XML_REGEXP_QUANT_EPSILON:
1068 fprintf(output, "epsilon "); break;
1069 case XML_REGEXP_QUANT_ONCE:
1070 fprintf(output, "once "); break;
1071 case XML_REGEXP_QUANT_OPT:
1072 fprintf(output, "? "); break;
1073 case XML_REGEXP_QUANT_MULT:
1074 fprintf(output, "* "); break;
1075 case XML_REGEXP_QUANT_PLUS:
1076 fprintf(output, "+ "); break;
1077 case XML_REGEXP_QUANT_RANGE:
1078 fprintf(output, "range "); break;
1079 case XML_REGEXP_QUANT_ONCEONLY:
1080 fprintf(output, "onceonly "); break;
1081 case XML_REGEXP_QUANT_ALL:
1082 fprintf(output, "all "); break;
1083 }
1084}
1085static void
1086xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
1087 fprintf(output, " range: ");
1088 if (range->neg)
1089 fprintf(output, "negative ");
1090 xmlRegPrintAtomType(output, range->type);
1091 fprintf(output, "%c - %c\n", range->start, range->end);
1092}
1093
1094static void
1095xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
1096 fprintf(output, " atom: ");
1097 if (atom == NULL) {
1098 fprintf(output, "NULL\n");
1099 return;
1100 }
1101 if (atom->neg)
1102 fprintf(output, "not ");
1103 xmlRegPrintAtomType(output, atom->type);
1104 xmlRegPrintQuantType(output, atom->quant);
1105 if (atom->quant == XML_REGEXP_QUANT_RANGE)
1106 fprintf(output, "%d-%d ", atom->min, atom->max);
1107 if (atom->type == XML_REGEXP_STRING)
1108 fprintf(output, "'%s' ", (char *) atom->valuep);
1109 if (atom->type == XML_REGEXP_CHARVAL)
1110 fprintf(output, "char %c\n", atom->codepoint);
1111 else if (atom->type == XML_REGEXP_RANGES) {
1112 int i;
1113 fprintf(output, "%d entries\n", atom->nbRanges);
1114 for (i = 0; i < atom->nbRanges;i++)
1115 xmlRegPrintRange(output, atom->ranges[i]);
1116 } else if (atom->type == XML_REGEXP_SUBREG) {
1117 fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
1118 } else {
1119 fprintf(output, "\n");
1120 }
1121}
1122
1123static void
1124xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
1125 fprintf(output, " trans: ");
1126 if (trans == NULL) {
1127 fprintf(output, "NULL\n");
1128 return;
1129 }
1130 if (trans->to < 0) {
1131 fprintf(output, "removed\n");
1132 return;
1133 }
1134 if (trans->nd != 0) {
1135 if (trans->nd == 2)
1136 fprintf(output, "last not determinist, ");
1137 else
1138 fprintf(output, "not determinist, ");
1139 }
1140 if (trans->counter >= 0) {
1141 fprintf(output, "counted %d, ", trans->counter);
1142 }
1143 if (trans->count == REGEXP_ALL_COUNTER) {
1144 fprintf(output, "all transition, ");
1145 } else if (trans->count >= 0) {
1146 fprintf(output, "count based %d, ", trans->count);
1147 }
1148 if (trans->atom == NULL) {
1149 fprintf(output, "epsilon to %d\n", trans->to);
1150 return;
1151 }
1152 if (trans->atom->type == XML_REGEXP_CHARVAL)
1153 fprintf(output, "char %c ", trans->atom->codepoint);
1154 fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
1155}
1156
1157static void
1158xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
1159 int i;
1160
1161 fprintf(output, " state: ");
1162 if (state == NULL) {
1163 fprintf(output, "NULL\n");
1164 return;
1165 }
1166 if (state->type == XML_REGEXP_START_STATE)
1167 fprintf(output, "START ");
1168 if (state->type == XML_REGEXP_FINAL_STATE)
1169 fprintf(output, "FINAL ");
1170
1171 fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
1172 for (i = 0;i < state->nbTrans; i++) {
1173 xmlRegPrintTrans(output, &(state->trans[i]));
1174 }
1175}
1176
1177/************************************************************************
1178 * *
1179 * Finite Automata structures manipulations *
1180 * *
1181 ************************************************************************/
1182
1183static xmlRegRangePtr
1184xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
1185 int neg, xmlRegAtomType type, int start, int end,
1186 xmlChar *blockName) {
1187 xmlRegRangePtr range;
1188
1189 if (atom == NULL) {
1190 ERROR("add range: atom is NULL");
1191 return(NULL);
1192 }
1193 if (atom->type != XML_REGEXP_RANGES) {
1194 ERROR("add range: atom is not ranges");
1195 return(NULL);
1196 }
1197 if (atom->maxRanges == 0) {
1198 atom->maxRanges = 4;
1199 atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
1200 sizeof(xmlRegRangePtr));
1201 if (atom->ranges == NULL) {
1202 xmlRegexpErrMemory(ctxt, "adding ranges");
1203 atom->maxRanges = 0;
1204 return(NULL);
1205 }
1206 } else if (atom->nbRanges >= atom->maxRanges) {
1207 xmlRegRangePtr *tmp;
1208 atom->maxRanges *= 2;
1209 tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
1210 sizeof(xmlRegRangePtr));
1211 if (tmp == NULL) {
1212 xmlRegexpErrMemory(ctxt, "adding ranges");
1213 atom->maxRanges /= 2;
1214 return(NULL);
1215 }
1216 atom->ranges = tmp;
1217 }
1218 range = xmlRegNewRange(ctxt, neg, type, start, end);
1219 if (range == NULL)
1220 return(NULL);
1221 range->blockName = blockName;
1222 atom->ranges[atom->nbRanges++] = range;
1223
1224 return(range);
1225}
1226
1227static int
1228xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1229 if (ctxt->maxCounters == 0) {
1230 ctxt->maxCounters = 4;
1231 ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
1232 sizeof(xmlRegCounter));
1233 if (ctxt->counters == NULL) {
1234 xmlRegexpErrMemory(ctxt, "allocating counter");
1235 ctxt->maxCounters = 0;
1236 return(-1);
1237 }
1238 } else if (ctxt->nbCounters >= ctxt->maxCounters) {
1239 xmlRegCounter *tmp;
1240 ctxt->maxCounters *= 2;
1241 tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
1242 sizeof(xmlRegCounter));
1243 if (tmp == NULL) {
1244 xmlRegexpErrMemory(ctxt, "allocating counter");
1245 ctxt->maxCounters /= 2;
1246 return(-1);
1247 }
1248 ctxt->counters = tmp;
1249 }
1250 ctxt->counters[ctxt->nbCounters].min = -1;
1251 ctxt->counters[ctxt->nbCounters].max = -1;
1252 return(ctxt->nbCounters++);
1253}
1254
1255static int
1256xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1257 if (atom == NULL) {
1258 ERROR("atom push: atom is NULL");
1259 return(-1);
1260 }
1261 if (ctxt->nbAtoms >= ctxt->maxAtoms) {
1262 size_t newSize = ctxt->maxAtoms ? ctxt->maxAtoms * 2 : 4;
1263 xmlRegAtomPtr *tmp;
1264
1265 tmp = xmlRealloc(ctxt->atoms, newSize * sizeof(xmlRegAtomPtr));
1266 if (tmp == NULL) {
1267 xmlRegexpErrMemory(ctxt, "allocating counter");
1268 return(-1);
1269 }
1270 ctxt->atoms = tmp;
1271 ctxt->maxAtoms = newSize;
1272 }
1273 atom->no = ctxt->nbAtoms;
1274 ctxt->atoms[ctxt->nbAtoms++] = atom;
1275 return(0);
1276}
1277
1278static void
1279xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
1280 int from) {
1281 if (target->maxTransTo == 0) {
1282 target->maxTransTo = 8;
1283 target->transTo = (int *) xmlMalloc(target->maxTransTo *
1284 sizeof(int));
1285 if (target->transTo == NULL) {
1286 xmlRegexpErrMemory(ctxt, "adding transition");
1287 target->maxTransTo = 0;
1288 return;
1289 }
1290 } else if (target->nbTransTo >= target->maxTransTo) {
1291 int *tmp;
1292 target->maxTransTo *= 2;
1293 tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
1294 sizeof(int));
1295 if (tmp == NULL) {
1296 xmlRegexpErrMemory(ctxt, "adding transition");
1297 target->maxTransTo /= 2;
1298 return;
1299 }
1300 target->transTo = tmp;
1301 }
1302 target->transTo[target->nbTransTo] = from;
1303 target->nbTransTo++;
1304}
1305
1306static void
1307xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1308 xmlRegAtomPtr atom, xmlRegStatePtr target,
1309 int counter, int count) {
1310
1311 int nrtrans;
1312
1313 if (state == NULL) {
1314 ERROR("add state: state is NULL");
1315 return;
1316 }
1317 if (target == NULL) {
1318 ERROR("add state: target is NULL");
1319 return;
1320 }
1321 /*
1322 * Other routines follow the philosophy 'When in doubt, add a transition'
1323 * so we check here whether such a transition is already present and, if
1324 * so, silently ignore this request.
1325 */
1326
1327 for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
1328 xmlRegTransPtr trans = &(state->trans[nrtrans]);
1329 if ((trans->atom == atom) &&
1330 (trans->to == target->no) &&
1331 (trans->counter == counter) &&
1332 (trans->count == count)) {
1333 return;
1334 }
1335 }
1336
1337 if (state->maxTrans == 0) {
1338 state->maxTrans = 8;
1339 state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
1340 sizeof(xmlRegTrans));
1341 if (state->trans == NULL) {
1342 xmlRegexpErrMemory(ctxt, "adding transition");
1343 state->maxTrans = 0;
1344 return;
1345 }
1346 } else if (state->nbTrans >= state->maxTrans) {
1347 xmlRegTrans *tmp;
1348 state->maxTrans *= 2;
1349 tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
1350 sizeof(xmlRegTrans));
1351 if (tmp == NULL) {
1352 xmlRegexpErrMemory(ctxt, "adding transition");
1353 state->maxTrans /= 2;
1354 return;
1355 }
1356 state->trans = tmp;
1357 }
1358
1359 state->trans[state->nbTrans].atom = atom;
1360 state->trans[state->nbTrans].to = target->no;
1361 state->trans[state->nbTrans].counter = counter;
1362 state->trans[state->nbTrans].count = count;
1363 state->trans[state->nbTrans].nd = 0;
1364 state->nbTrans++;
1365 xmlRegStateAddTransTo(ctxt, target, state->no);
1366}
1367
1368static xmlRegStatePtr
1369xmlRegStatePush(xmlRegParserCtxtPtr ctxt) {
1370 xmlRegStatePtr state;
1371
1372 if (ctxt->nbStates >= ctxt->maxStates) {
1373 size_t newSize = ctxt->maxStates ? ctxt->maxStates * 2 : 4;
1374 xmlRegStatePtr *tmp;
1375
1376 tmp = xmlRealloc(ctxt->states, newSize * sizeof(tmp[0]));
1377 if (tmp == NULL) {
1378 xmlRegexpErrMemory(ctxt, "adding state");
1379 return(NULL);
1380 }
1381 ctxt->states = tmp;
1382 ctxt->maxStates = newSize;
1383 }
1384
1385 state = xmlRegNewState(ctxt);
1386 if (state == NULL)
1387 return(NULL);
1388
1389 state->no = ctxt->nbStates;
1390 ctxt->states[ctxt->nbStates++] = state;
1391
1392 return(state);
1393}
1394
1403static int
1404xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
1405 xmlRegStatePtr from, xmlRegStatePtr to,
1406 int lax) {
1407 if (to == NULL) {
1408 to = xmlRegStatePush(ctxt);
1409 if (to == NULL)
1410 return(-1);
1411 ctxt->state = to;
1412 }
1413 if (lax)
1414 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
1415 else
1416 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
1417 return(0);
1418}
1419
1427static int
1428xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1429 xmlRegStatePtr from, xmlRegStatePtr to) {
1430 if (to == NULL) {
1431 to = xmlRegStatePush(ctxt);
1432 if (to == NULL)
1433 return(-1);
1434 ctxt->state = to;
1435 }
1436 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
1437 return(0);
1438}
1439
1448static int
1449xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1450 xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1451 if (to == NULL) {
1452 to = xmlRegStatePush(ctxt);
1453 if (to == NULL)
1454 return(-1);
1455 ctxt->state = to;
1456 }
1457 xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
1458 return(0);
1459}
1460
1469static int
1470xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1471 xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1472 if (to == NULL) {
1473 to = xmlRegStatePush(ctxt);
1474 if (to == NULL)
1475 return(-1);
1476 ctxt->state = to;
1477 }
1478 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
1479 return(0);
1480}
1481
1491static int
1492xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
1493 xmlRegStatePtr to, xmlRegAtomPtr atom) {
1494 xmlRegStatePtr end;
1495 int nullable = 0;
1496
1497 if (atom == NULL) {
1498 ERROR("generate transition: atom == NULL");
1499 return(-1);
1500 }
1501 if (atom->type == XML_REGEXP_SUBREG) {
1502 /*
1503 * this is a subexpression handling one should not need to
1504 * create a new node except for XML_REGEXP_QUANT_RANGE.
1505 */
1506 if ((to != NULL) && (atom->stop != to) &&
1507 (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1508 /*
1509 * Generate an epsilon transition to link to the target
1510 */
1511 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1512#ifdef DV
1513 } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
1514 (atom->quant != XML_REGEXP_QUANT_ONCE)) {
1515 to = xmlRegStatePush(ctxt, to);
1516 if (to == NULL)
1517 return(-1);
1518 ctxt->state = to;
1519 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1520#endif
1521 }
1522 switch (atom->quant) {
1523 case XML_REGEXP_QUANT_OPT:
1524 atom->quant = XML_REGEXP_QUANT_ONCE;
1525 /*
1526 * transition done to the state after end of atom.
1527 * 1. set transition from atom start to new state
1528 * 2. set transition from atom end to this state.
1529 */
1530 if (to == NULL) {
1531 xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
1532 xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
1533 ctxt->state);
1534 } else {
1535 xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
1536 }
1537 break;
1538 case XML_REGEXP_QUANT_MULT:
1539 atom->quant = XML_REGEXP_QUANT_ONCE;
1540 xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1541 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1542 break;
1543 case XML_REGEXP_QUANT_PLUS:
1544 atom->quant = XML_REGEXP_QUANT_ONCE;
1545 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1546 break;
1547 case XML_REGEXP_QUANT_RANGE: {
1548 int counter;
1549 xmlRegStatePtr inter, newstate;
1550
1551 /*
1552 * create the final state now if needed
1553 */
1554 if (to != NULL) {
1555 newstate = to;
1556 } else {
1557 newstate = xmlRegStatePush(ctxt);
1558 if (newstate == NULL)
1559 return(-1);
1560 }
1561
1562 /*
1563 * The principle here is to use counted transition
1564 * to avoid explosion in the number of states in the
1565 * graph. This is clearly more complex but should not
1566 * be exploitable at runtime.
1567 */
1568 if ((atom->min == 0) && (atom->start0 == NULL)) {
1569 xmlRegAtomPtr copy;
1570 /*
1571 * duplicate a transition based on atom to count next
1572 * occurrences after 1. We cannot loop to atom->start
1573 * directly because we need an epsilon transition to
1574 * newstate.
1575 */
1576 /* ???? For some reason it seems we never reach that
1577 case, I suppose this got optimized out before when
1578 building the automata */
1579 copy = xmlRegCopyAtom(ctxt, atom);
1580 if (copy == NULL)
1581 return(-1);
1582 copy->quant = XML_REGEXP_QUANT_ONCE;
1583 copy->min = 0;
1584 copy->max = 0;
1585
1586 if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
1587 < 0) {
1588 xmlRegFreeAtom(copy);
1589 return(-1);
1590 }
1591 inter = ctxt->state;
1592 counter = xmlRegGetCounter(ctxt);
1593 if (counter < 0)
1594 return(-1);
1595 ctxt->counters[counter].min = atom->min - 1;
1596 ctxt->counters[counter].max = atom->max - 1;
1597 /* count the number of times we see it again */
1598 xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
1599 atom->stop, counter);
1600 /* allow a way out based on the count */
1601 xmlFAGenerateCountedTransition(ctxt, inter,
1602 newstate, counter);
1603 /* and also allow a direct exit for 0 */
1604 xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1605 newstate);
1606 } else {
1607 /*
1608 * either we need the atom at least once or there
1609 * is an atom->start0 allowing to easily plug the
1610 * epsilon transition.
1611 */
1612 counter = xmlRegGetCounter(ctxt);
1613 if (counter < 0)
1614 return(-1);
1615 ctxt->counters[counter].min = atom->min - 1;
1616 ctxt->counters[counter].max = atom->max - 1;
1617 /* allow a way out based on the count */
1618 xmlFAGenerateCountedTransition(ctxt, atom->stop,
1619 newstate, counter);
1620 /* count the number of times we see it again */
1621 xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1622 atom->start, counter);
1623 /* and if needed allow a direct exit for 0 */
1624 if (atom->min == 0)
1625 xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
1626 newstate);
1627
1628 }
1629 atom->min = 0;
1630 atom->max = 0;
1631 atom->quant = XML_REGEXP_QUANT_ONCE;
1632 ctxt->state = newstate;
1633 }
1634 default:
1635 break;
1636 }
1637 if (xmlRegAtomPush(ctxt, atom) < 0)
1638 return(-1);
1639 return(0);
1640 }
1641 if ((atom->min == 0) && (atom->max == 0) &&
1642 (atom->quant == XML_REGEXP_QUANT_RANGE)) {
1643 /*
1644 * we can discard the atom and generate an epsilon transition instead
1645 */
1646 if (to == NULL) {
1647 to = xmlRegStatePush(ctxt);
1648 if (to == NULL)
1649 return(-1);
1650 }
1651 xmlFAGenerateEpsilonTransition(ctxt, from, to);
1652 ctxt->state = to;
1653 xmlRegFreeAtom(atom);
1654 return(0);
1655 }
1656 if (to == NULL) {
1657 to = xmlRegStatePush(ctxt);
1658 if (to == NULL)
1659 return(-1);
1660 }
1661 end = to;
1662 if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
1663 (atom->quant == XML_REGEXP_QUANT_PLUS)) {
1664 /*
1665 * Do not pollute the target state by adding transitions from
1666 * it as it is likely to be the shared target of multiple branches.
1667 * So isolate with an epsilon transition.
1668 */
1669 xmlRegStatePtr tmp;
1670
1671 tmp = xmlRegStatePush(ctxt);
1672 if (tmp == NULL)
1673 return(-1);
1674 xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
1675 to = tmp;
1676 }
1677 if ((atom->quant == XML_REGEXP_QUANT_RANGE) &&
1678 (atom->min == 0) && (atom->max > 0)) {
1679 nullable = 1;
1680 atom->min = 1;
1681 if (atom->max == 1)
1682 atom->quant = XML_REGEXP_QUANT_OPT;
1683 }
1684 xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
1685 ctxt->state = end;
1686 switch (atom->quant) {
1687 case XML_REGEXP_QUANT_OPT:
1688 atom->quant = XML_REGEXP_QUANT_ONCE;
1689 xmlFAGenerateEpsilonTransition(ctxt, from, to);
1690 break;
1691 case XML_REGEXP_QUANT_MULT:
1692 atom->quant = XML_REGEXP_QUANT_ONCE;
1693 xmlFAGenerateEpsilonTransition(ctxt, from, to);
1694 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1695 break;
1696 case XML_REGEXP_QUANT_PLUS:
1697 atom->quant = XML_REGEXP_QUANT_ONCE;
1698 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1699 break;
1700 case XML_REGEXP_QUANT_RANGE:
1701 if (nullable)
1702 xmlFAGenerateEpsilonTransition(ctxt, from, to);
1703 break;
1704 default:
1705 break;
1706 }
1707 if (xmlRegAtomPush(ctxt, atom) < 0)
1708 return(-1);
1709 return(0);
1710}
1711
1720static void
1721xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
1722 int tonr, int counter) {
1723 int transnr;
1724 xmlRegStatePtr from;
1725 xmlRegStatePtr to;
1726
1727 from = ctxt->states[fromnr];
1728 if (from == NULL)
1729 return;
1730 to = ctxt->states[tonr];
1731 if (to == NULL)
1732 return;
1733 if ((to->mark == XML_REGEXP_MARK_START) ||
1734 (to->mark == XML_REGEXP_MARK_VISITED))
1735 return;
1736
1737 to->mark = XML_REGEXP_MARK_VISITED;
1738 if (to->type == XML_REGEXP_FINAL_STATE) {
1739 from->type = XML_REGEXP_FINAL_STATE;
1740 }
1741 for (transnr = 0;transnr < to->nbTrans;transnr++) {
1742 xmlRegTransPtr t1 = &to->trans[transnr];
1743 int tcounter;
1744
1745 if (t1->to < 0)
1746 continue;
1747 if (t1->counter >= 0) {
1748 /* assert(counter < 0); */
1749 tcounter = t1->counter;
1750 } else {
1751 tcounter = counter;
1752 }
1753 if (t1->atom == NULL) {
1754 /*
1755 * Don't remove counted transitions
1756 * Don't loop either
1757 */
1758 if (t1->to != fromnr) {
1759 if (t1->count >= 0) {
1760 xmlRegStateAddTrans(ctxt, from, NULL, ctxt->states[t1->to],
1761 -1, t1->count);
1762 } else {
1763 xmlFAReduceEpsilonTransitions(ctxt, fromnr, t1->to,
1764 tcounter);
1765 }
1766 }
1767 } else {
1768 xmlRegStateAddTrans(ctxt, from, t1->atom,
1769 ctxt->states[t1->to], tcounter, -1);
1770 }
1771 }
1772}
1773
1782static void
1783xmlFAFinishReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int tonr) {
1784 int transnr;
1785 xmlRegStatePtr to;
1786
1787 to = ctxt->states[tonr];
1788 if (to == NULL)
1789 return;
1790 if ((to->mark == XML_REGEXP_MARK_START) ||
1791 (to->mark == XML_REGEXP_MARK_NORMAL))
1792 return;
1793
1794 to->mark = XML_REGEXP_MARK_NORMAL;
1795 for (transnr = 0;transnr < to->nbTrans;transnr++) {
1796 xmlRegTransPtr t1 = &to->trans[transnr];
1797 if ((t1->to >= 0) && (t1->atom == NULL))
1798 xmlFAFinishReduceEpsilonTransitions(ctxt, t1->to);
1799 }
1800}
1801
1823static void
1824xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1825 int statenr, i, j, newto;
1826 xmlRegStatePtr state, tmp;
1827
1828 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1829 state = ctxt->states[statenr];
1830 if (state == NULL)
1831 continue;
1832 if (state->nbTrans != 1)
1833 continue;
1834 if (state->type == XML_REGEXP_UNREACH_STATE ||
1835 state->type == XML_REGEXP_FINAL_STATE)
1836 continue;
1837 /* is the only transition out a basic transition */
1838 if ((state->trans[0].atom == NULL) &&
1839 (state->trans[0].to >= 0) &&
1840 (state->trans[0].to != statenr) &&
1841 (state->trans[0].counter < 0) &&
1842 (state->trans[0].count < 0)) {
1843 newto = state->trans[0].to;
1844
1845 if (state->type == XML_REGEXP_START_STATE) {
1846 } else {
1847 for (i = 0;i < state->nbTransTo;i++) {
1848 tmp = ctxt->states[state->transTo[i]];
1849 for (j = 0;j < tmp->nbTrans;j++) {
1850 if (tmp->trans[j].to == statenr) {
1851 tmp->trans[j].to = -1;
1852 xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
1853 ctxt->states[newto],
1854 tmp->trans[j].counter,
1855 tmp->trans[j].count);
1856 }
1857 }
1858 }
1859 if (state->type == XML_REGEXP_FINAL_STATE)
1860 ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
1861 /* eliminate the transition completely */
1862 state->nbTrans = 0;
1863
1864 state->type = XML_REGEXP_UNREACH_STATE;
1865
1866 }
1867
1868 }
1869 }
1870}
1876static void
1877xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1878 int statenr, transnr;
1879 xmlRegStatePtr state;
1880 int has_epsilon;
1881
1882 if (ctxt->states == NULL) return;
1883
1884 /*
1885 * Eliminate simple epsilon transition and the associated unreachable
1886 * states.
1887 */
1888 xmlFAEliminateSimpleEpsilonTransitions(ctxt);
1889 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1890 state = ctxt->states[statenr];
1891 if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
1892 xmlRegFreeState(state);
1893 ctxt->states[statenr] = NULL;
1894 }
1895 }
1896
1897 has_epsilon = 0;
1898
1899 /*
1900 * Build the completed transitions bypassing the epsilons
1901 * Use a marking algorithm to avoid loops
1902 * Mark sink states too.
1903 * Process from the latest states backward to the start when
1904 * there is long cascading epsilon chains this minimize the
1905 * recursions and transition compares when adding the new ones
1906 */
1907 for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
1908 state = ctxt->states[statenr];
1909 if (state == NULL)
1910 continue;
1911 if ((state->nbTrans == 0) &&
1912 (state->type != XML_REGEXP_FINAL_STATE)) {
1913 state->type = XML_REGEXP_SINK_STATE;
1914 }
1915 for (transnr = 0;transnr < state->nbTrans;transnr++) {
1916 if ((state->trans[transnr].atom == NULL) &&
1917 (state->trans[transnr].to >= 0)) {
1918 if (state->trans[transnr].to == statenr) {
1919 state->trans[transnr].to = -1;
1920 } else if (state->trans[transnr].count < 0) {
1921 int newto = state->trans[transnr].to;
1922
1923 has_epsilon = 1;
1924 state->trans[transnr].to = -2;
1925 state->mark = XML_REGEXP_MARK_START;
1926 xmlFAReduceEpsilonTransitions(ctxt, statenr,
1927 newto, state->trans[transnr].counter);
1928 xmlFAFinishReduceEpsilonTransitions(ctxt, newto);
1929 state->mark = XML_REGEXP_MARK_NORMAL;
1930 }
1931 }
1932 }
1933 }
1934 /*
1935 * Eliminate the epsilon transitions
1936 */
1937 if (has_epsilon) {
1938 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1939 state = ctxt->states[statenr];
1940 if (state == NULL)
1941 continue;
1942 for (transnr = 0;transnr < state->nbTrans;transnr++) {
1943 xmlRegTransPtr trans = &(state->trans[transnr]);
1944 if ((trans->atom == NULL) &&
1945 (trans->count < 0) &&
1946 (trans->to >= 0)) {
1947 trans->to = -1;
1948 }
1949 }
1950 }
1951 }
1952
1953 /*
1954 * Use this pass to detect unreachable states too
1955 */
1956 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1957 state = ctxt->states[statenr];
1958 if (state != NULL)
1959 state->reached = XML_REGEXP_MARK_NORMAL;
1960 }
1961 state = ctxt->states[0];
1962 if (state != NULL)
1963 state->reached = XML_REGEXP_MARK_START;
1964 while (state != NULL) {
1965 xmlRegStatePtr target = NULL;
1966 state->reached = XML_REGEXP_MARK_VISITED;
1967 /*
1968 * Mark all states reachable from the current reachable state
1969 */
1970 for (transnr = 0;transnr < state->nbTrans;transnr++) {
1971 if ((state->trans[transnr].to >= 0) &&
1972 ((state->trans[transnr].atom != NULL) ||
1973 (state->trans[transnr].count >= 0))) {
1974 int newto = state->trans[transnr].to;
1975
1976 if (ctxt->states[newto] == NULL)
1977 continue;
1978 if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
1979 ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
1980 target = ctxt->states[newto];
1981 }
1982 }
1983 }
1984
1985 /*
1986 * find the next accessible state not explored
1987 */
1988 if (target == NULL) {
1989 for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
1990 state = ctxt->states[statenr];
1991 if ((state != NULL) && (state->reached ==
1992 XML_REGEXP_MARK_START)) {
1993 target = state;
1994 break;
1995 }
1996 }
1997 }
1998 state = target;
1999 }
2000 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2001 state = ctxt->states[statenr];
2002 if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
2003 xmlRegFreeState(state);
2004 ctxt->states[statenr] = NULL;
2005 }
2006 }
2007
2008}
2009
2010static int
2011xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
2012 int ret = 0;
2013
2014 if ((range1->type == XML_REGEXP_RANGES) ||
2015 (range2->type == XML_REGEXP_RANGES) ||
2016 (range2->type == XML_REGEXP_SUBREG) ||
2017 (range1->type == XML_REGEXP_SUBREG) ||
2018 (range1->type == XML_REGEXP_STRING) ||
2019 (range2->type == XML_REGEXP_STRING))
2020 return(-1);
2021
2022 /* put them in order */
2023 if (range1->type > range2->type) {
2024 xmlRegRangePtr tmp;
2025
2026 tmp = range1;
2027 range1 = range2;
2028 range2 = tmp;
2029 }
2030 if ((range1->type == XML_REGEXP_ANYCHAR) ||
2031 (range2->type == XML_REGEXP_ANYCHAR)) {
2032 ret = 1;
2033 } else if ((range1->type == XML_REGEXP_EPSILON) ||
2034 (range2->type == XML_REGEXP_EPSILON)) {
2035 return(0);
2036 } else if (range1->type == range2->type) {
2037 if (range1->type != XML_REGEXP_CHARVAL)
2038 ret = 1;
2039 else if ((range1->end < range2->start) ||
2040 (range2->end < range1->start))
2041 ret = 0;
2042 else
2043 ret = 1;
2044 } else if (range1->type == XML_REGEXP_CHARVAL) {
2045 int codepoint;
2046 int neg = 0;
2047
2048 /*
2049 * just check all codepoints in the range for acceptance,
2050 * this is usually way cheaper since done only once at
2051 * compilation than testing over and over at runtime or
2052 * pushing too many states when evaluating.
2053 */
2054 if (((range1->neg == 0) && (range2->neg != 0)) ||
2055 ((range1->neg != 0) && (range2->neg == 0)))
2056 neg = 1;
2057
2058 for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
2059 ret = xmlRegCheckCharacterRange(range2->type, codepoint,
2060 0, range2->start, range2->end,
2061 range2->blockName);
2062 if (ret < 0)
2063 return(-1);
2064 if (((neg == 1) && (ret == 0)) ||
2065 ((neg == 0) && (ret == 1)))
2066 return(1);
2067 }
2068 return(0);
2069 } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
2070 (range2->type == XML_REGEXP_BLOCK_NAME)) {
2071 if (range1->type == range2->type) {
2072 ret = xmlStrEqual(range1->blockName, range2->blockName);
2073 } else {
2074 /*
2075 * comparing a block range with anything else is way
2076 * too costly, and maintaining the table is like too much
2077 * memory too, so let's force the automata to save state
2078 * here.
2079 */
2080 return(1);
2081 }
2082 } else if ((range1->type < XML_REGEXP_LETTER) ||
2083 (range2->type < XML_REGEXP_LETTER)) {
2084 if ((range1->type == XML_REGEXP_ANYSPACE) &&
2085 (range2->type == XML_REGEXP_NOTSPACE))
2086 ret = 0;
2087 else if ((range1->type == XML_REGEXP_INITNAME) &&
2088 (range2->type == XML_REGEXP_NOTINITNAME))
2089 ret = 0;
2090 else if ((range1->type == XML_REGEXP_NAMECHAR) &&
2091 (range2->type == XML_REGEXP_NOTNAMECHAR))
2092 ret = 0;
2093 else if ((range1->type == XML_REGEXP_DECIMAL) &&
2094 (range2->type == XML_REGEXP_NOTDECIMAL))
2095 ret = 0;
2096 else if ((range1->type == XML_REGEXP_REALCHAR) &&
2097 (range2->type == XML_REGEXP_NOTREALCHAR))
2098 ret = 0;
2099 else {
2100 /* same thing to limit complexity */
2101 return(1);
2102 }
2103 } else {
2104 ret = 0;
2105 /* range1->type < range2->type here */
2106 switch (range1->type) {
2107 case XML_REGEXP_LETTER:
2108 /* all disjoint except in the subgroups */
2109 if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
2110 (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
2111 (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
2112 (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
2113 (range2->type == XML_REGEXP_LETTER_OTHERS))
2114 ret = 1;
2115 break;
2116 case XML_REGEXP_MARK:
2117 if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
2118 (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
2119 (range2->type == XML_REGEXP_MARK_ENCLOSING))
2120 ret = 1;
2121 break;
2122 case XML_REGEXP_NUMBER:
2123 if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
2124 (range2->type == XML_REGEXP_NUMBER_LETTER) ||
2125 (range2->type == XML_REGEXP_NUMBER_OTHERS))
2126 ret = 1;
2127 break;
2128 case XML_REGEXP_PUNCT:
2129 if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
2130 (range2->type == XML_REGEXP_PUNCT_DASH) ||
2131 (range2->type == XML_REGEXP_PUNCT_OPEN) ||
2132 (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
2133 (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
2134 (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
2135 (range2->type == XML_REGEXP_PUNCT_OTHERS))
2136 ret = 1;
2137 break;
2138 case XML_REGEXP_SEPAR:
2139 if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
2140 (range2->type == XML_REGEXP_SEPAR_LINE) ||
2141 (range2->type == XML_REGEXP_SEPAR_PARA))
2142 ret = 1;
2143 break;
2144 case XML_REGEXP_SYMBOL:
2145 if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
2146 (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
2147 (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
2148 (range2->type == XML_REGEXP_SYMBOL_OTHERS))
2149 ret = 1;
2150 break;
2151 case XML_REGEXP_OTHER:
2152 if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
2153 (range2->type == XML_REGEXP_OTHER_FORMAT) ||
2154 (range2->type == XML_REGEXP_OTHER_PRIVATE))
2155 ret = 1;
2156 break;
2157 default:
2158 if ((range2->type >= XML_REGEXP_LETTER) &&
2159 (range2->type < XML_REGEXP_BLOCK_NAME))
2160 ret = 0;
2161 else {
2162 /* safety net ! */
2163 return(1);
2164 }
2165 }
2166 }
2167 if (((range1->neg == 0) && (range2->neg != 0)) ||
2168 ((range1->neg != 0) && (range2->neg == 0)))
2169 ret = !ret;
2170 return(ret);
2171}
2172
2183static int
2184xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
2185 if ((type1 == XML_REGEXP_EPSILON) ||
2186 (type1 == XML_REGEXP_CHARVAL) ||
2187 (type1 == XML_REGEXP_RANGES) ||
2188 (type1 == XML_REGEXP_SUBREG) ||
2189 (type1 == XML_REGEXP_STRING) ||
2190 (type1 == XML_REGEXP_ANYCHAR))
2191 return(1);
2192 if ((type2 == XML_REGEXP_EPSILON) ||
2193 (type2 == XML_REGEXP_CHARVAL) ||
2194 (type2 == XML_REGEXP_RANGES) ||
2195 (type2 == XML_REGEXP_SUBREG) ||
2196 (type2 == XML_REGEXP_STRING) ||
2197 (type2 == XML_REGEXP_ANYCHAR))
2198 return(1);
2199
2200 if (type1 == type2) return(1);
2201
2202 /* simplify subsequent compares by making sure type1 < type2 */
2203 if (type1 > type2) {
2204 xmlRegAtomType tmp = type1;
2205 type1 = type2;
2206 type2 = tmp;
2207 }
2208 switch (type1) {
2209 case XML_REGEXP_ANYSPACE: /* \s */
2210 /* can't be a letter, number, mark, punctuation, symbol */
2211 if ((type2 == XML_REGEXP_NOTSPACE) ||
2212 ((type2 >= XML_REGEXP_LETTER) &&
2213 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2214 ((type2 >= XML_REGEXP_NUMBER) &&
2215 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2216 ((type2 >= XML_REGEXP_MARK) &&
2217 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2218 ((type2 >= XML_REGEXP_PUNCT) &&
2219 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2220 ((type2 >= XML_REGEXP_SYMBOL) &&
2221 (type2 <= XML_REGEXP_SYMBOL_OTHERS))
2222 ) return(0);
2223 break;
2224 case XML_REGEXP_NOTSPACE: /* \S */
2225 break;
2226 case XML_REGEXP_INITNAME: /* \l */
2227 /* can't be a number, mark, separator, punctuation, symbol or other */
2228 if ((type2 == XML_REGEXP_NOTINITNAME) ||
2229 ((type2 >= XML_REGEXP_NUMBER) &&
2230 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2231 ((type2 >= XML_REGEXP_MARK) &&
2232 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2233 ((type2 >= XML_REGEXP_SEPAR) &&
2234 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2235 ((type2 >= XML_REGEXP_PUNCT) &&
2236 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2237 ((type2 >= XML_REGEXP_SYMBOL) &&
2238 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2239 ((type2 >= XML_REGEXP_OTHER) &&
2240 (type2 <= XML_REGEXP_OTHER_NA))
2241 ) return(0);
2242 break;
2243 case XML_REGEXP_NOTINITNAME: /* \L */
2244 break;
2245 case XML_REGEXP_NAMECHAR: /* \c */
2246 /* can't be a mark, separator, punctuation, symbol or other */
2247 if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
2248 ((type2 >= XML_REGEXP_MARK) &&
2249 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2250 ((type2 >= XML_REGEXP_PUNCT) &&
2251 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2252 ((type2 >= XML_REGEXP_SEPAR) &&
2253 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2254 ((type2 >= XML_REGEXP_SYMBOL) &&
2255 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2256 ((type2 >= XML_REGEXP_OTHER) &&
2257 (type2 <= XML_REGEXP_OTHER_NA))
2258 ) return(0);
2259 break;
2260 case XML_REGEXP_NOTNAMECHAR: /* \C */
2261 break;
2262 case XML_REGEXP_DECIMAL: /* \d */
2263 /* can't be a letter, mark, separator, punctuation, symbol or other */
2264 if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2265 (type2 == XML_REGEXP_REALCHAR) ||
2266 ((type2 >= XML_REGEXP_LETTER) &&
2267 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2268 ((type2 >= XML_REGEXP_MARK) &&
2269 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2270 ((type2 >= XML_REGEXP_PUNCT) &&
2271 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2272 ((type2 >= XML_REGEXP_SEPAR) &&
2273 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2274 ((type2 >= XML_REGEXP_SYMBOL) &&
2275 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2276 ((type2 >= XML_REGEXP_OTHER) &&
2277 (type2 <= XML_REGEXP_OTHER_NA))
2278 )return(0);
2279 break;
2280 case XML_REGEXP_NOTDECIMAL: /* \D */
2281 break;
2282 case XML_REGEXP_REALCHAR: /* \w */
2283 /* can't be a mark, separator, punctuation, symbol or other */
2284 if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2285 ((type2 >= XML_REGEXP_MARK) &&
2286 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2287 ((type2 >= XML_REGEXP_PUNCT) &&
2288 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2289 ((type2 >= XML_REGEXP_SEPAR) &&
2290 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2291 ((type2 >= XML_REGEXP_SYMBOL) &&
2292 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2293 ((type2 >= XML_REGEXP_OTHER) &&
2294 (type2 <= XML_REGEXP_OTHER_NA))
2295 )return(0);
2296 break;
2297 case XML_REGEXP_NOTREALCHAR: /* \W */
2298 break;
2299 /*
2300 * at that point we know both type 1 and type2 are from
2301 * character categories are ordered and are different,
2302 * it becomes simple because this is a partition
2303 */
2304 case XML_REGEXP_LETTER:
2305 if (type2 <= XML_REGEXP_LETTER_OTHERS)
2306 return(1);
2307 return(0);
2308 case XML_REGEXP_LETTER_UPPERCASE:
2309 case XML_REGEXP_LETTER_LOWERCASE:
2310 case XML_REGEXP_LETTER_TITLECASE:
2311 case XML_REGEXP_LETTER_MODIFIER:
2312 case XML_REGEXP_LETTER_OTHERS:
2313 return(0);
2314 case XML_REGEXP_MARK:
2315 if (type2 <= XML_REGEXP_MARK_ENCLOSING)
2316 return(1);
2317 return(0);
2318 case XML_REGEXP_MARK_NONSPACING:
2319 case XML_REGEXP_MARK_SPACECOMBINING:
2320 case XML_REGEXP_MARK_ENCLOSING:
2321 return(0);
2322 case XML_REGEXP_NUMBER:
2323 if (type2 <= XML_REGEXP_NUMBER_OTHERS)
2324 return(1);
2325 return(0);
2326 case XML_REGEXP_NUMBER_DECIMAL:
2327 case XML_REGEXP_NUMBER_LETTER:
2328 case XML_REGEXP_NUMBER_OTHERS:
2329 return(0);
2330 case XML_REGEXP_PUNCT:
2331 if (type2 <= XML_REGEXP_PUNCT_OTHERS)
2332 return(1);
2333 return(0);
2334 case XML_REGEXP_PUNCT_CONNECTOR:
2335 case XML_REGEXP_PUNCT_DASH:
2336 case XML_REGEXP_PUNCT_OPEN:
2337 case XML_REGEXP_PUNCT_CLOSE:
2338 case XML_REGEXP_PUNCT_INITQUOTE:
2339 case XML_REGEXP_PUNCT_FINQUOTE:
2340 case XML_REGEXP_PUNCT_OTHERS:
2341 return(0);
2342 case XML_REGEXP_SEPAR:
2343 if (type2 <= XML_REGEXP_SEPAR_PARA)
2344 return(1);
2345 return(0);
2346 case XML_REGEXP_SEPAR_SPACE:
2347 case XML_REGEXP_SEPAR_LINE:
2348 case XML_REGEXP_SEPAR_PARA:
2349 return(0);
2350 case XML_REGEXP_SYMBOL:
2351 if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
2352 return(1);
2353 return(0);
2354 case XML_REGEXP_SYMBOL_MATH:
2355 case XML_REGEXP_SYMBOL_CURRENCY:
2356 case XML_REGEXP_SYMBOL_MODIFIER:
2357 case XML_REGEXP_SYMBOL_OTHERS:
2358 return(0);
2359 case XML_REGEXP_OTHER:
2360 if (type2 <= XML_REGEXP_OTHER_NA)
2361 return(1);
2362 return(0);
2363 case XML_REGEXP_OTHER_CONTROL:
2364 case XML_REGEXP_OTHER_FORMAT:
2365 case XML_REGEXP_OTHER_PRIVATE:
2366 case XML_REGEXP_OTHER_NA:
2367 return(0);
2368 default:
2369 break;
2370 }
2371 return(1);
2372}
2373
2385static int
2386xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
2387 int ret = 0;
2388
2389 if (atom1 == atom2)
2390 return(1);
2391 if ((atom1 == NULL) || (atom2 == NULL))
2392 return(0);
2393
2394 if (atom1->type != atom2->type)
2395 return(0);
2396 switch (atom1->type) {
2397 case XML_REGEXP_EPSILON:
2398 ret = 0;
2399 break;
2400 case XML_REGEXP_STRING:
2401 if (!deep)
2402 ret = (atom1->valuep == atom2->valuep);
2403 else
2404 ret = xmlStrEqual((xmlChar *)atom1->valuep,
2405 (xmlChar *)atom2->valuep);
2406 break;
2407 case XML_REGEXP_CHARVAL:
2408 ret = (atom1->codepoint == atom2->codepoint);
2409 break;
2410 case XML_REGEXP_RANGES:
2411 /* too hard to do in the general case */
2412 ret = 0;
2413 default:
2414 break;
2415 }
2416 return(ret);
2417}
2418
2430static int
2431xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
2432 int ret = 1;
2433
2434 if (atom1 == atom2)
2435 return(1);
2436 if ((atom1 == NULL) || (atom2 == NULL))
2437 return(0);
2438
2439 if ((atom1->type == XML_REGEXP_ANYCHAR) ||
2440 (atom2->type == XML_REGEXP_ANYCHAR))
2441 return(1);
2442
2443 if (atom1->type > atom2->type) {
2444 xmlRegAtomPtr tmp;
2445 tmp = atom1;
2446 atom1 = atom2;
2447 atom2 = tmp;
2448 }
2449 if (atom1->type != atom2->type) {
2450 ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
2451 /* if they can't intersect at the type level break now */
2452 if (ret == 0)
2453 return(0);
2454 }
2455 switch (atom1->type) {
2456 case XML_REGEXP_STRING:
2457 if (!deep)
2458 ret = (atom1->valuep != atom2->valuep);
2459 else {
2460 xmlChar *val1 = (xmlChar *)atom1->valuep;
2461 xmlChar *val2 = (xmlChar *)atom2->valuep;
2462 int compound1 = (xmlStrchr(val1, '|') != NULL);
2463 int compound2 = (xmlStrchr(val2, '|') != NULL);
2464
2465 /* Ignore negative match flag for ##other namespaces */
2466 if (compound1 != compound2)
2467 return(0);
2468
2469 ret = xmlRegStrEqualWildcard(val1, val2);
2470 }
2471 break;
2472 case XML_REGEXP_EPSILON:
2473 goto not_determinist;
2474 case XML_REGEXP_CHARVAL:
2475 if (atom2->type == XML_REGEXP_CHARVAL) {
2476 ret = (atom1->codepoint == atom2->codepoint);
2477 } else {
2478 ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
2479 if (ret < 0)
2480 ret = 1;
2481 }
2482 break;
2483 case XML_REGEXP_RANGES:
2484 if (atom2->type == XML_REGEXP_RANGES) {
2485 int i, j, res;
2486 xmlRegRangePtr r1, r2;
2487
2488 /*
2489 * need to check that none of the ranges eventually matches
2490 */
2491 for (i = 0;i < atom1->nbRanges;i++) {
2492 for (j = 0;j < atom2->nbRanges;j++) {
2493 r1 = atom1->ranges[i];
2494 r2 = atom2->ranges[j];
2495 res = xmlFACompareRanges(r1, r2);
2496 if (res == 1) {
2497 ret = 1;
2498 goto done;
2499 }
2500 }
2501 }
2502 ret = 0;
2503 }
2504 break;
2505 default:
2506 goto not_determinist;
2507 }
2508done:
2509 if (atom1->neg != atom2->neg) {
2510 ret = !ret;
2511 }
2512 if (ret == 0)
2513 return(0);
2514not_determinist:
2515 return(1);
2516}
2517
2526static int
2527xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
2528 int fromnr, int tonr, xmlRegAtomPtr atom) {
2529 int ret = 1;
2530 int res;
2531 int transnr, nbTrans;
2532 xmlRegTransPtr t1;
2533 int deep = 1;
2534
2535 if (state == NULL)
2536 return(ret);
2537 if (state->markd == XML_REGEXP_MARK_VISITED)
2538 return(ret);
2539
2540 if (ctxt->flags & AM_AUTOMATA_RNG)
2541 deep = 0;
2542
2543 /*
2544 * don't recurse on transitions potentially added in the course of
2545 * the elimination.
2546 */
2547 nbTrans = state->nbTrans;
2548 for (transnr = 0;transnr < nbTrans;transnr++) {
2549 t1 = &(state->trans[transnr]);
2550 /*
2551 * check transitions conflicting with the one looked at
2552 */
2553 if ((t1->to < 0) || (t1->to == fromnr))
2554 continue;
2555 if (t1->atom == NULL) {
2556 state->markd = XML_REGEXP_MARK_VISITED;
2557 res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2558 fromnr, tonr, atom);
2559 if (res == 0) {
2560 ret = 0;
2561 /* t1->nd = 1; */
2562 }
2563 continue;
2564 }
2565 if (xmlFACompareAtoms(t1->atom, atom, deep)) {
2566 /* Treat equal transitions as deterministic. */
2567 if ((t1->to != tonr) ||
2568 (!xmlFAEqualAtoms(t1->atom, atom, deep)))
2569 ret = 0;
2570 /* mark the transition as non-deterministic */
2571 t1->nd = 1;
2572 }
2573 }
2574 return(ret);
2575}
2576
2583static void
2584xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
2585 int transnr, nbTrans;
2586
2587 if (state == NULL)
2588 return;
2589 if (state->markd != XML_REGEXP_MARK_VISITED)
2590 return;
2591 state->markd = 0;
2592
2593 nbTrans = state->nbTrans;
2594 for (transnr = 0; transnr < nbTrans; transnr++) {
2595 xmlRegTransPtr t1 = &state->trans[transnr];
2596 if ((t1->atom == NULL) && (t1->to >= 0))
2597 xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
2598 }
2599}
2600
2609static int
2610xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
2611 int statenr, transnr;
2612 xmlRegStatePtr state;
2613 xmlRegTransPtr t1, t2, last;
2614 int i;
2615 int ret = 1;
2616 int deep = 1;
2617
2618 if (ctxt->determinist != -1)
2619 return(ctxt->determinist);
2620
2621 if (ctxt->flags & AM_AUTOMATA_RNG)
2622 deep = 0;
2623
2624 /*
2625 * First cleanup the automata removing cancelled transitions
2626 */
2627 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2628 state = ctxt->states[statenr];
2629 if (state == NULL)
2630 continue;
2631 if (state->nbTrans < 2)
2632 continue;
2633 for (transnr = 0;transnr < state->nbTrans;transnr++) {
2634 t1 = &(state->trans[transnr]);
2635 /*
2636 * Determinism checks in case of counted or all transitions
2637 * will have to be handled separately
2638 */
2639 if (t1->atom == NULL) {
2640 /* t1->nd = 1; */
2641 continue;
2642 }
2643 if (t1->to < 0) /* eliminated */
2644 continue;
2645 for (i = 0;i < transnr;i++) {
2646 t2 = &(state->trans[i]);
2647 if (t2->to < 0) /* eliminated */
2648 continue;
2649 if (t2->atom != NULL) {
2650 if (t1->to == t2->to) {
2651 /*
2652 * Here we use deep because we want to keep the
2653 * transitions which indicate a conflict
2654 */
2655 if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
2656 (t1->counter == t2->counter) &&
2657 (t1->count == t2->count))
2658 t2->to = -1; /* eliminated */
2659 }
2660 }
2661 }
2662 }
2663 }
2664
2665 /*
2666 * Check for all states that there aren't 2 transitions
2667 * with the same atom and a different target.
2668 */
2669 for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2670 state = ctxt->states[statenr];
2671 if (state == NULL)
2672 continue;
2673 if (state->nbTrans < 2)
2674 continue;
2675 last = NULL;
2676 for (transnr = 0;transnr < state->nbTrans;transnr++) {
2677 t1 = &(state->trans[transnr]);
2678 /*
2679 * Determinism checks in case of counted or all transitions
2680 * will have to be handled separately
2681 */
2682 if (t1->atom == NULL) {
2683 continue;
2684 }
2685 if (t1->to < 0) /* eliminated */
2686 continue;
2687 for (i = 0;i < transnr;i++) {
2688 t2 = &(state->trans[i]);
2689 if (t2->to < 0) /* eliminated */
2690 continue;
2691 if (t2->atom != NULL) {
2692 /*
2693 * But here we don't use deep because we want to
2694 * find transitions which indicate a conflict
2695 */
2696 if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
2697 /*
2698 * Treat equal counter transitions that couldn't be
2699 * eliminated as deterministic.
2700 */
2701 if ((t1->to != t2->to) ||
2702 (t1->counter == t2->counter) ||
2703 (!xmlFAEqualAtoms(t1->atom, t2->atom, deep)))
2704 ret = 0;
2705 /* mark the transitions as non-deterministic ones */
2706 t1->nd = 1;
2707 t2->nd = 1;
2708 last = t1;
2709 }
2710 } else {
2711 int res;
2712
2713 /*
2714 * do the closure in case of remaining specific
2715 * epsilon transitions like choices or all
2716 */
2717 res = xmlFARecurseDeterminism(ctxt, ctxt->states[t2->to],
2718 statenr, t1->to, t1->atom);
2719 xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t2->to]);
2720 /* don't shortcut the computation so all non deterministic
2721 transition get marked down
2722 if (ret == 0)
2723 return(0);
2724 */
2725 if (res == 0) {
2726 t1->nd = 1;
2727 /* t2->nd = 1; */
2728 last = t1;
2729 ret = 0;
2730 }
2731 }
2732 }
2733 /* don't shortcut the computation so all non deterministic
2734 transition get marked down
2735 if (ret == 0)
2736 break; */
2737 }
2738
2739 /*
2740 * mark specifically the last non-deterministic transition
2741 * from a state since there is no need to set-up rollback
2742 * from it
2743 */
2744 if (last != NULL) {
2745 last->nd = 2;
2746 }
2747
2748 /* don't shortcut the computation so all non deterministic
2749 transition get marked down
2750 if (ret == 0)
2751 break; */
2752 }
2753
2754 ctxt->determinist = ret;
2755 return(ret);
2756}
2757
2758/************************************************************************
2759 * *
2760 * Routines to check input against transition atoms *
2761 * *
2762 ************************************************************************/
2763
2764static int
2765xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
2766 int start, int end, const xmlChar *blockName) {
2767 int ret = 0;
2768
2769 switch (type) {
2770 case XML_REGEXP_STRING:
2771 case XML_REGEXP_SUBREG:
2772 case XML_REGEXP_RANGES:
2773 case XML_REGEXP_EPSILON:
2774 return(-1);
2775 case XML_REGEXP_ANYCHAR:
2776 ret = ((codepoint != '\n') && (codepoint != '\r'));
2777 break;
2778 case XML_REGEXP_CHARVAL:
2779 ret = ((codepoint >= start) && (codepoint <= end));
2780 break;
2781 case XML_REGEXP_NOTSPACE:
2782 neg = !neg;
2783 /* Falls through. */
2784 case XML_REGEXP_ANYSPACE:
2785 ret = ((codepoint == '\n') || (codepoint == '\r') ||
2786 (codepoint == '\t') || (codepoint == ' '));
2787 break;
2788 case XML_REGEXP_NOTINITNAME:
2789 neg = !neg;
2790 /* Falls through. */
2791 case XML_REGEXP_INITNAME:
2792 ret = (IS_LETTER(codepoint) ||
2793 (codepoint == '_') || (codepoint == ':'));
2794 break;
2795 case XML_REGEXP_NOTNAMECHAR:
2796 neg = !neg;
2797 /* Falls through. */
2798 case XML_REGEXP_NAMECHAR:
2799 ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
2800 (codepoint == '.') || (codepoint == '-') ||
2801 (codepoint == '_') || (codepoint == ':') ||
2802 IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
2803 break;
2804 case XML_REGEXP_NOTDECIMAL:
2805 neg = !neg;
2806 /* Falls through. */
2807 case XML_REGEXP_DECIMAL:
2808 ret = xmlUCSIsCatNd(codepoint);
2809 break;
2810 case XML_REGEXP_REALCHAR:
2811 neg = !neg;
2812 /* Falls through. */
2813 case XML_REGEXP_NOTREALCHAR:
2814 ret = xmlUCSIsCatP(codepoint);
2815 if (ret == 0)
2816 ret = xmlUCSIsCatZ(codepoint);
2817 if (ret == 0)
2818 ret = xmlUCSIsCatC(codepoint);
2819 break;
2820 case XML_REGEXP_LETTER:
2821 ret = xmlUCSIsCatL(codepoint);
2822 break;
2823 case XML_REGEXP_LETTER_UPPERCASE:
2824 ret = xmlUCSIsCatLu(codepoint);
2825 break;
2826 case XML_REGEXP_LETTER_LOWERCASE:
2827 ret = xmlUCSIsCatLl(codepoint);
2828 break;
2829 case XML_REGEXP_LETTER_TITLECASE:
2830 ret = xmlUCSIsCatLt(codepoint);
2831 break;
2832 case XML_REGEXP_LETTER_MODIFIER:
2833 ret = xmlUCSIsCatLm(codepoint);
2834 break;
2835 case XML_REGEXP_LETTER_OTHERS:
2836 ret = xmlUCSIsCatLo(codepoint);
2837 break;
2838 case XML_REGEXP_MARK:
2839 ret = xmlUCSIsCatM(codepoint);
2840 break;
2841 case XML_REGEXP_MARK_NONSPACING:
2842 ret = xmlUCSIsCatMn(codepoint);
2843 break;
2844 case XML_REGEXP_MARK_SPACECOMBINING:
2845 ret = xmlUCSIsCatMc(codepoint);
2846 break;
2847 case XML_REGEXP_MARK_ENCLOSING:
2848 ret = xmlUCSIsCatMe(codepoint);
2849 break;
2850 case XML_REGEXP_NUMBER:
2851 ret = xmlUCSIsCatN(codepoint);
2852 break;
2853 case XML_REGEXP_NUMBER_DECIMAL:
2854 ret = xmlUCSIsCatNd(codepoint);
2855 break;
2856 case XML_REGEXP_NUMBER_LETTER:
2857 ret = xmlUCSIsCatNl(codepoint);
2858 break;
2859 case XML_REGEXP_NUMBER_OTHERS:
2860 ret = xmlUCSIsCatNo(codepoint);
2861 break;
2862 case XML_REGEXP_PUNCT:
2863 ret = xmlUCSIsCatP(codepoint);
2864 break;
2865 case XML_REGEXP_PUNCT_CONNECTOR:
2866 ret = xmlUCSIsCatPc(codepoint);
2867 break;
2868 case XML_REGEXP_PUNCT_DASH:
2869 ret = xmlUCSIsCatPd(codepoint);
2870 break;
2871 case XML_REGEXP_PUNCT_OPEN:
2872 ret = xmlUCSIsCatPs(codepoint);
2873 break;
2874 case XML_REGEXP_PUNCT_CLOSE:
2875 ret = xmlUCSIsCatPe(codepoint);
2876 break;
2877 case XML_REGEXP_PUNCT_INITQUOTE:
2878 ret = xmlUCSIsCatPi(codepoint);
2879 break;
2880 case XML_REGEXP_PUNCT_FINQUOTE:
2881 ret = xmlUCSIsCatPf(codepoint);
2882 break;
2883 case XML_REGEXP_PUNCT_OTHERS:
2884 ret = xmlUCSIsCatPo(codepoint);
2885 break;
2886 case XML_REGEXP_SEPAR:
2887 ret = xmlUCSIsCatZ(codepoint);
2888 break;
2889 case XML_REGEXP_SEPAR_SPACE:
2890 ret = xmlUCSIsCatZs(codepoint);
2891 break;
2892 case XML_REGEXP_SEPAR_LINE:
2893 ret = xmlUCSIsCatZl(codepoint);
2894 break;
2895 case XML_REGEXP_SEPAR_PARA:
2896 ret = xmlUCSIsCatZp(codepoint);
2897 break;
2898 case XML_REGEXP_SYMBOL:
2899 ret = xmlUCSIsCatS(codepoint);
2900 break;
2901 case XML_REGEXP_SYMBOL_MATH:
2902 ret = xmlUCSIsCatSm(codepoint);
2903 break;
2904 case XML_REGEXP_SYMBOL_CURRENCY:
2905 ret = xmlUCSIsCatSc(codepoint);
2906 break;
2907 case XML_REGEXP_SYMBOL_MODIFIER:
2908 ret = xmlUCSIsCatSk(codepoint);
2909 break;
2910 case XML_REGEXP_SYMBOL_OTHERS:
2911 ret = xmlUCSIsCatSo(codepoint);
2912 break;
2913 case XML_REGEXP_OTHER:
2914 ret = xmlUCSIsCatC(codepoint);
2915 break;
2916 case XML_REGEXP_OTHER_CONTROL:
2917 ret = xmlUCSIsCatCc(codepoint);
2918 break;
2919 case XML_REGEXP_OTHER_FORMAT:
2920 ret = xmlUCSIsCatCf(codepoint);
2921 break;
2922 case XML_REGEXP_OTHER_PRIVATE:
2923 ret = xmlUCSIsCatCo(codepoint);
2924 break;
2925 case XML_REGEXP_OTHER_NA:
2926 /* ret = xmlUCSIsCatCn(codepoint); */
2927 /* Seems it doesn't exist anymore in recent Unicode releases */
2928 ret = 0;
2929 break;
2930 case XML_REGEXP_BLOCK_NAME:
2931 ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
2932 break;
2933 }
2934 if (neg)
2935 return(!ret);
2936 return(ret);
2937}
2938
2939static int
2940xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
2941 int i, ret = 0;
2942 xmlRegRangePtr range;
2943
2944 if ((atom == NULL) || (!IS_CHAR(codepoint)))
2945 return(-1);
2946
2947 switch (atom->type) {
2948 case XML_REGEXP_SUBREG:
2949 case XML_REGEXP_EPSILON:
2950 return(-1);
2951 case XML_REGEXP_CHARVAL:
2952 return(codepoint == atom->codepoint);
2953 case XML_REGEXP_RANGES: {
2954 int accept = 0;
2955
2956 for (i = 0;i < atom->nbRanges;i++) {
2957 range = atom->ranges[i];
2958 if (range->neg == 2) {
2959 ret = xmlRegCheckCharacterRange(range->type, codepoint,
2960 0, range->start, range->end,
2961 range->blockName);
2962 if (ret != 0)
2963 return(0); /* excluded char */
2964 } else if (range->neg) {
2965 ret = xmlRegCheckCharacterRange(range->type, codepoint,
2966 0, range->start, range->end,
2967 range->blockName);
2968 if (ret == 0)
2969 accept = 1;
2970 else
2971 return(0);
2972 } else {
2973 ret = xmlRegCheckCharacterRange(range->type, codepoint,
2974 0, range->start, range->end,
2975 range->blockName);
2976 if (ret != 0)
2977 accept = 1; /* might still be excluded */
2978 }
2979 }
2980 return(accept);
2981 }
2982 case XML_REGEXP_STRING:
2983 printf("TODO: XML_REGEXP_STRING\n");
2984 return(-1);
2985 case XML_REGEXP_ANYCHAR:
2986 case XML_REGEXP_ANYSPACE:
2987 case XML_REGEXP_NOTSPACE:
2988 case XML_REGEXP_INITNAME:
2989 case XML_REGEXP_NOTINITNAME:
2990 case XML_REGEXP_NAMECHAR:
2991 case XML_REGEXP_NOTNAMECHAR:
2992 case XML_REGEXP_DECIMAL:
2993 case XML_REGEXP_NOTDECIMAL:
2994 case XML_REGEXP_REALCHAR:
2995 case XML_REGEXP_NOTREALCHAR:
2996 case XML_REGEXP_LETTER:
2997 case XML_REGEXP_LETTER_UPPERCASE:
2998 case XML_REGEXP_LETTER_LOWERCASE:
2999 case XML_REGEXP_LETTER_TITLECASE:
3000 case XML_REGEXP_LETTER_MODIFIER:
3001 case XML_REGEXP_LETTER_OTHERS:
3002 case XML_REGEXP_MARK:
3003 case XML_REGEXP_MARK_NONSPACING:
3004 case XML_REGEXP_MARK_SPACECOMBINING:
3005 case XML_REGEXP_MARK_ENCLOSING:
3006 case XML_REGEXP_NUMBER:
3007 case XML_REGEXP_NUMBER_DECIMAL:
3008 case XML_REGEXP_NUMBER_LETTER:
3009 case XML_REGEXP_NUMBER_OTHERS:
3010 case XML_REGEXP_PUNCT:
3011 case XML_REGEXP_PUNCT_CONNECTOR:
3012 case XML_REGEXP_PUNCT_DASH:
3013 case XML_REGEXP_PUNCT_OPEN:
3014 case XML_REGEXP_PUNCT_CLOSE:
3015 case XML_REGEXP_PUNCT_INITQUOTE:
3016 case XML_REGEXP_PUNCT_FINQUOTE:
3017 case XML_REGEXP_PUNCT_OTHERS:
3018 case XML_REGEXP_SEPAR:
3019 case XML_REGEXP_SEPAR_SPACE:
3020 case XML_REGEXP_SEPAR_LINE:
3021 case XML_REGEXP_SEPAR_PARA:
3022 case XML_REGEXP_SYMBOL:
3023 case XML_REGEXP_SYMBOL_MATH:
3024 case XML_REGEXP_SYMBOL_CURRENCY:
3025 case XML_REGEXP_SYMBOL_MODIFIER:
3026 case XML_REGEXP_SYMBOL_OTHERS:
3027 case XML_REGEXP_OTHER:
3028 case XML_REGEXP_OTHER_CONTROL:
3029 case XML_REGEXP_OTHER_FORMAT:
3030 case XML_REGEXP_OTHER_PRIVATE:
3031 case XML_REGEXP_OTHER_NA:
3032 case XML_REGEXP_BLOCK_NAME:
3033 ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
3034 (const xmlChar *)atom->valuep);
3035 if (atom->neg)
3036 ret = !ret;
3037 break;
3038 }
3039 return(ret);
3040}
3041
3042/************************************************************************
3043 * *
3044 * Saving and restoring state of an execution context *
3045 * *
3046 ************************************************************************/
3047
3048static void
3049xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
3050#ifdef MAX_PUSH
3051 if (exec->nbPush > MAX_PUSH) {
3052 exec->status = XML_REGEXP_INTERNAL_LIMIT;
3053 return;
3054 }
3055 exec->nbPush++;
3056#endif
3057
3058 if (exec->maxRollbacks == 0) {
3059 exec->maxRollbacks = 4;
3060 exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
3061 sizeof(xmlRegExecRollback));
3062 if (exec->rollbacks == NULL) {
3063 xmlRegexpErrMemory(NULL, "saving regexp");
3064 exec->maxRollbacks = 0;
3065 exec->status = XML_REGEXP_OUT_OF_MEMORY;
3066 return;
3067 }
3068 memset(exec->rollbacks, 0,
3069 exec->maxRollbacks * sizeof(xmlRegExecRollback));
3070 } else if (exec->nbRollbacks >= exec->maxRollbacks) {
3071 xmlRegExecRollback *tmp;
3072 int len = exec->maxRollbacks;
3073
3074 exec->maxRollbacks *= 2;
3075 tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
3076 exec->maxRollbacks * sizeof(xmlRegExecRollback));
3077 if (tmp == NULL) {
3078 xmlRegexpErrMemory(NULL, "saving regexp");
3079 exec->maxRollbacks /= 2;
3080 exec->status = XML_REGEXP_OUT_OF_MEMORY;
3081 return;
3082 }
3083 exec->rollbacks = tmp;
3084 tmp = &exec->rollbacks[len];
3085 memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
3086 }
3087 exec->rollbacks[exec->nbRollbacks].state = exec->state;
3088 exec->rollbacks[exec->nbRollbacks].index = exec->index;
3089 exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
3090 if (exec->comp->nbCounters > 0) {
3091 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3092 exec->rollbacks[exec->nbRollbacks].counts = (int *)
3093 xmlMalloc(exec->comp->nbCounters * sizeof(int));
3094 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3095 xmlRegexpErrMemory(NULL, "saving regexp");
3096 exec->status = XML_REGEXP_OUT_OF_MEMORY;
3097 return;
3098 }
3099 }
3100 memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
3101 exec->comp->nbCounters * sizeof(int));
3102 }
3103 exec->nbRollbacks++;
3104}
3105
3106static void
3107xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
3108 if (exec->status != XML_REGEXP_OK)
3109 return;
3110 if (exec->nbRollbacks <= 0) {
3111 exec->status = XML_REGEXP_NOT_FOUND;
3112 return;
3113 }
3114 exec->nbRollbacks--;
3115 exec->state = exec->rollbacks[exec->nbRollbacks].state;
3116 exec->index = exec->rollbacks[exec->nbRollbacks].index;
3117 exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
3118 if (exec->comp->nbCounters > 0) {
3119 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3120 fprintf(stderr, "exec save: allocation failed");
3121 exec->status = XML_REGEXP_INTERNAL_ERROR;
3122 return;
3123 }
3124 if (exec->counts) {
3125 memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
3126 exec->comp->nbCounters * sizeof(int));
3127 }
3128 }
3129}
3130
3131/************************************************************************
3132 * *
3133 * Verifier, running an input against a compiled regexp *
3134 * *
3135 ************************************************************************/
3136
3137static int
3138xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
3139 xmlRegExecCtxt execval;
3140 xmlRegExecCtxtPtr exec = &execval;
3141 int ret, codepoint = 0, len, deter;
3142
3143 exec->inputString = content;
3144 exec->index = 0;
3145 exec->nbPush = 0;
3146 exec->determinist = 1;
3147 exec->maxRollbacks = 0;
3148 exec->nbRollbacks = 0;
3149 exec->rollbacks = NULL;
3150 exec->status = XML_REGEXP_OK;
3151 exec->comp = comp;
3152 exec->state = comp->states[0];
3153 exec->transno = 0;
3154 exec->transcount = 0;
3155 exec->inputStack = NULL;
3156 exec->inputStackMax = 0;
3157 if (comp->nbCounters > 0) {
3158 exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
3159 if (exec->counts == NULL) {
3160 xmlRegexpErrMemory(NULL, "running regexp");
3161 return(XML_REGEXP_OUT_OF_MEMORY);
3162 }
3163 memset(exec->counts, 0, comp->nbCounters * sizeof(int));
3164 } else
3165 exec->counts = NULL;
3166 while ((exec->status == XML_REGEXP_OK) && (exec->state != NULL) &&
3167 ((exec->inputString[exec->index] != 0) ||
3168 ((exec->state != NULL) &&
3169 (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3170 xmlRegTransPtr trans;
3171 xmlRegAtomPtr atom;
3172
3173 /*
3174 * If end of input on non-terminal state, rollback, however we may
3175 * still have epsilon like transition for counted transitions
3176 * on counters, in that case don't break too early. Additionally,
3177 * if we are working on a range like "AB{0,2}", where B is not present,
3178 * we don't want to break.
3179 */
3180 len = 1;
3181 if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
3182 /*
3183 * if there is a transition, we must check if
3184 * atom allows minOccurs of 0
3185 */
3186 if (exec->transno < exec->state->nbTrans) {
3187 trans = &exec->state->trans[exec->transno];
3188 if (trans->to >=0) {
3189 atom = trans->atom;
3190 if (!((atom->min == 0) && (atom->max > 0)))
3191 goto rollback;
3192 }
3193 } else
3194 goto rollback;
3195 }
3196
3197 exec->transcount = 0;
3198 for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3199 trans = &exec->state->trans[exec->transno];
3200 if (trans->to < 0)
3201 continue;
3202 atom = trans->atom;
3203 ret = 0;
3204 deter = 1;
3205 if (trans->count >= 0) {
3206 int count;
3207 xmlRegCounterPtr counter;
3208
3209 if (exec->counts == NULL) {
3210 exec->status = XML_REGEXP_INTERNAL_ERROR;
3211 goto error;
3212 }
3213 /*
3214 * A counted transition.
3215 */
3216
3217 count = exec->counts[trans->count];
3218 counter = &exec->comp->counters[trans->count];
3219 ret = ((count >= counter->min) && (count <= counter->max));
3220 if ((ret) && (counter->min != counter->max))
3221 deter = 0;
3222 } else if (atom == NULL) {
3223 fprintf(stderr, "epsilon transition left at runtime\n");
3224 exec->status = XML_REGEXP_INTERNAL_ERROR;
3225 break;
3226 } else if (exec->inputString[exec->index] != 0) {
3227 len = 4;
3228 codepoint = xmlGetUTF8Char(&exec->inputString[exec->index],
3229 &len);
3230 if (codepoint < 0) {
3231 exec->status = XML_REGEXP_INVALID_UTF8;
3232 goto error;
3233 }
3234 ret = xmlRegCheckCharacter(atom, codepoint);
3235 if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
3236 xmlRegStatePtr to = comp->states[trans->to];
3237
3238 /*
3239 * this is a multiple input sequence
3240 * If there is a counter associated increment it now.
3241 * do not increment if the counter is already over the
3242 * maximum limit in which case get to next transition
3243 */
3244 if (trans->counter >= 0) {
3245 xmlRegCounterPtr counter;
3246
3247 if ((exec->counts == NULL) ||
3248 (exec->comp == NULL) ||
3249 (exec->comp->counters == NULL)) {
3250 exec->status = XML_REGEXP_INTERNAL_ERROR;
3251 goto error;
3252 }
3253 counter = &exec->comp->counters[trans->counter];
3254 if (exec->counts[trans->counter] >= counter->max)
3255 continue; /* for loop on transitions */
3256 }
3257 /* Save before incrementing */
3258 if (exec->state->nbTrans > exec->transno + 1) {
3259 xmlFARegExecSave(exec);
3260 if (exec->status != XML_REGEXP_OK)
3261 goto error;
3262 }
3263 if (trans->counter >= 0) {
3264 exec->counts[trans->counter]++;
3265 }
3266 exec->transcount = 1;
3267 do {
3268 /*
3269 * Try to progress as much as possible on the input
3270 */
3271 if (exec->transcount == atom->max) {
3272 break;
3273 }
3274 exec->index += len;
3275 /*
3276 * End of input: stop here
3277 */
3278 if (exec->inputString[exec->index] == 0) {
3279 exec->index -= len;
3280 break;
3281 }
3282 if (exec->transcount >= atom->min) {
3283 int transno = exec->transno;
3284 xmlRegStatePtr state = exec->state;
3285
3286 /*
3287 * The transition is acceptable save it
3288 */
3289 exec->transno = -1; /* trick */
3290 exec->state = to;
3291 xmlFARegExecSave(exec);
3292 if (exec->status != XML_REGEXP_OK)
3293 goto error;
3294 exec->transno = transno;
3295 exec->state = state;
3296 }
3297 len = 4;
3298 codepoint = xmlGetUTF8Char(
3299 &exec->inputString[exec->index], &len);
3300 if (codepoint < 0) {
3301 exec->status = XML_REGEXP_INVALID_UTF8;
3302 goto error;
3303 }
3304 ret = xmlRegCheckCharacter(atom, codepoint);
3305 exec->transcount++;
3306 } while (ret == 1);
3307 if (exec->transcount < atom->min)
3308 ret = 0;
3309
3310 /*
3311 * If the last check failed but one transition was found
3312 * possible, rollback
3313 */
3314 if (ret < 0)
3315 ret = 0;
3316 if (ret == 0) {
3317 goto rollback;
3318 }
3319 if (trans->counter >= 0) {
3320 if (exec->counts == NULL) {
3321 exec->status = XML_REGEXP_INTERNAL_ERROR;
3322 goto error;
3323 }
3324 exec->counts[trans->counter]--;
3325 }
3326 } else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
3327 /*
3328 * we don't match on the codepoint, but minOccurs of 0
3329 * says that's ok. Setting len to 0 inhibits stepping
3330 * over the codepoint.
3331 */
3332 exec->transcount = 1;
3333 len = 0;
3334 ret = 1;
3335 }
3336 } else if ((atom->min == 0) && (atom->max > 0)) {
3337 /* another spot to match when minOccurs is 0 */
3338 exec->transcount = 1;
3339 len = 0;
3340 ret = 1;
3341 }
3342 if (ret == 1) {
3343 if ((trans->nd == 1) ||
3344 ((trans->count >= 0) && (deter == 0) &&
3345 (exec->state->nbTrans > exec->transno + 1))) {
3346 xmlFARegExecSave(exec);
3347 if (exec->status != XML_REGEXP_OK)
3348 goto error;
3349 }
3350 if (trans->counter >= 0) {
3351 xmlRegCounterPtr counter;
3352
3353 /* make sure we don't go over the counter maximum value */
3354 if ((exec->counts == NULL) ||
3355 (exec->comp == NULL) ||
3356 (exec->comp->counters == NULL)) {
3357 exec->status = XML_REGEXP_INTERNAL_ERROR;
3358 goto error;
3359 }
3360 counter = &exec->comp->counters[trans->counter];
3361 if (exec->counts[trans->counter] >= counter->max)
3362 continue; /* for loop on transitions */
3363 exec->counts[trans->counter]++;
3364 }
3365 if ((trans->count >= 0) &&
3366 (trans->count < REGEXP_ALL_COUNTER)) {
3367 if (exec->counts == NULL) {
3368 exec->status = XML_REGEXP_INTERNAL_ERROR;
3369 goto error;
3370 }
3371 exec->counts[trans->count] = 0;
3372 }
3373 exec->state = comp->states[trans->to];
3374 exec->transno = 0;
3375 if (trans->atom != NULL) {
3376 exec->index += len;
3377 }
3378 goto progress;
3379 } else if (ret < 0) {
3380 exec->status = XML_REGEXP_INTERNAL_ERROR;
3381 break;
3382 }
3383 }
3384 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3385rollback:
3386 /*
3387 * Failed to find a way out
3388 */
3389 exec->determinist = 0;
3390 xmlFARegExecRollBack(exec);
3391 }
3392progress:
3393 continue;
3394 }
3395error:
3396 if (exec->rollbacks != NULL) {
3397 if (exec->counts != NULL) {
3398 int i;
3399
3400 for (i = 0;i < exec->maxRollbacks;i++)
3401 if (exec->rollbacks[i].counts != NULL)
3402 xmlFree(exec->rollbacks[i].counts);
3403 }
3404 xmlFree(exec->rollbacks);
3405 }
3406 if (exec->state == NULL)
3407 return(XML_REGEXP_INTERNAL_ERROR);
3408 if (exec->counts != NULL)
3409 xmlFree(exec->counts);
3410 if (exec->status == XML_REGEXP_OK)
3411 return(1);
3412 if (exec->status == XML_REGEXP_NOT_FOUND)
3413 return(0);
3414 return(exec->status);
3415}
3416
3417/************************************************************************
3418 * *
3419 * Progressive interface to the verifier one atom at a time *
3420 * *
3421 ************************************************************************/
3422
3434xmlRegExecCtxtPtr
3435xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
3436 xmlRegExecCtxtPtr exec;
3437
3438 if (comp == NULL)
3439 return(NULL);
3440 if ((comp->compact == NULL) && (comp->states == NULL))
3441 return(NULL);
3442 exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
3443 if (exec == NULL) {
3444 xmlRegexpErrMemory(NULL, "creating execution context");
3445 return(NULL);
3446 }
3447 memset(exec, 0, sizeof(xmlRegExecCtxt));
3448 exec->inputString = NULL;
3449 exec->index = 0;
3450 exec->determinist = 1;
3451 exec->maxRollbacks = 0;
3452 exec->nbRollbacks = 0;
3453 exec->rollbacks = NULL;
3454 exec->status = XML_REGEXP_OK;
3455 exec->comp = comp;
3456 if (comp->compact == NULL)
3457 exec->state = comp->states[0];
3458 exec->transno = 0;
3459 exec->transcount = 0;
3460 exec->callback = callback;
3461 exec->data = data;
3462 if (comp->nbCounters > 0) {
3463 /*
3464 * For error handling, exec->counts is allocated twice the size
3465 * the second half is used to store the data in case of rollback
3466 */
3467 exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
3468 * 2);
3469 if (exec->counts == NULL) {
3470 xmlRegexpErrMemory(NULL, "creating execution context");
3471 xmlFree(exec);
3472 return(NULL);
3473 }
3474 memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
3475 exec->errCounts = &exec->counts[comp->nbCounters];
3476 } else {
3477 exec->counts = NULL;
3478 exec->errCounts = NULL;
3479 }
3480 exec->inputStackMax = 0;
3481 exec->inputStackNr = 0;
3482 exec->inputStack = NULL;
3483 exec->errStateNo = -1;
3484 exec->errString = NULL;
3485 exec->nbPush = 0;
3486 return(exec);
3487}
3488
3495void
3496xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
3497 if (exec == NULL)
3498 return;
3499
3500 if (exec->rollbacks != NULL) {
3501 if (exec->counts != NULL) {
3502 int i;
3503
3504 for (i = 0;i < exec->maxRollbacks;i++)
3505 if (exec->rollbacks[i].counts != NULL)
3506 xmlFree(exec->rollbacks[i].counts);
3507 }
3508 xmlFree(exec->rollbacks);
3509 }
3510 if (exec->counts != NULL)
3511 xmlFree(exec->counts);
3512 if (exec->inputStack != NULL) {
3513 int i;
3514
3515 for (i = 0;i < exec->inputStackNr;i++) {
3516 if (exec->inputStack[i].value != NULL)
3517 xmlFree(exec->inputStack[i].value);
3518 }
3519 xmlFree(exec->inputStack);
3520 }
3521 if (exec->errString != NULL)
3522 xmlFree(exec->errString);
3523 xmlFree(exec);
3524}
3525
3526static void
3527xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
3528 void *data) {
3529 if (exec->inputStackMax == 0) {
3530 exec->inputStackMax = 4;
3531 exec->inputStack = (xmlRegInputTokenPtr)
3532 xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
3533 if (exec->inputStack == NULL) {
3534 xmlRegexpErrMemory(NULL, "pushing input string");
3535 exec->inputStackMax = 0;
3536 return;
3537 }
3538 } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
3539 xmlRegInputTokenPtr tmp;
3540
3541 exec->inputStackMax *= 2;
3542 tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
3543 exec->inputStackMax * sizeof(xmlRegInputToken));
3544 if (tmp == NULL) {
3545 xmlRegexpErrMemory(NULL, "pushing input string");
3546 exec->inputStackMax /= 2;
3547 return;
3548 }
3549 exec->inputStack = tmp;
3550 }
3551 exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
3552 exec->inputStack[exec->inputStackNr].data = data;
3553 exec->inputStackNr++;
3554 exec->inputStack[exec->inputStackNr].value = NULL;
3555 exec->inputStack[exec->inputStackNr].data = NULL;
3556}
3557
3571static int
3572xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
3573 if (expStr == valStr) return(1);
3574 if (expStr == NULL) return(0);
3575 if (valStr == NULL) return(0);
3576 do {
3577 /*
3578 * Eval if we have a wildcard for the current item.
3579 */
3580 if (*expStr != *valStr) {
3581 /* if one of them starts with a wildcard make valStr be it */
3582 if (*valStr == '*') {
3583 const xmlChar *tmp;
3584
3585 tmp = valStr;
3586 valStr = expStr;
3587 expStr = tmp;
3588 }
3589 if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
3590 do {
3591 if (*valStr == XML_REG_STRING_SEPARATOR)
3592 break;
3593 valStr++;
3594 } while (*valStr != 0);
3595 continue;
3596 } else
3597 return(0);
3598 }
3599 expStr++;
3600 valStr++;
3601 } while (*valStr != 0);
3602 if (*expStr != 0)
3603 return (0);
3604 else
3605 return (1);
3606}
3607
3620static int
3621xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
3622 xmlRegexpPtr comp,
3623 const xmlChar *value,
3624 void *data) {
3625 int state = exec->index;
3626 int i, target;
3627
3628 if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
3629 return(-1);
3630
3631 if (value == NULL) {
3632 /*
3633 * are we at a final state ?
3634 */
3635 if (comp->compact[state * (comp->nbstrings + 1)] ==
3636 XML_REGEXP_FINAL_STATE)
3637 return(1);
3638 return(0);
3639 }
3640
3641 /*
3642 * Examine all outside transitions from current state
3643 */
3644 for (i = 0;i < comp->nbstrings;i++) {
3645 target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3646 if ((target > 0) && (target <= comp->nbstates)) {
3647 target--; /* to avoid 0 */
3648 if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
3649 exec->index = target;
3650 if ((exec->callback != NULL) && (comp->transdata != NULL)) {
3651 exec->callback(exec->data, value,
3652 comp->transdata[state * comp->nbstrings + i], data);
3653 }
3654 if (comp->compact[target * (comp->nbstrings + 1)] ==
3655 XML_REGEXP_SINK_STATE)
3656 goto error;
3657
3658 if (comp->compact[target * (comp->nbstrings + 1)] ==
3659 XML_REGEXP_FINAL_STATE)
3660 return(1);
3661 return(0);
3662 }
3663 }
3664 }
3665 /*
3666 * Failed to find an exit transition out from current state for the
3667 * current token
3668 */
3669error:
3670 if (exec->errString != NULL)
3671 xmlFree(exec->errString);
3672 exec->errString = xmlStrdup(value);
3673 exec->errStateNo = state;
3674 exec->status = XML_REGEXP_NOT_FOUND;
3675 return(XML_REGEXP_NOT_FOUND);
3676}
3677
3690static int
3691xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
3692 void *data, int compound) {
3693 xmlRegTransPtr trans;
3694 xmlRegAtomPtr atom;
3695 int ret;
3696 int final = 0;
3697 int progress = 1;
3698
3699 if (exec == NULL)
3700 return(-1);
3701 if (exec->comp == NULL)
3702 return(-1);
3703 if (exec->status != XML_REGEXP_OK)
3704 return(exec->status);
3705
3706 if (exec->comp->compact != NULL)
3707 return(xmlRegCompactPushString(exec, exec->comp, value, data));
3708
3709 if (value == NULL) {
3710 if (exec->state->type == XML_REGEXP_FINAL_STATE)
3711 return(1);
3712 final = 1;
3713 }
3714
3715 /*
3716 * If we have an active rollback stack push the new value there
3717 * and get back to where we were left
3718 */
3719 if ((value != NULL) && (exec->inputStackNr > 0)) {
3720 xmlFARegExecSaveInputString(exec, value, data);
3721 value = exec->inputStack[exec->index].value;
3722 data = exec->inputStack[exec->index].data;
3723 }
3724
3725 while ((exec->status == XML_REGEXP_OK) &&
3726 ((value != NULL) ||
3727 ((final == 1) &&
3728 (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3729
3730 /*
3731 * End of input on non-terminal state, rollback, however we may
3732 * still have epsilon like transition for counted transitions
3733 * on counters, in that case don't break too early.
3734 */
3735 if ((value == NULL) && (exec->counts == NULL))
3736 goto rollback;
3737
3738 exec->transcount = 0;
3739 for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3740 trans = &exec->state->trans[exec->transno];
3741 if (trans->to < 0)
3742 continue;
3743 atom = trans->atom;
3744 ret = 0;
3745 if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3746 int i;
3747 int count;
3748 xmlRegTransPtr t;
3749 xmlRegCounterPtr counter;
3750
3751 ret = 0;
3752
3753 /*
3754 * Check all counted transitions from the current state
3755 */
3756 if ((value == NULL) && (final)) {
3757 ret = 1;
3758 } else if (value != NULL) {
3759 for (i = 0;i < exec->state->nbTrans;i++) {
3760 t = &exec->state->trans[i];
3761 if ((t->counter < 0) || (t == trans))
3762 continue;
3763 counter = &exec->comp->counters[t->counter];
3764 count = exec->counts[t->counter];
3765 if ((count < counter->max) &&
3766 (t->atom != NULL) &&
3767 (xmlStrEqual(value, t->atom->valuep))) {
3768 ret = 0;
3769 break;
3770 }
3771 if ((count >= counter->min) &&
3772 (count < counter->max) &&
3773 (t->atom != NULL) &&
3774 (xmlStrEqual(value, t->atom->valuep))) {
3775 ret = 1;
3776 break;
3777 }
3778 }
3779 }
3780 } else if (trans->count == REGEXP_ALL_COUNTER) {
3781 int i;
3782 int count;
3783 xmlRegTransPtr t;
3784 xmlRegCounterPtr counter;
3785
3786 ret = 1;
3787
3788 /*
3789 * Check all counted transitions from the current state
3790 */
3791 for (i = 0;i < exec->state->nbTrans;i++) {
3792 t = &exec->state->trans[i];
3793 if ((t->counter < 0) || (t == trans))
3794 continue;
3795 counter = &exec->comp->counters[t->counter];
3796 count = exec->counts[t->counter];
3797 if ((count < counter->min) || (count > counter->max)) {
3798 ret = 0;
3799 break;
3800 }
3801 }
3802 } else if (trans->count >= 0) {
3803 int count;
3804 xmlRegCounterPtr counter;
3805
3806 /*
3807 * A counted transition.
3808 */
3809
3810 count = exec->counts[trans->count];
3811 counter = &exec->comp->counters[trans->count];
3812 ret = ((count >= counter->min) && (count <= counter->max));
3813 } else if (atom == NULL) {
3814 fprintf(stderr, "epsilon transition left at runtime\n");
3815 exec->status = XML_REGEXP_INTERNAL_ERROR;
3816 break;
3817 } else if (value != NULL) {
3818 ret = xmlRegStrEqualWildcard(atom->valuep, value);
3819 if (atom->neg) {
3820 ret = !ret;
3821 if (!compound)
3822 ret = 0;
3823 }
3824 if ((ret == 1) && (trans->counter >= 0)) {
3825 xmlRegCounterPtr counter;
3826 int count;
3827
3828 count = exec->counts[trans->counter];
3829 counter = &exec->comp->counters[trans->counter];
3830 if (count >= counter->max)
3831 ret = 0;
3832 }
3833
3834 if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
3835 xmlRegStatePtr to = exec->comp->states[trans->to];
3836
3837 /*
3838 * this is a multiple input sequence
3839 */
3840 if (exec->state->nbTrans > exec->transno + 1) {
3841 if (exec->inputStackNr <= 0) {
3842 xmlFARegExecSaveInputString(exec, value, data);
3843 }
3844 xmlFARegExecSave(exec);
3845 }
3846 exec->transcount = 1;
3847 do {
3848 /*
3849 * Try to progress as much as possible on the input
3850 */
3851 if (exec->transcount == atom->max) {
3852 break;
3853 }
3854 exec->index++;
3855 value = exec->inputStack[exec->index].value;
3856 data = exec->inputStack[exec->index].data;
3857
3858 /*
3859 * End of input: stop here
3860 */
3861 if (value == NULL) {
3862 exec->index --;
3863 break;
3864 }
3865 if (exec->transcount >= atom->min) {
3866 int transno = exec->transno;
3867 xmlRegStatePtr state = exec->state;
3868
3869 /*
3870 * The transition is acceptable save it
3871 */
3872 exec->transno = -1; /* trick */
3873 exec->state = to;
3874 if (exec->inputStackNr <= 0) {
3875 xmlFARegExecSaveInputString(exec, value, data);
3876 }
3877 xmlFARegExecSave(exec);
3878 exec->transno = transno;
3879 exec->state = state;
3880 }
3881 ret = xmlStrEqual(value, atom->valuep);
3882 exec->transcount++;
3883 } while (ret == 1);
3884 if (exec->transcount < atom->min)
3885 ret = 0;
3886
3887 /*
3888 * If the last check failed but one transition was found
3889 * possible, rollback
3890 */
3891 if (ret < 0)
3892 ret = 0;
3893 if (ret == 0) {
3894 goto rollback;
3895 }
3896 }
3897 }
3898 if (ret == 1) {
3899 if ((exec->callback != NULL) && (atom != NULL) &&
3900 (data != NULL)) {
3901 exec->callback(exec->data, atom->valuep,
3902 atom->data, data);
3903 }
3904 if (exec->state->nbTrans > exec->transno + 1) {
3905 if (exec->inputStackNr <= 0) {
3906 xmlFARegExecSaveInputString(exec, value, data);
3907 }
3908 xmlFARegExecSave(exec);
3909 }
3910 if (trans->counter >= 0) {
3911 exec->counts[trans->counter]++;
3912 }
3913 if ((trans->count >= 0) &&
3914 (trans->count < REGEXP_ALL_COUNTER)) {
3915 exec->counts[trans->count] = 0;
3916 }
3917 if ((exec->comp->states[trans->to] != NULL) &&
3918 (exec->comp->states[trans->to]->type ==
3919 XML_REGEXP_SINK_STATE)) {
3920 /*
3921 * entering a sink state, save the current state as error
3922 * state.
3923 */
3924 if (exec->errString != NULL)
3925 xmlFree(exec->errString);
3926 exec->errString = xmlStrdup(value);
3927 exec->errState = exec->state;
3928 memcpy(exec->errCounts, exec->counts,
3929 exec->comp->nbCounters * sizeof(int));
3930 }
3931 exec->state = exec->comp->states[trans->to];
3932 exec->transno = 0;
3933 if (trans->atom != NULL) {
3934 if (exec->inputStack != NULL) {
3935 exec->index++;
3936 if (exec->index < exec->inputStackNr) {
3937 value = exec->inputStack[exec->index].value;
3938 data = exec->inputStack[exec->index].data;
3939 } else {
3940 value = NULL;
3941 data = NULL;
3942 }
3943 } else {
3944 value = NULL;
3945 data = NULL;
3946 }
3947 }
3948 goto progress;
3949 } else if (ret < 0) {
3950 exec->status = XML_REGEXP_INTERNAL_ERROR;
3951 break;
3952 }
3953 }
3954 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3955rollback:
3956 /*
3957 * if we didn't yet rollback on the current input
3958 * store the current state as the error state.
3959 */
3960 if ((progress) && (exec->state != NULL) &&
3961 (exec->state->type != XML_REGEXP_SINK_STATE)) {
3962 progress = 0;
3963 if (exec->errString != NULL)
3964 xmlFree(exec->errString);
3965 exec->errString = xmlStrdup(value);
3966 exec->errState = exec->state;
3967 if (exec->comp->nbCounters)
3968 memcpy(exec->errCounts, exec->counts,
3969 exec->comp->nbCounters * sizeof(int));
3970 }
3971
3972 /*
3973 * Failed to find a way out
3974 */
3975 exec->determinist = 0;
3976 xmlFARegExecRollBack(exec);
3977 if ((exec->inputStack != NULL ) &&
3978 (exec->status == XML_REGEXP_OK)) {
3979 value = exec->inputStack[exec->index].value;
3980 data = exec->inputStack[exec->index].data;
3981 }
3982 }
3983 continue;
3984progress:
3985 progress = 1;
3986 continue;
3987 }
3988 if (exec->status == XML_REGEXP_OK) {
3989 return(exec->state->type == XML_REGEXP_FINAL_STATE);
3990 }
3991 return(exec->status);
3992}
3993
4005int
4006xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
4007 void *data) {
4008 return(xmlRegExecPushStringInternal(exec, value, data, 0));
4009}
4010
4023int
4024xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
4025 const xmlChar *value2, void *data) {
4026 xmlChar buf[150];
4027 int lenn, lenp, ret;
4028 xmlChar *str;
4029
4030 if (exec == NULL)
4031 return(-1);
4032 if (exec->comp == NULL)
4033 return(-1);
4034 if (exec->status != XML_REGEXP_OK)
4035 return(exec->status);
4036
4037 if (value2 == NULL)
4038 return(xmlRegExecPushString(exec, value, data));
4039
4040 lenn = strlen((char *) value2);
4041 lenp = strlen((char *) value);
4042
4043 if (150 < lenn + lenp + 2) {
4044 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
4045 if (str == NULL) {
4046 exec->status = XML_REGEXP_OUT_OF_MEMORY;
4047 return(-1);
4048 }
4049 } else {
4050 str = buf;
4051 }
4052 memcpy(&str[0], value, lenp);
4053 str[lenp] = XML_REG_STRING_SEPARATOR;
4054 memcpy(&str[lenp + 1], value2, lenn);
4055 str[lenn + lenp + 1] = 0;
4056
4057 if (exec->comp->compact != NULL)
4058 ret = xmlRegCompactPushString(exec, exec->comp, str, data);
4059 else
4060 ret = xmlRegExecPushStringInternal(exec, str, data, 1);
4061
4062 if (str != buf)
4063 xmlFree(str);
4064 return(ret);
4065}
4066
4081static int
4082xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
4083 int *nbval, int *nbneg,
4084 xmlChar **values, int *terminal) {
4085 int maxval;
4086 int nb = 0;
4087
4088 if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
4089 (values == NULL) || (*nbval <= 0))
4090 return(-1);
4091
4092 maxval = *nbval;
4093 *nbval = 0;
4094 *nbneg = 0;
4095 if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
4096 xmlRegexpPtr comp;
4097 int target, i, state;
4098
4099 comp = exec->comp;
4100
4101 if (err) {
4102 if (exec->errStateNo == -1) return(-1);
4103 state = exec->errStateNo;
4104 } else {
4105 state = exec->index;
4106 }
4107 if (terminal != NULL) {
4108 if (comp->compact[state * (comp->nbstrings + 1)] ==
4109 XML_REGEXP_FINAL_STATE)
4110 *terminal = 1;
4111 else
4112 *terminal = 0;
4113 }
4114 for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
4115 target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
4116 if ((target > 0) && (target <= comp->nbstates) &&
4117 (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
4118 XML_REGEXP_SINK_STATE)) {
4119 values[nb++] = comp->stringMap[i];
4120 (*nbval)++;
4121 }
4122 }
4123 for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
4124 target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
4125 if ((target > 0) && (target <= comp->nbstates) &&
4126 (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
4127 XML_REGEXP_SINK_STATE)) {
4128 values[nb++] = comp->stringMap[i];
4129 (*nbneg)++;
4130 }
4131 }
4132 } else {
4133 int transno;
4134 xmlRegTransPtr trans;
4135 xmlRegAtomPtr atom;
4136 xmlRegStatePtr state;
4137
4138 if (terminal != NULL) {
4139 if (exec->state->type == XML_REGEXP_FINAL_STATE)
4140 *terminal = 1;
4141 else
4142 *terminal = 0;
4143 }
4144
4145 if (err) {
4146 if (exec->errState == NULL) return(-1);
4147 state = exec->errState;
4148 } else {
4149 if (exec->state == NULL) return(-1);
4150 state = exec->state;
4151 }
4152 for (transno = 0;
4153 (transno < state->nbTrans) && (nb < maxval);
4154 transno++) {
4155 trans = &state->trans[transno];
4156 if (trans->to < 0)
4157 continue;
4158 atom = trans->atom;
4159 if ((atom == NULL) || (atom->valuep == NULL))
4160 continue;
4161 if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4162 /* this should not be reached but ... */
4163 TODO;
4164 } else if (trans->count == REGEXP_ALL_COUNTER) {
4165 /* this should not be reached but ... */
4166 TODO;
4167 } else if (trans->counter >= 0) {
4168 xmlRegCounterPtr counter = NULL;
4169 int count;
4170
4171 if (err)
4172 count = exec->errCounts[trans->counter];
4173 else
4174 count = exec->counts[trans->counter];
4175 if (exec->comp != NULL)
4176 counter = &exec->comp->counters[trans->counter];
4177 if ((counter == NULL) || (count < counter->max)) {
4178 if (atom->neg)
4179 values[nb++] = (xmlChar *) atom->valuep2;
4180 else
4181 values[nb++] = (xmlChar *) atom->valuep;
4182 (*nbval)++;
4183 }
4184 } else {
4185 if ((exec->comp != NULL) && (exec->comp->states[trans->to] != NULL) &&
4186 (exec->comp->states[trans->to]->type !=
4187 XML_REGEXP_SINK_STATE)) {
4188 if (atom->neg)
4189 values[nb++] = (xmlChar *) atom->valuep2;
4190 else
4191 values[nb++] = (xmlChar *) atom->valuep;
4192 (*nbval)++;
4193 }
4194 }
4195 }
4196 for (transno = 0;
4197 (transno < state->nbTrans) && (nb < maxval);
4198 transno++) {
4199 trans = &state->trans[transno];
4200 if (trans->to < 0)
4201 continue;
4202 atom = trans->atom;
4203 if ((atom == NULL) || (atom->valuep == NULL))
4204 continue;
4205 if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4206 continue;
4207 } else if (trans->count == REGEXP_ALL_COUNTER) {
4208 continue;
4209 } else if (trans->counter >= 0) {
4210 continue;
4211 } else {
4212 if ((exec->comp->states[trans->to] != NULL) &&
4213 (exec->comp->states[trans->to]->type ==
4214 XML_REGEXP_SINK_STATE)) {
4215 if (atom->neg)
4216 values[nb++] = (xmlChar *) atom->valuep2;
4217 else
4218 values[nb++] = (xmlChar *) atom->valuep;
4219 (*nbneg)++;
4220 }
4221 }
4222 }
4223 }
4224 return(0);
4225}
4226
4244int
4245xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
4246 xmlChar **values, int *terminal) {
4247 return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
4248}
4249
4269int
4270xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
4271 int *nbval, int *nbneg, xmlChar **values, int *terminal) {
4272 if (exec == NULL)
4273 return(-1);
4274 if (string != NULL) {
4275 if (exec->status != XML_REGEXP_OK)
4276 *string = exec->errString;
4277 else
4278 *string = NULL;
4279 }
4280 return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
4281}
4282
4283#if 0
4284static int
4285xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
4286 xmlRegTransPtr trans;
4287 xmlRegAtomPtr atom;
4288 int ret;
4289 int codepoint, len;
4290
4291 if (exec == NULL)
4292 return(-1);
4293 if (exec->status != XML_REGEXP_OK)
4294 return(exec->status);
4295
4296 while ((exec->status == XML_REGEXP_OK) &&
4297 ((exec->inputString[exec->index] != 0) ||
4298 (exec->state->type != XML_REGEXP_FINAL_STATE))) {
4299
4300 /*
4301 * End of input on non-terminal state, rollback, however we may
4302 * still have epsilon like transition for counted transitions
4303 * on counters, in that case don't break too early.
4304 */
4305 if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
4306 goto rollback;
4307
4308 exec->transcount = 0;
4309 for (;exec->transno < exec->state->nbTrans;exec->transno++) {
4310 trans = &exec->state->trans[exec->transno];
4311 if (trans->to < 0)
4312 continue;
4313 atom = trans->atom;
4314 ret = 0;
4315 if (trans->count >= 0) {
4316 int count;
4317 xmlRegCounterPtr counter;
4318
4319 /*
4320 * A counted transition.
4321 */
4322
4323 count = exec->counts[trans->count];
4324 counter = &exec->comp->counters[trans->count];
4325 ret = ((count >= counter->min) && (count <= counter->max));
4326 } else if (atom == NULL) {
4327 fprintf(stderr, "epsilon transition left at runtime\n");
4328 exec->status = XML_REGEXP_INTERNAL_ERROR;
4329 break;
4330 } else if (exec->inputString[exec->index] != 0) {
4331 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
4332 ret = xmlRegCheckCharacter(atom, codepoint);
4333 if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
4334 xmlRegStatePtr to = exec->comp->states[trans->to];
4335
4336 /*
4337 * this is a multiple input sequence
4338 */
4339 if (exec->state->nbTrans > exec->transno + 1) {
4340 xmlFARegExecSave(exec);
4341 }
4342 exec->transcount = 1;
4343 do {
4344 /*
4345 * Try to progress as much as possible on the input
4346 */
4347 if (exec->transcount == atom->max) {
4348 break;
4349 }
4350 exec->index += len;
4351 /*
4352 * End of input: stop here
4353 */
4354 if (exec->inputString[exec->index] == 0) {
4355 exec->index -= len;
4356 break;
4357 }
4358 if (exec->transcount >= atom->min) {
4359 int transno = exec->transno;
4360 xmlRegStatePtr state = exec->state;
4361
4362 /*
4363 * The transition is acceptable save it
4364 */
4365 exec->transno = -1; /* trick */
4366 exec->state = to;
4367 xmlFARegExecSave(exec);
4368 exec->transno = transno;
4369 exec->state = state;
4370 }
4371 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
4372 len);
4373 ret = xmlRegCheckCharacter(atom, codepoint);
4374 exec->transcount++;
4375 } while (ret == 1);
4376 if (exec->transcount < atom->min)
4377 ret = 0;
4378
4379 /*
4380 * If the last check failed but one transition was found
4381 * possible, rollback
4382 */
4383 if (ret < 0)
4384 ret = 0;
4385 if (ret == 0) {
4386 goto rollback;
4387 }
4388 }
4389 }
4390 if (ret == 1) {
4391 if (exec->state->nbTrans > exec->transno + 1) {
4392 xmlFARegExecSave(exec);
4393 }
4394 /*
4395 * restart count for expressions like this ((abc){2})*
4396 */
4397 if (trans->count >= 0) {
4398 exec->counts[trans->count] = 0;
4399 }
4400 if (trans->counter >= 0) {
4401 exec->counts[trans->counter]++;
4402 }
4403 exec->state = exec->comp->states[trans->to];
4404 exec->transno = 0;
4405 if (trans->atom != NULL) {
4406 exec->index += len;
4407 }
4408 goto progress;
4409 } else if (ret < 0) {
4410 exec->status = XML_REGEXP_INTERNAL_ERROR;
4411 break;
4412 }
4413 }
4414 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4415rollback:
4416 /*
4417 * Failed to find a way out
4418 */
4419 exec->determinist = 0;
4420 xmlFARegExecRollBack(exec);
4421 }
4422progress:
4423 continue;
4424 }
4425}
4426#endif
4427/************************************************************************
4428 * *
4429 * Parser for the Schemas Datatype Regular Expressions *
4430 * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs *
4431 * *
4432 ************************************************************************/
4433
4440static int
4441xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
4442 int cur;
4443 int len;
4444
4445 len = 4;
4446 cur = xmlGetUTF8Char(ctxt->cur, &len);
4447 if (cur < 0) {
4448 ERROR("Invalid UTF-8");
4449 return(0);
4450 }
4451 if ((cur == '.') || (cur == '\\') || (cur == '?') ||
4452 (cur == '*') || (cur == '+') || (cur == '(') ||
4453 (cur == ')') || (cur == '|') || (cur == 0x5B) ||
4454 (cur == 0x5D) || (cur == 0))
4455 return(-1);
4456 return(cur);
4457}
4458
4475static void
4476xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
4477 int cur;
4478 xmlRegAtomType type = (xmlRegAtomType) 0;
4479 xmlChar *blockName = NULL;
4480
4481 cur = CUR;
4482 if (cur == 'L') {
4483 NEXT;
4484 cur = CUR;
4485 if (cur == 'u') {
4486 NEXT;
4487 type = XML_REGEXP_LETTER_UPPERCASE;
4488 } else if (cur == 'l') {
4489 NEXT;
4490 type = XML_REGEXP_LETTER_LOWERCASE;
4491 } else if (cur == 't') {
4492 NEXT;
4493 type = XML_REGEXP_LETTER_TITLECASE;
4494 } else if (cur == 'm') {
4495 NEXT;
4496 type = XML_REGEXP_LETTER_MODIFIER;
4497 } else if (cur == 'o') {
4498 NEXT;
4499 type = XML_REGEXP_LETTER_OTHERS;
4500 } else {
4501 type = XML_REGEXP_LETTER;
4502 }
4503 } else if (cur == 'M') {
4504 NEXT;
4505 cur = CUR;
4506 if (cur == 'n') {
4507 NEXT;
4508 /* nonspacing */
4509 type = XML_REGEXP_MARK_NONSPACING;
4510 } else if (cur == 'c') {
4511 NEXT;
4512 /* spacing combining */
4513 type = XML_REGEXP_MARK_SPACECOMBINING;
4514 } else if (cur == 'e') {
4515 NEXT;
4516 /* enclosing */
4517 type = XML_REGEXP_MARK_ENCLOSING;
4518 } else {
4519 /* all marks */
4520 type = XML_REGEXP_MARK;
4521 }
4522 } else if (cur == 'N') {
4523 NEXT;
4524 cur = CUR;
4525 if (cur == 'd') {
4526 NEXT;
4527 /* digital */
4528 type = XML_REGEXP_NUMBER_DECIMAL;
4529 } else if (cur == 'l') {
4530 NEXT;
4531 /* letter */
4532 type = XML_REGEXP_NUMBER_LETTER;
4533 } else if (cur == 'o') {
4534 NEXT;
4535 /* other */
4536 type = XML_REGEXP_NUMBER_OTHERS;
4537 } else {
4538 /* all numbers */
4539 type = XML_REGEXP_NUMBER;
4540 }
4541 } else if (cur == 'P') {
4542 NEXT;
4543 cur = CUR;
4544 if (cur == 'c') {
4545 NEXT;
4546 /* connector */
4547 type = XML_REGEXP_PUNCT_CONNECTOR;
4548 } else if (cur == 'd') {
4549 NEXT;
4550 /* dash */
4551 type = XML_REGEXP_PUNCT_DASH;
4552 } else if (cur == 's') {
4553 NEXT;
4554 /* open */
4555 type = XML_REGEXP_PUNCT_OPEN;
4556 } else if (cur == 'e') {
4557 NEXT;
4558 /* close */
4559 type = XML_REGEXP_PUNCT_CLOSE;
4560 } else if (cur == 'i') {
4561 NEXT;
4562 /* initial quote */
4563 type = XML_REGEXP_PUNCT_INITQUOTE;
4564 } else if (cur == 'f') {
4565 NEXT;
4566 /* final quote */
4567 type = XML_REGEXP_PUNCT_FINQUOTE;
4568 } else if (cur == 'o') {
4569 NEXT;
4570 /* other */
4571 type = XML_REGEXP_PUNCT_OTHERS;
4572 } else {
4573 /* all punctuation */
4574 type = XML_REGEXP_PUNCT;
4575 }
4576 } else if (cur == 'Z') {
4577 NEXT;
4578 cur = CUR;
4579 if (cur == 's') {
4580 NEXT;
4581 /* space */
4582 type = XML_REGEXP_SEPAR_SPACE;
4583 } else if (cur == 'l') {
4584 NEXT;
4585 /* line */
4586 type = XML_REGEXP_SEPAR_LINE;
4587 } else if (cur == 'p') {
4588 NEXT;
4589 /* paragraph */
4590 type = XML_REGEXP_SEPAR_PARA;
4591 } else {
4592 /* all separators */
4593 type = XML_REGEXP_SEPAR;
4594 }
4595 } else if (cur == 'S') {
4596 NEXT;
4597 cur = CUR;
4598 if (cur == 'm') {
4599 NEXT;
4600 type = XML_REGEXP_SYMBOL_MATH;
4601 /* math */
4602 } else if (cur == 'c') {
4603 NEXT;
4604 type = XML_REGEXP_SYMBOL_CURRENCY;
4605 /* currency */
4606 } else if (cur == 'k') {
4607 NEXT;
4608 type = XML_REGEXP_SYMBOL_MODIFIER;
4609 /* modifiers */
4610 } else if (cur == 'o') {
4611 NEXT;
4612 type = XML_REGEXP_SYMBOL_OTHERS;
4613 /* other */
4614 } else {
4615 /* all symbols */
4616 type = XML_REGEXP_SYMBOL;
4617 }
4618 } else if (cur == 'C') {
4619 NEXT;
4620 cur = CUR;
4621 if (cur == 'c') {
4622 NEXT;
4623 /* control */
4624 type = XML_REGEXP_OTHER_CONTROL;
4625 } else if (cur == 'f') {
4626 NEXT;
4627 /* format */
4628 type = XML_REGEXP_OTHER_FORMAT;
4629 } else if (cur == 'o') {
4630 NEXT;
4631 /* private use */
4632 type = XML_REGEXP_OTHER_PRIVATE;
4633 } else if (cur == 'n') {
4634 NEXT;
4635 /* not assigned */
4636 type = XML_REGEXP_OTHER_NA;
4637 } else {
4638 /* all others */
4639 type = XML_REGEXP_OTHER;
4640 }
4641 } else if (cur == 'I') {
4642 const xmlChar *start;
4643 NEXT;
4644 cur = CUR;
4645 if (cur != 's') {
4646 ERROR("IsXXXX expected");
4647 return;
4648 }
4649 NEXT;
4650 start = ctxt->cur;
4651 cur = CUR;
4652 if (((cur >= 'a') && (cur <= 'z')) ||
4653 ((cur >= 'A') && (cur <= 'Z')) ||
4654 ((cur >= '0') && (cur <= '9')) ||
4655 (cur == 0x2D)) {
4656 NEXT;
4657 cur = CUR;
4658 while (((cur >= 'a') && (cur <= 'z')) ||
4659 ((cur >= 'A') && (cur <= 'Z')) ||
4660 ((cur >= '0') && (cur <= '9')) ||
4661 (cur == 0x2D)) {
4662 NEXT;
4663 cur = CUR;
4664 }
4665 }
4666 type = XML_REGEXP_BLOCK_NAME;
4667 blockName = xmlStrndup(start, ctxt->cur - start);
4668 } else {
4669 ERROR("Unknown char property");
4670 return;
4671 }
4672 if (ctxt->atom == NULL) {
4673 ctxt->atom = xmlRegNewAtom(ctxt, type);
4674 if (ctxt->atom == NULL) {
4675 xmlFree(blockName);
4676 return;
4677 }
4678 ctxt->atom->valuep = blockName;
4679 } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4680 if (xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4681 type, 0, 0, blockName) == NULL) {
4682 xmlFree(blockName);
4683 }
4684 }
4685}
4686
4687static int parse_escaped_codeunit(xmlRegParserCtxtPtr ctxt)
4688{
4689 int val = 0, i, cur;
4690 for (i = 0; i < 4; i++) {
4691 NEXT;
4692 val *= 16;
4693 cur = CUR;
4694 if (cur >= '0' && cur <= '9') {
4695 val += cur - '0';
4696 } else if (cur >= 'A' && cur <= 'F') {
4697 val += cur - 'A' + 10;
4698 } else if (cur >= 'a' && cur <= 'f') {
4699 val += cur - 'a' + 10;
4700 } else {
4701 ERROR("Expecting hex digit");
4702 return -1;
4703 }
4704 }
4705 return val;
4706}
4707
4708static int parse_escaped_codepoint(xmlRegParserCtxtPtr ctxt)
4709{
4710 int val = parse_escaped_codeunit(ctxt);
4711 if (0xD800 <= val && val <= 0xDBFF) {
4712 NEXT;
4713 if (CUR == '\\') {
4714 NEXT;
4715 if (CUR == 'u') {
4716 int low = parse_escaped_codeunit(ctxt);
4717 if (0xDC00 <= low && low <= 0xDFFF) {
4718 return (val - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
4719 }
4720 }
4721 }
4722 ERROR("Invalid low surrogate pair code unit");
4723 val = -1;
4724 }
4725 return val;
4726}
4727
4738static void
4739xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
4740 int cur;
4741
4742 if (CUR == '.') {
4743 if (ctxt->atom == NULL) {
4744 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
4745 } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4746 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4747 XML_REGEXP_ANYCHAR, 0, 0, NULL);
4748 }
4749 NEXT;
4750 return;
4751 }
4752 if (CUR != '\\') {
4753 ERROR("Escaped sequence: expecting \\");
4754 return;
4755 }
4756 NEXT;
4757 cur = CUR;
4758 if (cur == 'p') {
4759 NEXT;
4760 if (CUR != '{') {
4761 ERROR("Expecting '{'");
4762 return;
4763 }
4764 NEXT;
4765 xmlFAParseCharProp(ctxt);
4766 if (CUR != '}') {
4767 ERROR("Expecting '}'");
4768 return;
4769 }
4770 NEXT;
4771 } else if (cur == 'P') {
4772 NEXT;
4773 if (CUR != '{') {
4774 ERROR("Expecting '{'");
4775 return;
4776 }
4777 NEXT;
4778 xmlFAParseCharProp(ctxt);
4779 if (ctxt->atom != NULL)
4780 ctxt->atom->neg = 1;
4781 if (CUR != '}') {
4782 ERROR("Expecting '}'");
4783 return;
4784 }
4785 NEXT;
4786 } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
4787 (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
4788 (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
4789 (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
4790 (cur == 0x5E) ||
4791
4792 /* Non-standard escape sequences:
4793 * Java 1.8|.NET Core 3.1|MSXML 6 */
4794 (cur == '!') || /* + | + | + */
4795 (cur == '"') || /* + | + | + */
4796 (cur == '#') || /* + | + | + */
4797 (cur == '$') || /* + | + | + */
4798 (cur == '%') || /* + | + | + */
4799 (cur == ',') || /* + | + | + */
4800 (cur == '/') || /* + | + | + */
4801 (cur == ':') || /* + | + | + */
4802 (cur == ';') || /* + | + | + */
4803 (cur == '=') || /* + | + | + */
4804 (cur == '>') || /* | + | + */
4805 (cur == '@') || /* + | + | + */
4806 (cur == '`') || /* + | + | + */
4807 (cur == '~') || /* + | + | + */
4808 (cur == 'u')) { /* | + | + */
4809 if (ctxt->atom == NULL) {
4810 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
4811 if (ctxt->atom != NULL) {
4812 switch (cur) {
4813 case 'n':
4814 ctxt->atom->codepoint = '\n';
4815 break;
4816 case 'r':
4817 ctxt->atom->codepoint = '\r';
4818 break;
4819 case 't':
4820 ctxt->atom->codepoint = '\t';
4821 break;
4822 case 'u':
4823 cur = parse_escaped_codepoint(ctxt);
4824 if (cur < 0) {
4825 return;
4826 }
4827 ctxt->atom->codepoint = cur;
4828 break;
4829 default:
4830 ctxt->atom->codepoint = cur;
4831 }
4832 }
4833 } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4834 switch (cur) {
4835 case 'n':
4836 cur = '\n';
4837 break;
4838 case 'r':
4839 cur = '\r';
4840 break;
4841 case 't':
4842 cur = '\t';
4843 break;
4844 }
4845 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4846 XML_REGEXP_CHARVAL, cur, cur, NULL);
4847 }
4848 NEXT;
4849 } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
4850 (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
4851 (cur == 'w') || (cur == 'W')) {
4852 xmlRegAtomType type = XML_REGEXP_ANYSPACE;
4853
4854 switch (cur) {
4855 case 's':
4856 type = XML_REGEXP_ANYSPACE;
4857 break;
4858 case 'S':
4859 type = XML_REGEXP_NOTSPACE;
4860 break;
4861 case 'i':
4862 type = XML_REGEXP_INITNAME;
4863 break;
4864 case 'I':
4865 type = XML_REGEXP_NOTINITNAME;
4866 break;
4867 case 'c':
4868 type = XML_REGEXP_NAMECHAR;
4869 break;
4870 case 'C':
4871 type = XML_REGEXP_NOTNAMECHAR;
4872 break;
4873 case 'd':
4874 type = XML_REGEXP_DECIMAL;
4875 break;
4876 case 'D':
4877 type = XML_REGEXP_NOTDECIMAL;
4878 break;
4879 case 'w':
4880 type = XML_REGEXP_REALCHAR;
4881 break;
4882 case 'W':
4883 type = XML_REGEXP_NOTREALCHAR;
4884 break;
4885 }
4886 NEXT;
4887 if (ctxt->atom == NULL) {
4888 ctxt->atom = xmlRegNewAtom(ctxt, type);
4889 } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4890 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4891 type, 0, 0, NULL);
4892 }
4893 } else {
4894 ERROR("Wrong escape sequence, misuse of character '\\'");
4895 }
4896}
4897
4908static void
4909xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
4910 int cur, len;
4911 int start = -1;
4912 int end = -1;
4913
4914 if (CUR == '\0') {
4915 ERROR("Expecting ']'");
4916 return;
4917 }
4918
4919 cur = CUR;
4920 if (cur == '\\') {
4921 NEXT;
4922 cur = CUR;
4923 switch (cur) {
4924 case 'n': start = 0xA; break;
4925 case 'r': start = 0xD; break;
4926 case 't': start = 0x9; break;
4927 case '\\': case '|': case '.': case '-': case '^': case '?':
4928 case '*': case '+': case '{': case '}': case '(': case ')':
4929 case '[': case ']':
4930 start = cur; break;
4931 default:
4932 ERROR("Invalid escape value");
4933 return;
4934 }
4935 end = start;
4936 len = 1;
4937 } else if ((cur != 0x5B) && (cur != 0x5D)) {
4938 len = 4;
4939 end = start = xmlGetUTF8Char(ctxt->cur, &len);
4940 if (start < 0) {
4941 ERROR("Invalid UTF-8");
4942 return;
4943 }
4944 } else {
4945 ERROR("Expecting a char range");
4946 return;
4947 }
4948 /*
4949 * Since we are "inside" a range, we can assume ctxt->cur is past
4950 * the start of ctxt->string, and PREV should be safe
4951 */
4952 if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
4953 NEXTL(len);
4954 return;
4955 }
4956 NEXTL(len);
4957 cur = CUR;
4958 if ((cur != '-') || (NXT(1) == '[') || (NXT(1) == ']')) {
4959 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4960 XML_REGEXP_CHARVAL, start, end, NULL);
4961 return;
4962 }
4963 NEXT;
4964 cur = CUR;
4965 if (cur == '\\') {
4966 NEXT;
4967 cur = CUR;
4968 switch (cur) {
4969 case 'n': end = 0xA; break;
4970 case 'r': end = 0xD; break;
4971 case 't': end = 0x9; break;
4972 case '\\': case '|': case '.': case '-': case '^': case '?':
4973 case '*': case '+': case '{': case '}': case '(': case ')':
4974 case '[': case ']':
4975 end = cur; break;
4976 default:
4977 ERROR("Invalid escape value");
4978 return;
4979 }
4980 len = 1;
4981 } else if ((cur != '\0') && (cur != 0x5B) && (cur != 0x5D)) {
4982 len = 4;
4983 end = xmlGetUTF8Char(ctxt->cur, &len);
4984 if (end < 0) {
4985 ERROR("Invalid UTF-8");
4986 return;
4987 }
4988 } else {
4989 ERROR("Expecting the end of a char range");
4990 return;
4991 }
4992
4993 /* TODO check that the values are acceptable character ranges for XML */
4994 if (end < start) {
4995 ERROR("End of range is before start of range");
4996 } else {
4997 NEXTL(len);
4998 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4999 XML_REGEXP_CHARVAL, start, end, NULL);
5000 }
5001 return;
5002}
5003
5010static void
5011xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
5012 do {
5013 if (CUR == '\\') {
5014 xmlFAParseCharClassEsc(ctxt);
5015 } else {
5016 xmlFAParseCharRange(ctxt);
5017 }
5018 } while ((CUR != ']') && (CUR != '-') &&
5019 (CUR != 0) && (ctxt->error == 0));
5020}
5021
5031static void
5032xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
5033 int neg = ctxt->neg;
5034
5035 if (CUR == '^') {
5036 NEXT;
5037 ctxt->neg = !ctxt->neg;
5038 xmlFAParsePosCharGroup(ctxt);
5039 ctxt->neg = neg;
5040 }
5041 while ((CUR != ']') && (ctxt->error == 0)) {
5042 if ((CUR == '-') && (NXT(1) == '[')) {
5043 NEXT; /* eat the '-' */
5044 NEXT; /* eat the '[' */
5045 ctxt->neg = 2;
5046 xmlFAParseCharGroup(ctxt);
5047 ctxt->neg = neg;
5048 if (CUR == ']') {
5049 NEXT;
5050 } else {
5051 ERROR("charClassExpr: ']' expected");
5052 }
5053 break;
5054 } else {
5055 xmlFAParsePosCharGroup(ctxt);
5056 }
5057 }
5058}
5059
5067static void
5068xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
5069 if (CUR == '[') {
5070 NEXT;
5071 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
5072 if (ctxt->atom == NULL)
5073 return;
5074 xmlFAParseCharGroup(ctxt);
5075 if (CUR == ']') {
5076 NEXT;
5077 } else {
5078 ERROR("xmlFAParseCharClass: ']' expected");
5079 }
5080 } else {
5081 xmlFAParseCharClassEsc(ctxt);
5082 }
5083}
5084
5093static int
5094xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
5095 int ret = 0;
5096 int ok = 0;
5097 int overflow = 0;
5098
5099 while ((CUR >= '0') && (CUR <= '9')) {
5100 if (ret > INT_MAX / 10) {
5101 overflow = 1;
5102 } else {
5103 int digit = CUR - '0';
5104
5105 ret *= 10;
5106 if (ret > INT_MAX - digit)
5107 overflow = 1;
5108 else
5109 ret += digit;
5110 }
5111 ok = 1;
5112 NEXT;
5113 }
5114 if ((ok != 1) || (overflow == 1)) {
5115 return(-1);
5116 }
5117 return(ret);
5118}
5119
5130static int
5131xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
5132 int cur;
5133
5134 cur = CUR;
5135 if ((cur == '?') || (cur == '*') || (cur == '+')) {
5136 if (ctxt->atom != NULL) {
5137 if (cur == '?')
5138 ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
5139 else if (cur == '*')
5140 ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
5141 else if (cur == '+')
5142 ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
5143 }
5144 NEXT;
5145 return(1);
5146 }
5147 if (cur == '{') {
5148 int min = 0, max = 0;
5149
5150 NEXT;
5151 cur = xmlFAParseQuantExact(ctxt);
5152 if (cur >= 0)
5153 min = cur;
5154 else {
5155 ERROR("Improper quantifier");
5156 }
5157 if (CUR == ',') {
5158 NEXT;
5159 if (CUR == '}')
5160 max = INT_MAX;
5161 else {
5162 cur = xmlFAParseQuantExact(ctxt);
5163 if (cur >= 0)
5164 max = cur;
5165 else {
5166 ERROR("Improper quantifier");
5167 }
5168 }
5169 }
5170 if (CUR == '}') {
5171 NEXT;
5172 } else {
5173 ERROR("Unterminated quantifier");
5174 }
5175 if (max == 0)
5176 max = min;
5177 if (ctxt->atom != NULL) {
5178 ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
5179 ctxt->atom->min = min;
5180 ctxt->atom->max = max;
5181 }
5182 return(1);
5183 }
5184 return(0);
5185}
5186
5193static int
5194xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
5195 int codepoint, len;
5196
5197 codepoint = xmlFAIsChar(ctxt);
5198 if (codepoint > 0) {
5199 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
5200 if (ctxt->atom == NULL)
5201 return(-1);
5202 len = 4;
5203 codepoint = xmlGetUTF8Char(ctxt->cur, &len);
5204 if (codepoint < 0) {
5205 ERROR("Invalid UTF-8");
5206 return(-1);
5207 }
5208 ctxt->atom->codepoint = codepoint;
5209 NEXTL(len);
5210 return(1);
5211 } else if (CUR == '|') {
5212 return(0);
5213 } else if (CUR == 0) {
5214 return(0);
5215 } else if (CUR == ')') {
5216 return(0);
5217 } else if (CUR == '(') {
5218 xmlRegStatePtr start, oldend, start0;
5219
5220 NEXT;
5221 if (ctxt->depth >= 50) {
5222 ERROR("xmlFAParseAtom: maximum nesting depth exceeded");
5223 return(-1);
5224 }
5225 /*
5226 * this extra Epsilon transition is needed if we count with 0 allowed
5227 * unfortunately this can't be known at that point
5228 */
5229 xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
5230 start0 = ctxt->state;
5231 xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
5232 start = ctxt->state;
5233 oldend = ctxt->end;
5234 ctxt->end = NULL;
5235 ctxt->atom = NULL;
5236 ctxt->depth++;
5237 xmlFAParseRegExp(ctxt, 0);
5238 ctxt->depth--;
5239 if (CUR == ')') {
5240 NEXT;
5241 } else {
5242 ERROR("xmlFAParseAtom: expecting ')'");
5243 }
5244 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
5245 if (ctxt->atom == NULL)
5246 return(-1);
5247 ctxt->atom->start = start;
5248 ctxt->atom->start0 = start0;
5249 ctxt->atom->stop = ctxt->state;
5250 ctxt->end = oldend;
5251 return(1);
5252 } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
5253 xmlFAParseCharClass(ctxt);
5254 return(1);
5255 }
5256 return(0);
5257}
5258
5265static int
5266xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
5267 int ret;
5268
5269 ctxt->atom = NULL;
5270 ret = xmlFAParseAtom(ctxt);
5271 if (ret == 0)
5272 return(0);
5273 if (ctxt->atom == NULL) {
5274 ERROR("internal: no atom generated");
5275 }
5276 xmlFAParseQuantifier(ctxt);
5277 return(1);
5278}
5279
5290static int
5291xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
5292 xmlRegStatePtr previous;
5293 int ret;
5294
5295 previous = ctxt->state;
5296 ret = xmlFAParsePiece(ctxt);
5297 if (ret == 0) {
5298 /* Empty branch */
5299 xmlFAGenerateEpsilonTransition(ctxt, previous, to);
5300 } else {
5301 if (xmlFAGenerateTransitions(ctxt, previous,
5302 (CUR=='|' || CUR==')' || CUR==0) ? to : NULL,
5303 ctxt->atom) < 0) {
5304 xmlRegFreeAtom(ctxt->atom);
5305 ctxt->atom = NULL;
5306 return(-1);
5307 }
5308 previous = ctxt->state;
5309 ctxt->atom = NULL;
5310 }
5311 while ((ret != 0) && (ctxt->error == 0)) {
5312 ret = xmlFAParsePiece(ctxt);
5313 if (ret != 0) {
5314 if (xmlFAGenerateTransitions(ctxt, previous,
5315 (CUR=='|' || CUR==')' || CUR==0) ? to : NULL,
5316 ctxt->atom) < 0) {
5317 xmlRegFreeAtom(ctxt->atom);
5318 ctxt->atom = NULL;
5319 return(-1);
5320 }
5321 previous = ctxt->state;
5322 ctxt->atom = NULL;
5323 }
5324 }
5325 return(0);
5326}
5327
5335static void
5336xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
5337 xmlRegStatePtr start, end;
5338
5339 /* if not top start should have been generated by an epsilon trans */
5340 start = ctxt->state;
5341 ctxt->end = NULL;
5342 xmlFAParseBranch(ctxt, NULL);
5343 if (top) {
5344 ctxt->state->type = XML_REGEXP_FINAL_STATE;
5345 }
5346 if (CUR != '|') {
5347 ctxt->end = ctxt->state;
5348 return;
5349 }
5350 end = ctxt->state;
5351 while ((CUR == '|') && (ctxt->error == 0)) {
5352 NEXT;
5353 ctxt->state = start;
5354 ctxt->end = NULL;
5355 xmlFAParseBranch(ctxt, end);
5356 }
5357 if (!top) {
5358 ctxt->state = end;
5359 ctxt->end = end;
5360 }
5361}
5362
5363/************************************************************************
5364 * *
5365 * The basic API *
5366 * *
5367 ************************************************************************/
5368
5376void
5377xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
5378 int i;
5379
5380 if (output == NULL)
5381 return;
5382 fprintf(output, " regexp: ");
5383 if (regexp == NULL) {
5384 fprintf(output, "NULL\n");
5385 return;
5386 }
5387 fprintf(output, "'%s' ", regexp->string);
5388 fprintf(output, "\n");
5389 fprintf(output, "%d atoms:\n", regexp->nbAtoms);
5390 for (i = 0;i < regexp->nbAtoms; i++) {
5391 fprintf(output, " %02d ", i);
5392 xmlRegPrintAtom(output, regexp->atoms[i]);
5393 }
5394 fprintf(output, "%d states:", regexp->nbStates);
5395 fprintf(output, "\n");
5396 for (i = 0;i < regexp->nbStates; i++) {
5397 xmlRegPrintState(output, regexp->states[i]);
5398 }
5399 fprintf(output, "%d counters:\n", regexp->nbCounters);
5400 for (i = 0;i < regexp->nbCounters; i++) {
5401 fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
5402 regexp->counters[i].max);
5403 }
5404}
5405
5416xmlRegexpPtr
5417xmlRegexpCompile(const xmlChar *regexp) {
5418 xmlRegexpPtr ret = NULL;
5419 xmlRegParserCtxtPtr ctxt;
5420
5421 if (regexp == NULL)
5422 return(NULL);
5423
5424 ctxt = xmlRegNewParserCtxt(regexp);
5425 if (ctxt == NULL)
5426 return(NULL);
5427
5428 /* initialize the parser */
5429 ctxt->state = xmlRegStatePush(ctxt);
5430 if (ctxt->state == NULL)
5431 goto error;
5432 ctxt->start = ctxt->state;
5433 ctxt->end = NULL;
5434
5435 /* parse the expression building an automata */
5436 xmlFAParseRegExp(ctxt, 1);
5437 if (CUR != 0) {
5438 ERROR("xmlFAParseRegExp: extra characters");
5439 }
5440 if (ctxt->error != 0)
5441 goto error;
5442 ctxt->end = ctxt->state;
5443 ctxt->start->type = XML_REGEXP_START_STATE;
5444 ctxt->end->type = XML_REGEXP_FINAL_STATE;
5445
5446 /* remove the Epsilon except for counted transitions */
5447 xmlFAEliminateEpsilonTransitions(ctxt);
5448
5449
5450 if (ctxt->error != 0)
5451 goto error;
5452 ret = xmlRegEpxFromParse(ctxt);
5453
5454error:
5455 xmlRegFreeParserCtxt(ctxt);
5456 return(ret);
5457}
5458
5468int
5469xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
5470 if ((comp == NULL) || (content == NULL))
5471 return(-1);
5472 return(xmlFARegExec(comp, content));
5473}
5474
5483int
5484xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
5485 xmlAutomataPtr am;
5486 int ret;
5487
5488 if (comp == NULL)
5489 return(-1);
5490 if (comp->determinist != -1)
5491 return(comp->determinist);
5492
5493 am = xmlNewAutomata();
5494 if (am == NULL)
5495 return(-1);
5496 if (am->states != NULL) {
5497 int i;
5498
5499 for (i = 0;i < am->nbStates;i++)
5500 xmlRegFreeState(am->states[i]);
5501 xmlFree(am->states);
5502 }
5503 am->nbAtoms = comp->nbAtoms;
5504 am->atoms = comp->atoms;
5505 am->nbStates = comp->nbStates;
5506 am->states = comp->states;
5507 am->determinist = -1;
5508 am->flags = comp->flags;
5509 ret = xmlFAComputesDeterminism(am);
5510 am->atoms = NULL;
5511 am->states = NULL;
5512 xmlFreeAutomata(am);
5513 comp->determinist = ret;
5514 return(ret);
5515}
5516
5523void
5524xmlRegFreeRegexp(xmlRegexpPtr regexp) {
5525 int i;
5526 if (regexp == NULL)
5527 return;
5528
5529 if (regexp->string != NULL)
5530 xmlFree(regexp->string);
5531 if (regexp->states != NULL) {
5532 for (i = 0;i < regexp->nbStates;i++)
5533 xmlRegFreeState(regexp->states[i]);
5534 xmlFree(regexp->states);
5535 }
5536 if (regexp->atoms != NULL) {
5537 for (i = 0;i < regexp->nbAtoms;i++)
5538 xmlRegFreeAtom(regexp->atoms[i]);
5539 xmlFree(regexp->atoms);
5540 }
5541 if (regexp->counters != NULL)
5542 xmlFree(regexp->counters);
5543 if (regexp->compact != NULL)
5544 xmlFree(regexp->compact);
5545 if (regexp->transdata != NULL)
5546 xmlFree(regexp->transdata);
5547 if (regexp->stringMap != NULL) {
5548 for (i = 0; i < regexp->nbstrings;i++)
5549 xmlFree(regexp->stringMap[i]);
5550 xmlFree(regexp->stringMap);
5551 }
5552
5553 xmlFree(regexp);
5554}
5555
5556#ifdef LIBXML_AUTOMATA_ENABLED
5557/************************************************************************
5558 * *
5559 * The Automata interface *
5560 * *
5561 ************************************************************************/
5562
5570xmlAutomataPtr
5571xmlNewAutomata(void) {
5572 xmlAutomataPtr ctxt;
5573
5574 ctxt = xmlRegNewParserCtxt(NULL);
5575 if (ctxt == NULL)
5576 return(NULL);
5577
5578 /* initialize the parser */
5579 ctxt->state = xmlRegStatePush(ctxt);
5580 if (ctxt->state == NULL) {
5581 xmlFreeAutomata(ctxt);
5582 return(NULL);
5583 }
5584 ctxt->start = ctxt->state;
5585 ctxt->end = NULL;
5586
5587 ctxt->start->type = XML_REGEXP_START_STATE;
5588 ctxt->flags = 0;
5589
5590 return(ctxt);
5591}
5592
5599void
5600xmlFreeAutomata(xmlAutomataPtr am) {
5601 if (am == NULL)
5602 return;
5603 xmlRegFreeParserCtxt(am);
5604}
5605
5613void
5614xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
5615 if (am == NULL)
5616 return;
5617 am->flags |= flags;
5618}
5619
5628xmlAutomataStatePtr
5629xmlAutomataGetInitState(xmlAutomataPtr am) {
5630 if (am == NULL)
5631 return(NULL);
5632 return(am->start);
5633}
5634
5644int
5645xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
5646 if ((am == NULL) || (state == NULL))
5647 return(-1);
5648 state->type = XML_REGEXP_FINAL_STATE;
5649 return(0);
5650}
5651
5666xmlAutomataStatePtr
5667xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
5668 xmlAutomataStatePtr to, const xmlChar *token,
5669 void *data) {
5670 xmlRegAtomPtr atom;
5671
5672 if ((am == NULL) || (from == NULL) || (token == NULL))
5673 return(NULL);
5674 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5675 if (atom == NULL)
5676 return(NULL);
5677 atom->data = data;
5678 atom->valuep = xmlStrdup(token);
5679
5680 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5681 xmlRegFreeAtom(atom);
5682 return(NULL);
5683 }
5684 if (to == NULL)
5685 return(am->state);
5686 return(to);
5687}
5688
5704xmlAutomataStatePtr
5705xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5706 xmlAutomataStatePtr to, const xmlChar *token,
5707 const xmlChar *token2, void *data) {
5708 xmlRegAtomPtr atom;
5709
5710 if ((am == NULL) || (from == NULL) || (token == NULL))
5711 return(NULL);
5712 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5713 if (atom == NULL)
5714 return(NULL);
5715 atom->data = data;
5716 if ((token2 == NULL) || (*token2 == 0)) {
5717 atom->valuep = xmlStrdup(token);
5718 } else {
5719 int lenn, lenp;
5720 xmlChar *str;
5721
5722 lenn = strlen((char *) token2);
5723 lenp = strlen((char *) token);
5724
5725 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5726 if (str == NULL) {
5727 xmlRegFreeAtom(atom);
5728 return(NULL);
5729 }
5730 memcpy(&str[0], token, lenp);
5731 str[lenp] = '|';
5732 memcpy(&str[lenp + 1], token2, lenn);
5733 str[lenn + lenp + 1] = 0;
5734
5735 atom->valuep = str;
5736 }
5737
5738 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5739 xmlRegFreeAtom(atom);
5740 return(NULL);
5741 }
5742 if (to == NULL)
5743 return(am->state);
5744 return(to);
5745}
5746
5764xmlAutomataStatePtr
5765xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5766 xmlAutomataStatePtr to, const xmlChar *token,
5767 const xmlChar *token2, void *data) {
5768 xmlRegAtomPtr atom;
5769 xmlChar err_msg[200];
5770
5771 if ((am == NULL) || (from == NULL) || (token == NULL))
5772 return(NULL);
5773 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5774 if (atom == NULL)
5775 return(NULL);
5776 atom->data = data;
5777 atom->neg = 1;
5778 if ((token2 == NULL) || (*token2 == 0)) {
5779 atom->valuep = xmlStrdup(token);
5780 } else {
5781 int lenn, lenp;
5782 xmlChar *str;
5783
5784 lenn = strlen((char *) token2);
5785 lenp = strlen((char *) token);
5786
5787 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5788 if (str == NULL) {
5789 xmlRegFreeAtom(atom);
5790 return(NULL);
5791 }
5792 memcpy(&str[0], token, lenp);
5793 str[lenp] = '|';
5794 memcpy(&str[lenp + 1], token2, lenn);
5795 str[lenn + lenp + 1] = 0;
5796
5797 atom->valuep = str;
5798 }
5799 snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
5800 err_msg[199] = 0;
5801 atom->valuep2 = xmlStrdup(err_msg);
5802
5803 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5804 xmlRegFreeAtom(atom);
5805 return(NULL);
5806 }
5807 am->negs++;
5808 if (to == NULL)
5809 return(am->state);
5810 return(to);
5811}
5812
5831xmlAutomataStatePtr
5832xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5833 xmlAutomataStatePtr to, const xmlChar *token,
5834 const xmlChar *token2,
5835 int min, int max, void *data) {
5836 xmlRegAtomPtr atom;
5837 int counter;
5838
5839 if ((am == NULL) || (from == NULL) || (token == NULL))
5840 return(NULL);
5841 if (min < 0)
5842 return(NULL);
5843 if ((max < min) || (max < 1))
5844 return(NULL);
5845 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5846 if (atom == NULL)
5847 return(NULL);
5848 if ((token2 == NULL) || (*token2 == 0)) {
5849 atom->valuep = xmlStrdup(token);
5850 if (atom->valuep == NULL)
5851 goto error;
5852 } else {
5853 int lenn, lenp;
5854 xmlChar *str;
5855
5856 lenn = strlen((char *) token2);
5857 lenp = strlen((char *) token);
5858
5859 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5860 if (str == NULL)
5861 goto error;
5862 memcpy(&str[0], token, lenp);
5863 str[lenp] = '|';
5864 memcpy(&str[lenp + 1], token2, lenn);
5865 str[lenn + lenp + 1] = 0;
5866
5867 atom->valuep = str;
5868 }
5869 atom->data = data;
5870 if (min == 0)
5871 atom->min = 1;
5872 else
5873 atom->min = min;
5874 atom->max = max;
5875
5876 /*
5877 * associate a counter to the transition.
5878 */
5879 counter = xmlRegGetCounter(am);
5880 if (counter < 0)
5881 goto error;
5882 am->counters[counter].min = min;
5883 am->counters[counter].max = max;
5884
5885 /* xmlFAGenerateTransitions(am, from, to, atom); */
5886 if (to == NULL) {
5887 to = xmlRegStatePush(am);
5888 if (to == NULL)
5889 goto error;
5890 }
5891 xmlRegStateAddTrans(am, from, atom, to, counter, -1);
5892 if (xmlRegAtomPush(am, atom) < 0)
5893 goto error;
5894 am->state = to;
5895
5896 if (to == NULL)
5897 to = am->state;
5898 if (to == NULL)
5899 return(NULL);
5900 if (min == 0)
5901 xmlFAGenerateEpsilonTransition(am, from, to);
5902 return(to);
5903
5904error:
5905 xmlRegFreeAtom(atom);
5906 return(NULL);
5907}
5908
5926xmlAutomataStatePtr
5927xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5928 xmlAutomataStatePtr to, const xmlChar *token,
5929 int min, int max, void *data) {
5930 xmlRegAtomPtr atom;
5931 int counter;
5932
5933 if ((am == NULL) || (from == NULL) || (token == NULL))
5934 return(NULL);
5935 if (min < 0)
5936 return(NULL);
5937 if ((max < min) || (max < 1))
5938 return(NULL);
5939 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5940 if (atom == NULL)
5941 return(NULL);
5942 atom->valuep = xmlStrdup(token);
5943 if (atom->valuep == NULL)
5944 goto error;
5945 atom->data = data;
5946 if (min == 0)
5947 atom->min = 1;
5948 else
5949 atom->min = min;
5950 atom->max = max;
5951
5952 /*
5953 * associate a counter to the transition.
5954 */
5955 counter = xmlRegGetCounter(am);
5956 if (counter < 0)
5957 goto error;
5958 am->counters[counter].min = min;
5959 am->counters[counter].max = max;
5960
5961 /* xmlFAGenerateTransitions(am, from, to, atom); */
5962 if (to == NULL) {
5963 to = xmlRegStatePush(am);
5964 if (to == NULL)
5965 goto error;
5966 }
5967 xmlRegStateAddTrans(am, from, atom, to, counter, -1);
5968 if (xmlRegAtomPush(am, atom) < 0)
5969 goto error;
5970 am->state = to;
5971
5972 if (to == NULL)
5973 to = am->state;
5974 if (to == NULL)
5975 return(NULL);
5976 if (min == 0)
5977 xmlFAGenerateEpsilonTransition(am, from, to);
5978 return(to);
5979
5980error:
5981 xmlRegFreeAtom(atom);
5982 return(NULL);
5983}
5984
6004xmlAutomataStatePtr
6005xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
6006 xmlAutomataStatePtr to, const xmlChar *token,
6007 const xmlChar *token2,
6008 int min, int max, void *data) {
6009 xmlRegAtomPtr atom;
6010 int counter;
6011
6012 if ((am == NULL) || (from == NULL) || (token == NULL))
6013 return(NULL);
6014 if (min < 1)
6015 return(NULL);
6016 if (max < min)
6017 return(NULL);
6018 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6019 if (atom == NULL)
6020 return(NULL);
6021 if ((token2 == NULL) || (*token2 == 0)) {
6022 atom->valuep = xmlStrdup(token);
6023 if (atom->valuep == NULL)
6024 goto error;
6025 } else {
6026 int lenn, lenp;
6027 xmlChar *str;
6028
6029 lenn = strlen((char *) token2);
6030 lenp = strlen((char *) token);
6031
6032 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
6033 if (str == NULL)
6034 goto error;
6035 memcpy(&str[0], token, lenp);
6036 str[lenp] = '|';
6037 memcpy(&str[lenp + 1], token2, lenn);
6038 str[lenn + lenp + 1] = 0;
6039
6040 atom->valuep = str;
6041 }
6042 atom->data = data;
6043 atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6044 atom->min = min;
6045 atom->max = max;
6046 /*
6047 * associate a counter to the transition.
6048 */
6049 counter = xmlRegGetCounter(am);
6050 if (counter < 0)
6051 goto error;
6052 am->counters[counter].min = 1;
6053 am->counters[counter].max = 1;
6054
6055 /* xmlFAGenerateTransitions(am, from, to, atom); */
6056 if (to == NULL) {
6057 to = xmlRegStatePush(am);
6058 if (to == NULL)
6059 goto error;
6060 }
6061 xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6062 if (xmlRegAtomPush(am, atom) < 0)
6063 goto error;
6064 am->state = to;
6065 return(to);
6066
6067error:
6068 xmlRegFreeAtom(atom);
6069 return(NULL);
6070}
6071
6072
6073
6092xmlAutomataStatePtr
6093xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6094 xmlAutomataStatePtr to, const xmlChar *token,
6095 int min, int max, void *data) {
6096 xmlRegAtomPtr atom;
6097 int counter;
6098
6099 if ((am == NULL) || (from == NULL) || (token == NULL))
6100 return(NULL);
6101 if (min < 1)
6102 return(NULL);
6103 if (max < min)
6104 return(NULL);
6105 atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6106 if (atom == NULL)
6107 return(NULL);
6108 atom->valuep = xmlStrdup(token);
6109 atom->data = data;
6110 atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6111 atom->min = min;
6112 atom->max = max;
6113 /*
6114 * associate a counter to the transition.
6115 */
6116 counter = xmlRegGetCounter(am);
6117 if (counter < 0)
6118 goto error;
6119 am->counters[counter].min = 1;
6120 am->counters[counter].max = 1;
6121
6122 /* xmlFAGenerateTransitions(am, from, to, atom); */
6123 if (to == NULL) {
6124 to = xmlRegStatePush(am);
6125 if (to == NULL)
6126 goto error;
6127 }
6128 xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6129 if (xmlRegAtomPush(am, atom) < 0)
6130 goto error;
6131 am->state = to;
6132 return(to);
6133
6134error:
6135 xmlRegFreeAtom(atom);
6136 return(NULL);
6137}
6138
6147xmlAutomataStatePtr
6148xmlAutomataNewState(xmlAutomataPtr am) {
6149 if (am == NULL)
6150 return(NULL);
6151 return(xmlRegStatePush(am));
6152}
6153
6166xmlAutomataStatePtr
6167xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
6168 xmlAutomataStatePtr to) {
6169 if ((am == NULL) || (from == NULL))
6170 return(NULL);
6171 xmlFAGenerateEpsilonTransition(am, from, to);
6172 if (to == NULL)
6173 return(am->state);
6174 return(to);
6175}
6176
6191xmlAutomataStatePtr
6192xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6193 xmlAutomataStatePtr to, int lax) {
6194 if ((am == NULL) || (from == NULL))
6195 return(NULL);
6196 xmlFAGenerateAllTransition(am, from, to, lax);
6197 if (to == NULL)
6198 return(am->state);
6199 return(to);
6200}
6201
6212int
6213xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
6214 int ret;
6215
6216 if (am == NULL)
6217 return(-1);
6218
6219 ret = xmlRegGetCounter(am);
6220 if (ret < 0)
6221 return(-1);
6222 am->counters[ret].min = min;
6223 am->counters[ret].max = max;
6224 return(ret);
6225}
6226
6240xmlAutomataStatePtr
6241xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6242 xmlAutomataStatePtr to, int counter) {
6243 if ((am == NULL) || (from == NULL) || (counter < 0))
6244 return(NULL);
6245 xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
6246 if (to == NULL)
6247 return(am->state);
6248 return(to);
6249}
6250
6264xmlAutomataStatePtr
6265xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6266 xmlAutomataStatePtr to, int counter) {
6267 if ((am == NULL) || (from == NULL) || (counter < 0))
6268 return(NULL);
6269 xmlFAGenerateCountedTransition(am, from, to, counter);
6270 if (to == NULL)
6271 return(am->state);
6272 return(to);
6273}
6274
6284xmlRegexpPtr
6285xmlAutomataCompile(xmlAutomataPtr am) {
6286 xmlRegexpPtr ret;
6287
6288 if ((am == NULL) || (am->error != 0)) return(NULL);
6289 xmlFAEliminateEpsilonTransitions(am);
6290 /* xmlFAComputesDeterminism(am); */
6291 ret = xmlRegEpxFromParse(am);
6292
6293 return(ret);
6294}
6295
6304int
6305xmlAutomataIsDeterminist(xmlAutomataPtr am) {
6306 int ret;
6307
6308 if (am == NULL)
6309 return(-1);
6310
6311 ret = xmlFAComputesDeterminism(am);
6312 return(ret);
6313}
6314#endif /* LIBXML_AUTOMATA_ENABLED */
6315
6316#ifdef LIBXML_EXPR_ENABLED
6317/************************************************************************
6318 * *
6319 * Formal Expression handling code *
6320 * *
6321 ************************************************************************/
6322/************************************************************************
6323 * *
6324 * Expression handling context *
6325 * *
6326 ************************************************************************/
6327
6328struct _xmlExpCtxt {
6329 xmlDictPtr dict;
6330 xmlExpNodePtr *table;
6331 int size;
6332 int nbElems;
6333 int nb_nodes;
6334 int maxNodes;
6335 const char *expr;
6336 const char *cur;
6337 int nb_cons;
6338 int tabSize;
6339};
6340
6350xmlExpCtxtPtr
6351xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
6352 xmlExpCtxtPtr ret;
6353 int size = 256;
6354
6355 if (maxNodes <= 4096)
6356 maxNodes = 4096;
6357
6358 ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
6359 if (ret == NULL)
6360 return(NULL);
6361 memset(ret, 0, sizeof(xmlExpCtxt));
6362 ret->size = size;
6363 ret->nbElems = 0;
6364 ret->maxNodes = maxNodes;
6365 ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
6366 if (ret->table == NULL) {
6367 xmlFree(ret);
6368 return(NULL);
6369 }
6370 memset(ret->table, 0, size * sizeof(xmlExpNodePtr));
6371 if (dict == NULL) {
6372 ret->dict = xmlDictCreate();
6373 if (ret->dict == NULL) {
6374 xmlFree(ret->table);
6375 xmlFree(ret);
6376 return(NULL);
6377 }
6378 } else {
6379 ret->dict = dict;
6380 xmlDictReference(ret->dict);
6381 }
6382 return(ret);
6383}
6384
6391void
6392xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
6393 if (ctxt == NULL)
6394 return;
6395 xmlDictFree(ctxt->dict);
6396 if (ctxt->table != NULL)
6397 xmlFree(ctxt->table);
6398 xmlFree(ctxt);
6399}
6400
6401/************************************************************************
6402 * *
6403 * Structure associated to an expression node *
6404 * *
6405 ************************************************************************/
6406#define MAX_NODES 10000
6407
6408/*
6409 * TODO:
6410 * - Wildcards
6411 * - public API for creation
6412 *
6413 * Started
6414 * - regression testing
6415 *
6416 * Done
6417 * - split into module and test tool
6418 * - memleaks
6419 */
6420
6421typedef enum {
6422 XML_EXP_NILABLE = (1 << 0)
6423} xmlExpNodeInfo;
6424
6425#define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE)
6426
6427struct _xmlExpNode {
6428 unsigned char type;/* xmlExpNodeType */
6429 unsigned char info;/* OR of xmlExpNodeInfo */
6430 unsigned short key; /* the hash key */
6431 unsigned int ref; /* The number of references */
6432 int c_max; /* the maximum length it can consume */
6433 xmlExpNodePtr exp_left;
6434 xmlExpNodePtr next;/* the next node in the hash table or free list */
6435 union {
6436 struct {
6437 int f_min;
6438 int f_max;
6439 } count;
6440 struct {
6441 xmlExpNodePtr f_right;
6442 } children;
6443 const xmlChar *f_str;
6444 } field;
6445};
6446
6447#define exp_min field.count.f_min
6448#define exp_max field.count.f_max
6449/* #define exp_left field.children.f_left */
6450#define exp_right field.children.f_right
6451#define exp_str field.f_str
6452
6453static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type);
6454static xmlExpNode forbiddenExpNode = {
6455 XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}}
6456};
6457xmlExpNodePtr forbiddenExp = &forbiddenExpNode;
6458static xmlExpNode emptyExpNode = {
6459 XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}}
6460};
6461xmlExpNodePtr emptyExp = &emptyExpNode;
6462
6463/************************************************************************
6464 * *
6465 * The custom hash table for unicity and canonicalization *
6466 * of sub-expressions pointers *
6467 * *
6468 ************************************************************************/
6469/*
6470 * xmlExpHashNameComputeKey:
6471 * Calculate the hash key for a token
6472 */
6473static unsigned short
6474xmlExpHashNameComputeKey(const xmlChar *name) {
6475 unsigned short value = 0L;
6476 char ch;
6477
6478 if (name != NULL) {
6479 value += 30 * (*name);
6480 while ((ch = *name++) != 0) {
6481 value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
6482 }
6483 }
6484 return (value);
6485}
6486
6487/*
6488 * xmlExpHashComputeKey:
6489 * Calculate the hash key for a compound expression
6490 */
6491static unsigned short
6492xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
6493 xmlExpNodePtr right) {
6494 unsigned long value;
6495 unsigned short ret;
6496
6497 switch (type) {
6498 case XML_EXP_SEQ:
6499 value = left->key;
6500 value += right->key;
6501 value *= 3;
6502 ret = (unsigned short) value;
6503 break;
6504 case XML_EXP_OR:
6505 value = left->key;
6506 value += right->key;
6507 value *= 7;
6508 ret = (unsigned short) value;
6509 break;
6510 case XML_EXP_COUNT:
6511 value = left->key;
6512 value += right->key;
6513 ret = (unsigned short) value;
6514 break;
6515 default:
6516 ret = 0;
6517 }
6518 return(ret);
6519}
6520
6521
6522static xmlExpNodePtr
6523xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
6524 xmlExpNodePtr ret;
6525
6526 if (ctxt->nb_nodes >= MAX_NODES)
6527 return(NULL);
6528 ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
6529 if (ret == NULL)
6530 return(NULL);
6531 memset(ret, 0, sizeof(xmlExpNode));
6532 ret->type = type;
6533 ret->next = NULL;
6534 ctxt->nb_nodes++;
6535 ctxt->nb_cons++;
6536 return(ret);
6537}
6538
6549static xmlExpNodePtr
6550xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
6551 xmlExpNodePtr left, xmlExpNodePtr right,
6552 const xmlChar *name, int min, int max) {
6553 unsigned short kbase, key;
6554 xmlExpNodePtr entry;
6555 xmlExpNodePtr insert;
6556
6557 if (ctxt == NULL)
6558 return(NULL);
6559
6560 /*
6561 * Check for duplicate and insertion location.
6562 */
6563 if (type == XML_EXP_ATOM) {
6564 kbase = xmlExpHashNameComputeKey(name);
6565 } else if (type == XML_EXP_COUNT) {
6566 /* COUNT reduction rule 1 */
6567 /* a{1} -> a */
6568 if (min == max) {
6569 if (min == 1) {
6570 return(left);
6571 }
6572 if (min == 0) {
6573 xmlExpFree(ctxt, left);
6574 return(emptyExp);
6575 }
6576 }
6577 if (min < 0) {
6578 xmlExpFree(ctxt, left);
6579 return(forbiddenExp);
6580 }
6581 if (max == -1)
6582 kbase = min + 79;
6583 else
6584 kbase = max - min;
6585 kbase += left->key;
6586 } else if (type == XML_EXP_OR) {
6587 /* Forbid reduction rules */
6588 if (left->type == XML_EXP_FORBID) {
6589 xmlExpFree(ctxt, left);
6590 return(right);
6591 }
6592 if (right->type == XML_EXP_FORBID) {
6593 xmlExpFree(ctxt, right);
6594 return(left);
6595 }
6596
6597 /* OR reduction rule 1 */
6598 /* a | a reduced to a */
6599 if (left == right) {
6600 xmlExpFree(ctxt, right);
6601 return(left);
6602 }
6603 /* OR canonicalization rule 1 */
6604 /* linearize (a | b) | c into a | (b | c) */
6605 if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
6606 xmlExpNodePtr tmp = left;
6607 left = right;
6608 right = tmp;
6609 }
6610 /* OR reduction rule 2 */
6611 /* a | (a | b) and b | (a | b) are reduced to a | b */
6612 if (right->type == XML_EXP_OR) {
6613 if ((left == right->exp_left) ||
6614 (left == right->exp_right)) {
6615 xmlExpFree(ctxt, left);
6616 return(right);
6617 }
6618 }
6619 /* OR canonicalization rule 2 */
6620 /* linearize (a | b) | c into a | (b | c) */
6621 if (left->type == XML_EXP_OR) {
6622 xmlExpNodePtr tmp;
6623
6624 /* OR canonicalization rule 2 */
6625 if ((left->exp_right->type != XML_EXP_OR) &&
6626 (left->exp_right->key < left->exp_left->key)) {
6627 tmp = left->exp_right;
6628 left->exp_right = left->exp_left;
6629 left->exp_left = tmp;
6630 }
6631 left->exp_right->ref++;
6632 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
6633 NULL, 0, 0);
6634 left->exp_left->ref++;
6635 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
6636 NULL, 0, 0);
6637
6638 xmlExpFree(ctxt, left);
6639 return(tmp);
6640 }
6641 if (right->type == XML_EXP_OR) {
6642 /* Ordering in the tree */
6643 /* C | (A | B) -> A | (B | C) */
6644 if (left->key > right->exp_right->key) {
6645 xmlExpNodePtr tmp;
6646 right->exp_right->ref++;
6647 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
6648 left, NULL, 0, 0);
6649 right->exp_left->ref++;
6650 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6651 tmp, NULL, 0, 0);
6652 xmlExpFree(ctxt, right);
6653 return(tmp);
6654 }
6655 /* Ordering in the tree */
6656 /* B | (A | C) -> A | (B | C) */
6657 if (left->key > right->exp_left->key) {
6658 xmlExpNodePtr tmp;
6659 right->exp_right->ref++;
6660 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
6661 right->exp_right, NULL, 0, 0);
6662 right->exp_left->ref++;
6663 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6664 tmp, NULL, 0, 0);
6665 xmlExpFree(ctxt, right);
6666 return(tmp);
6667 }
6668 }
6669 /* we know both types are != XML_EXP_OR here */
6670 else if (left->key > right->key) {
6671 xmlExpNodePtr tmp = left;
6672 left = right;
6673 right = tmp;
6674 }
6675 kbase = xmlExpHashComputeKey(type, left, right);
6676 } else if (type == XML_EXP_SEQ) {
6677 /* Forbid reduction rules */
6678 if (left->type == XML_EXP_FORBID) {
6679 xmlExpFree(ctxt, right);
6680 return(left);
6681 }
6682 if (right->type == XML_EXP_FORBID) {
6683 xmlExpFree(ctxt, left);
6684 return(right);
6685 }
6686 /* Empty reduction rules */
6687 if (right->type == XML_EXP_EMPTY) {
6688 return(left);
6689 }
6690 if (left->type == XML_EXP_EMPTY) {
6691 return(right);
6692 }
6693 kbase = xmlExpHashComputeKey(type, left, right);
6694 } else
6695 return(NULL);
6696
6697 key = kbase % ctxt->size;
6698 if (ctxt->table[key] != NULL) {
6699 for (insert = ctxt->table[key]; insert != NULL;
6700 insert = insert->next) {
6701 if ((insert->key == kbase) &&
6702 (insert->type == type)) {
6703 if (type == XML_EXP_ATOM) {
6704 if (name == insert->exp_str) {
6705 insert->ref++;
6706 return(insert);
6707 }
6708 } else if (type == XML_EXP_COUNT) {
6709 if ((insert->exp_min == min) && (insert->exp_max == max) &&
6710 (insert->exp_left == left)) {
6711 insert->ref++;
6712 left->ref--;
6713 return(insert);
6714 }
6715 } else if ((insert->exp_left == left) &&
6716 (insert->exp_right == right)) {
6717 insert->ref++;
6718 left->ref--;
6719 right->ref--;
6720 return(insert);
6721 }
6722 }
6723 }
6724 }
6725
6726 entry = xmlExpNewNode(ctxt, type);
6727 if (entry == NULL)
6728 return(NULL);
6729 entry->key = kbase;
6730 if (type == XML_EXP_ATOM) {
6731 entry->exp_str = name;
6732 entry->c_max = 1;
6733 } else if (type == XML_EXP_COUNT) {
6734 entry->exp_min = min;
6735 entry->exp_max = max;
6736 entry->exp_left = left;
6737 if ((min == 0) || (IS_NILLABLE(left)))
6738 entry->info |= XML_EXP_NILABLE;
6739 if (max < 0)
6740 entry->c_max = -1;
6741 else
6742 entry->c_max = max * entry->exp_left->c_max;
6743 } else {
6744 entry->exp_left = left;
6745 entry->exp_right = right;
6746 if (type == XML_EXP_OR) {
6747 if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
6748 entry->info |= XML_EXP_NILABLE;
6749 if ((entry->exp_left->c_max == -1) ||
6750 (entry->exp_right->c_max == -1))
6751 entry->c_max = -1;
6752 else if (entry->exp_left->c_max > entry->exp_right->c_max)
6753 entry->c_max = entry->exp_left->c_max;
6754 else
6755 entry->c_max = entry->exp_right->c_max;
6756 } else {
6757 if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
6758 entry->info |= XML_EXP_NILABLE;
6759 if ((entry->exp_left->c_max == -1) ||
6760 (entry->exp_right->c_max == -1))
6761 entry->c_max = -1;
6762 else
6763 entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
6764 }
6765 }
6766 entry->ref = 1;
6767 if (ctxt->table[key] != NULL)
6768 entry->next = ctxt->table[key];
6769
6770 ctxt->table[key] = entry;
6771 ctxt->nbElems++;
6772
6773 return(entry);
6774}
6775
6783void
6784xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
6785 if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
6786 return;
6787 exp->ref--;
6788 if (exp->ref == 0) {
6789 unsigned short key;
6790
6791 /* Unlink it first from the hash table */
6792 key = exp->key % ctxt->size;
6793 if (ctxt->table[key] == exp) {
6794 ctxt->table[key] = exp->next;
6795 } else {
6796 xmlExpNodePtr tmp;
6797
6798 tmp = ctxt->table[key];
6799 while (tmp != NULL) {
6800 if (tmp->next == exp) {
6801 tmp->next = exp->next;
6802 break;
6803 }
6804 tmp = tmp->next;
6805 }
6806 }
6807
6808 if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) {
6809 xmlExpFree(ctxt, exp->exp_left);
6810 xmlExpFree(ctxt, exp->exp_right);
6811 } else if (exp->type == XML_EXP_COUNT) {
6812 xmlExpFree(ctxt, exp->exp_left);
6813 }
6814 xmlFree(exp);
6815 ctxt->nb_nodes--;
6816 }
6817}
6818
6825void
6826xmlExpRef(xmlExpNodePtr exp) {
6827 if (exp != NULL)
6828 exp->ref++;
6829}
6830
6841xmlExpNodePtr
6842xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
6843 if ((ctxt == NULL) || (name == NULL))
6844 return(NULL);
6845 name = xmlDictLookup(ctxt->dict, name, len);
6846 if (name == NULL)
6847 return(NULL);
6848 return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0));
6849}
6850
6864xmlExpNodePtr
6865xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
6866 if (ctxt == NULL)
6867 return(NULL);
6868 if ((left == NULL) || (right == NULL)) {
6869 xmlExpFree(ctxt, left);
6870 xmlExpFree(ctxt, right);
6871 return(NULL);
6872 }
6873 return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));
6874}
6875
6889xmlExpNodePtr
6890xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
6891 if (ctxt == NULL)
6892 return(NULL);
6893 if ((left == NULL) || (right == NULL)) {
6894 xmlExpFree(ctxt, left);
6895 xmlExpFree(ctxt, right);
6896 return(NULL);
6897 }
6898 return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));
6899}
6900
6915xmlExpNodePtr
6916xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
6917 if (ctxt == NULL)
6918 return(NULL);
6919 if ((subset == NULL) || (min < 0) || (max < -1) ||
6920 ((max >= 0) && (min > max))) {
6921 xmlExpFree(ctxt, subset);
6922 return(NULL);
6923 }
6924 return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
6925 NULL, NULL, min, max));
6926}
6927
6928/************************************************************************
6929 * *
6930 * Public API for operations on expressions *
6931 * *
6932 ************************************************************************/
6933
6934static int
6935xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6936 const xmlChar**list, int len, int nb) {
6937 int tmp, tmp2;
6938tail:
6939 switch (exp->type) {
6940 case XML_EXP_EMPTY:
6941 return(0);
6942 case XML_EXP_ATOM:
6943 for (tmp = 0;tmp < nb;tmp++)
6944 if (list[tmp] == exp->exp_str)
6945 return(0);
6946 if (nb >= len)
6947 return(-2);
6948 list[nb] = exp->exp_str;
6949 return(1);
6950 case XML_EXP_COUNT:
6951 exp = exp->exp_left;
6952 goto tail;
6953 case XML_EXP_SEQ:
6954 case XML_EXP_OR:
6955 tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb);
6956 if (tmp < 0)
6957 return(tmp);
6958 tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len,
6959 nb + tmp);
6960 if (tmp2 < 0)
6961 return(tmp2);
6962 return(tmp + tmp2);
6963 }
6964 return(-1);
6965}
6966
6979int
6980xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6981 const xmlChar**langList, int len) {
6982 if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
6983 return(-1);
6984 return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0));
6985}
6986
6987static int
6988xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6989 const xmlChar**list, int len, int nb) {
6990 int tmp, tmp2;
6991tail:
6992 switch (exp->type) {
6993 case XML_EXP_FORBID:
6994 return(0);
6995 case XML_EXP_EMPTY:
6996 return(0);
6997 case XML_EXP_ATOM:
6998 for (tmp = 0;tmp < nb;tmp++)
6999 if (list[tmp] == exp->exp_str)
7000 return(0);
7001 if (nb >= len)
7002 return(-2);
7003 list[nb] = exp->exp_str;
7004 return(1);
7005 case XML_EXP_COUNT:
7006 exp = exp->exp_left;
7007 goto tail;
7008 case XML_EXP_SEQ:
7009 tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
7010 if (tmp < 0)
7011 return(tmp);
7012 if (IS_NILLABLE(exp->exp_left)) {
7013 tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
7014 nb + tmp);
7015 if (tmp2 < 0)
7016 return(tmp2);
7017 tmp += tmp2;
7018 }
7019 return(tmp);
7020 case XML_EXP_OR:
7021 tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
7022 if (tmp < 0)
7023 return(tmp);
7024 tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
7025 nb + tmp);
7026 if (tmp2 < 0)
7027 return(tmp2);
7028 return(tmp + tmp2);
7029 }
7030 return(-1);
7031}
7032
7047int
7048xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7049 const xmlChar**tokList, int len) {
7050 if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
7051 return(-1);
7052 return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0));
7053}
7054
7063int
7064xmlExpIsNillable(xmlExpNodePtr exp) {
7065 if (exp == NULL)
7066 return(-1);
7067 return(IS_NILLABLE(exp) != 0);
7068}
7069
7070static xmlExpNodePtr
7071xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
7072{
7073 xmlExpNodePtr ret;
7074
7075 switch (exp->type) {
7076 case XML_EXP_EMPTY:
7077 return(forbiddenExp);
7078 case XML_EXP_FORBID:
7079 return(forbiddenExp);
7080 case XML_EXP_ATOM:
7081 if (exp->exp_str == str) {
7082 ret = emptyExp;
7083 } else {
7084 /* TODO wildcards here */
7085 ret = forbiddenExp;
7086 }
7087 return(ret);
7088 case XML_EXP_OR: {
7089 xmlExpNodePtr tmp;
7090
7091 tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7092 if (tmp == NULL) {
7093 return(NULL);
7094 }
7095 ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
7096 if (ret == NULL) {
7097 xmlExpFree(ctxt, tmp);
7098 return(NULL);
7099 }
7100 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
7101 NULL, 0, 0);
7102 return(ret);
7103 }
7104 case XML_EXP_SEQ:
7105 ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7106 if (ret == NULL) {
7107 return(NULL);
7108 } else if (ret == forbiddenExp) {
7109 if (IS_NILLABLE(exp->exp_left)) {
7110 ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
7111 }
7112 } else {
7113 exp->exp_right->ref++;
7114 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
7115 NULL, 0, 0);
7116 }
7117 return(ret);
7118 case XML_EXP_COUNT: {
7119 int min, max;
7120 xmlExpNodePtr tmp;
7121
7122 if (exp->exp_max == 0)
7123 return(forbiddenExp);
7124 ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7125 if (ret == NULL)
7126 return(NULL);
7127 if (ret == forbiddenExp) {
7128 return(ret);
7129 }
7130 if (exp->exp_max == 1)
7131 return(ret);
7132 if (exp->exp_max < 0) /* unbounded */
7133 max = -1;
7134 else
7135 max = exp->exp_max - 1;
7136 if (exp->exp_min > 0)
7137 min = exp->exp_min - 1;
7138 else
7139 min = 0;
7140 exp->exp_left->ref++;
7141 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
7142 NULL, min, max);
7143 if (ret == emptyExp) {
7144 return(tmp);
7145 }
7146 return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
7147 NULL, 0, 0));
7148 }
7149 }
7150 return(NULL);
7151}
7152
7165xmlExpNodePtr
7166xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7167 const xmlChar *str, int len) {
7168 const xmlChar *input;
7169
7170 if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) {
7171 return(NULL);
7172 }
7173 /*
7174 * check the string is in the dictionary, if yes use an interned
7175 * copy, otherwise we know it's not an acceptable input
7176 */
7177 input = xmlDictExists(ctxt->dict, str, len);
7178 if (input == NULL) {
7179 return(forbiddenExp);
7180 }
7181 return(xmlExpStringDeriveInt(ctxt, exp, input));
7182}
7183
7184static int
7185xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
7186 int ret = 1;
7187
7188 if (sub->c_max == -1) {
7189 if (exp->c_max != -1)
7190 ret = 0;
7191 } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) {
7192 ret = 0;
7193 }
7194#if 0
7195 if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp)))
7196 ret = 0;
7197#endif
7198 return(ret);
7199}
7200
7201static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7202 xmlExpNodePtr sub);
7218static int
7219xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
7220 xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
7221 int i;
7222 xmlExpNodePtr tmp, tmp2;
7223
7224 if (mult != NULL) *mult = NULL;
7225 if (remain != NULL) *remain = NULL;
7226 if (exp->c_max == -1) return(0);
7227 if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub))) return(0);
7228
7229 for (i = 1;i <= exp->c_max;i++) {
7230 sub->ref++;
7231 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
7232 sub, NULL, NULL, i, i);
7233 if (tmp == NULL) {
7234 return(-1);
7235 }
7236 if (!xmlExpCheckCard(tmp, exp)) {
7237 xmlExpFree(ctxt, tmp);
7238 continue;
7239 }
7240 tmp2 = xmlExpExpDeriveInt(ctxt, tmp, exp);
7241 if (tmp2 == NULL) {
7242 xmlExpFree(ctxt, tmp);
7243 return(-1);
7244 }
7245 if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) {
7246 if (remain != NULL)
7247 *remain = tmp2;
7248 else
7249 xmlExpFree(ctxt, tmp2);
7250 if (mult != NULL)
7251 *mult = tmp;
7252 else
7253 xmlExpFree(ctxt, tmp);
7254 return(i);
7255 }
7256 xmlExpFree(ctxt, tmp);
7257 xmlExpFree(ctxt, tmp2);
7258 }
7259 return(0);
7260}
7261
7273static xmlExpNodePtr
7274xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7275 xmlExpNodePtr ret, tmp, tmp2, tmp3;
7276 const xmlChar **tab;
7277 int len, i;
7278
7279 /*
7280 * In case of equality and if the expression can only consume a finite
7281 * amount, then the derivation is empty
7282 */
7283 if ((exp == sub) && (exp->c_max >= 0)) {
7284 return(emptyExp);
7285 }
7286 /*
7287 * decompose sub sequence first
7288 */
7289 if (sub->type == XML_EXP_EMPTY) {
7290 exp->ref++;
7291 return(exp);
7292 }
7293 if (sub->type == XML_EXP_SEQ) {
7294 tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
7295 if (tmp == NULL)
7296 return(NULL);
7297 if (tmp == forbiddenExp)
7298 return(tmp);
7299 ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
7300 xmlExpFree(ctxt, tmp);
7301 return(ret);
7302 }
7303 if (sub->type == XML_EXP_OR) {
7304 tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
7305 if (tmp == forbiddenExp)
7306 return(tmp);
7307 if (tmp == NULL)
7308 return(NULL);
7309 ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
7310 if ((ret == NULL) || (ret == forbiddenExp)) {
7311 xmlExpFree(ctxt, tmp);
7312 return(ret);
7313 }
7314 return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
7315 }
7316 if (!xmlExpCheckCard(exp, sub)) {
7317 return(forbiddenExp);
7318 }
7319 switch (exp->type) {
7320 case XML_EXP_EMPTY:
7321 if (sub == emptyExp)
7322 return(emptyExp);
7323 return(forbiddenExp);
7324 case XML_EXP_FORBID:
7325 return(forbiddenExp);
7326 case XML_EXP_ATOM:
7327 if (sub->type == XML_EXP_ATOM) {
7328 /* TODO: handle wildcards */
7329 if (exp->exp_str == sub->exp_str) {
7330 return(emptyExp);
7331 }
7332 return(forbiddenExp);
7333 }
7334 if ((sub->type == XML_EXP_COUNT) &&
7335 (sub->exp_max == 1) &&
7336 (sub->exp_left->type == XML_EXP_ATOM)) {
7337 /* TODO: handle wildcards */
7338 if (exp->exp_str == sub->exp_left->exp_str) {
7339 return(emptyExp);
7340 }
7341 return(forbiddenExp);
7342 }
7343 return(forbiddenExp);
7344 case XML_EXP_SEQ:
7345 /* try to get the sequence consumed only if possible */
7346 if (xmlExpCheckCard(exp->exp_left, sub)) {
7347 /* See if the sequence can be consumed directly */
7348 ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7349 if ((ret != forbiddenExp) && (ret != NULL)) {
7350 /*
7351 * TODO: assumption here that we are determinist
7352 * i.e. we won't get to a nillable exp left
7353 * subset which could be matched by the right
7354 * part too.
7355 * e.g.: (a | b)+,(a | c) and 'a+,a'
7356 */
7357 exp->exp_right->ref++;
7358 return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
7359 exp->exp_right, NULL, 0, 0));
7360 }
7361 }
7362 /* Try instead to decompose */
7363 if (sub->type == XML_EXP_COUNT) {
7364 int min, max;
7365
7366 ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
7367 if (ret == NULL)
7368 return(NULL);
7369 if (ret != forbiddenExp) {
7370 if (sub->exp_max < 0)
7371 max = -1;
7372 else
7373 max = sub->exp_max -1;
7374 if (sub->exp_min > 0)
7375 min = sub->exp_min -1;
7376 else
7377 min = 0;
7378 exp->exp_right->ref++;
7379 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
7380 exp->exp_right, NULL, 0, 0);
7381 if (tmp == NULL)
7382 return(NULL);
7383
7384 sub->exp_left->ref++;
7385 tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
7386 sub->exp_left, NULL, NULL, min, max);
7387 if (tmp2 == NULL) {
7388 xmlExpFree(ctxt, tmp);
7389 return(NULL);
7390 }
7391 ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7392 xmlExpFree(ctxt, tmp);
7393 xmlExpFree(ctxt, tmp2);
7394 return(ret);
7395 }
7396 }
7397 /* we made no progress on structured operations */
7398 break;
7399 case XML_EXP_OR:
7400 ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7401 if (ret == NULL)
7402 return(NULL);
7403 tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
7404 if (tmp == NULL) {
7405 xmlExpFree(ctxt, ret);
7406 return(NULL);
7407 }
7408 return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
7409 case XML_EXP_COUNT: {
7410 int min, max;
7411
7412 if (sub->type == XML_EXP_COUNT) {
7413 /*
7414 * Try to see if the loop is completely subsumed
7415 */
7416 tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
7417 if (tmp == NULL)
7418 return(NULL);
7419 if (tmp == forbiddenExp) {
7420 int mult;
7421
7422 mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
7423 NULL, &tmp);
7424 if (mult <= 0) {
7425 return(forbiddenExp);
7426 }
7427 if (sub->exp_max == -1) {
7428 max = -1;
7429 if (exp->exp_max == -1) {
7430 if (exp->exp_min <= sub->exp_min * mult)
7431 min = 0;
7432 else
7433 min = exp->exp_min - sub->exp_min * mult;
7434 } else {
7435 xmlExpFree(ctxt, tmp);
7436 return(forbiddenExp);
7437 }
7438 } else {
7439 if (exp->exp_max == -1) {
7440 if (exp->exp_min > sub->exp_min * mult) {
7441 max = -1;
7442 min = exp->exp_min - sub->exp_min * mult;
7443 } else {
7444 max = -1;
7445 min = 0;
7446 }
7447 } else {
7448 if (exp->exp_max < sub->exp_max * mult) {
7449 xmlExpFree(ctxt, tmp);
7450 return(forbiddenExp);
7451 }
7452 if (sub->exp_max * mult > exp->exp_min)
7453 min = 0;
7454 else
7455 min = exp->exp_min - sub->exp_max * mult;
7456 max = exp->exp_max - sub->exp_max * mult;
7457 }
7458 }
7459 } else if (!IS_NILLABLE(tmp)) {
7460 /*
7461 * TODO: loop here to try to grow if working on finite
7462 * blocks.
7463 */
7464 xmlExpFree(ctxt, tmp);
7465 return(forbiddenExp);
7466 } else if (sub->exp_max == -1) {
7467 if (exp->exp_max == -1) {
7468 if (exp->exp_min <= sub->exp_min) {
7469 max = -1;
7470 min = 0;
7471 } else {
7472 max = -1;
7473 min = exp->exp_min - sub->exp_min;
7474 }
7475 } else if (exp->exp_min > sub->exp_min) {
7476 xmlExpFree(ctxt, tmp);
7477 return(forbiddenExp);
7478 } else {
7479 max = -1;
7480 min = 0;
7481 }
7482 } else {
7483 if (exp->exp_max == -1) {
7484 if (exp->exp_min > sub->exp_min) {
7485 max = -1;
7486 min = exp->exp_min - sub->exp_min;
7487 } else {
7488 max = -1;
7489 min = 0;
7490 }
7491 } else {
7492 if (exp->exp_max < sub->exp_max) {
7493 xmlExpFree(ctxt, tmp);
7494 return(forbiddenExp);
7495 }
7496 if (sub->exp_max > exp->exp_min)
7497 min = 0;
7498 else
7499 min = exp->exp_min - sub->exp_max;
7500 max = exp->exp_max - sub->exp_max;
7501 }
7502 }
7503 exp->exp_left->ref++;
7504 tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7505 NULL, NULL, min, max);
7506 if (tmp2 == NULL) {
7507 return(NULL);
7508 }
7509 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7510 NULL, 0, 0);
7511 return(ret);
7512 }
7513 tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7514 if (tmp == NULL)
7515 return(NULL);
7516 if (tmp == forbiddenExp) {
7517 return(forbiddenExp);
7518 }
7519 if (exp->exp_min > 0)
7520 min = exp->exp_min - 1;
7521 else
7522 min = 0;
7523 if (exp->exp_max < 0)
7524 max = -1;
7525 else
7526 max = exp->exp_max - 1;
7527
7528 exp->exp_left->ref++;
7529 tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7530 NULL, NULL, min, max);
7531 if (tmp2 == NULL)
7532 return(NULL);
7533 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7534 NULL, 0, 0);
7535 return(ret);
7536 }
7537 }
7538
7539 if (IS_NILLABLE(sub)) {
7540 if (!(IS_NILLABLE(exp)))
7541 return(forbiddenExp);
7542 else
7543 ret = emptyExp;
7544 } else
7545 ret = NULL;
7546 /*
7547 * here the structured derivation made no progress so
7548 * we use the default token based derivation to force one more step
7549 */
7550 if (ctxt->tabSize == 0)
7551 ctxt->tabSize = 40;
7552
7553 tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
7554 sizeof(const xmlChar *));
7555 if (tab == NULL) {
7556 return(NULL);
7557 }
7558
7559 /*
7560 * collect all the strings accepted by the subexpression on input
7561 */
7562 len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7563 while (len < 0) {
7564 const xmlChar **temp;
7565 temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 *
7566 sizeof(const xmlChar *));
7567 if (temp == NULL) {
7568 xmlFree((xmlChar **) tab);
7569 return(NULL);
7570 }
7571 tab = temp;
7572 ctxt->tabSize *= 2;
7573 len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7574 }
7575 for (i = 0;i < len;i++) {
7576 tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
7577 if ((tmp == NULL) || (tmp == forbiddenExp)) {
7578 xmlExpFree(ctxt, ret);
7579 xmlFree((xmlChar **) tab);
7580 return(tmp);
7581 }
7582 tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
7583 if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
7584 xmlExpFree(ctxt, tmp);
7585 xmlExpFree(ctxt, ret);
7586 xmlFree((xmlChar **) tab);
7587 return(tmp);
7588 }
7589 tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7590 xmlExpFree(ctxt, tmp);
7591 xmlExpFree(ctxt, tmp2);
7592
7593 if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
7594 xmlExpFree(ctxt, ret);
7595 xmlFree((xmlChar **) tab);
7596 return(tmp3);
7597 }
7598
7599 if (ret == NULL)
7600 ret = tmp3;
7601 else {
7602 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
7603 if (ret == NULL) {
7604 xmlFree((xmlChar **) tab);
7605 return(NULL);
7606 }
7607 }
7608 }
7609 xmlFree((xmlChar **) tab);
7610 return(ret);
7611}
7612
7627xmlExpNodePtr
7628xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7629 if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7630 return(NULL);
7631
7632 /*
7633 * O(1) speedups
7634 */
7635 if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7636 return(forbiddenExp);
7637 }
7638 if (xmlExpCheckCard(exp, sub) == 0) {
7639 return(forbiddenExp);
7640 }
7641 return(xmlExpExpDeriveInt(ctxt, exp, sub));
7642}
7643
7655int
7656xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7657 xmlExpNodePtr tmp;
7658
7659 if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7660 return(-1);
7661
7662 /*
7663 * TODO: speedup by checking the language of sub is a subset of the
7664 * language of exp
7665 */
7666 /*
7667 * O(1) speedups
7668 */
7669 if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7670 return(0);
7671 }
7672 if (xmlExpCheckCard(exp, sub) == 0) {
7673 return(0);
7674 }
7675 tmp = xmlExpExpDeriveInt(ctxt, exp, sub);
7676 if (tmp == NULL)
7677 return(-1);
7678 if (tmp == forbiddenExp)
7679 return(0);
7680 if (tmp == emptyExp)
7681 return(1);
7682 if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
7683 xmlExpFree(ctxt, tmp);
7684 return(1);
7685 }
7686 xmlExpFree(ctxt, tmp);
7687 return(0);
7688}
7689
7690/************************************************************************
7691 * *
7692 * Parsing expression *
7693 * *
7694 ************************************************************************/
7695
7696static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
7697
7698#undef CUR
7699#define CUR (*ctxt->cur)
7700#undef NEXT
7701#define NEXT ctxt->cur++;
7702#undef IS_BLANK
7703#define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
7704#define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
7705
7706static int
7707xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
7708 int ret = 0;
7709
7711 if (CUR == '*') {
7712 NEXT
7713 return(-1);
7714 }
7715 if ((CUR < '0') || (CUR > '9'))
7716 return(-1);
7717 while ((CUR >= '0') && (CUR <= '9')) {
7718 ret = ret * 10 + (CUR - '0');
7719 NEXT
7720 }
7721 return(ret);
7722}
7723
7724static xmlExpNodePtr
7725xmlExpParseOr(xmlExpCtxtPtr ctxt) {
7726 const char *base;
7727 xmlExpNodePtr ret;
7728 const xmlChar *val;
7729
7731 base = ctxt->cur;
7732 if (*ctxt->cur == '(') {
7733 NEXT
7734 ret = xmlExpParseExpr(ctxt);
7736 if (*ctxt->cur != ')') {
7737 fprintf(stderr, "unbalanced '(' : %s\n", base);
7738 xmlExpFree(ctxt, ret);
7739 return(NULL);
7740 }
7741 NEXT;
7743 goto parse_quantifier;
7744 }
7745 while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
7746 (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
7747 (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
7748 NEXT;
7749 val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
7750 if (val == NULL)
7751 return(NULL);
7752 ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
7753 if (ret == NULL)
7754 return(NULL);
7756parse_quantifier:
7757 if (CUR == '{') {
7758 int min, max;
7759
7760 NEXT
7761 min = xmlExpParseNumber(ctxt);
7762 if (min < 0) {
7763 xmlExpFree(ctxt, ret);
7764 return(NULL);
7765 }
7767 if (CUR == ',') {
7768 NEXT
7769 max = xmlExpParseNumber(ctxt);
7771 } else
7772 max = min;
7773 if (CUR != '}') {
7774 xmlExpFree(ctxt, ret);
7775 return(NULL);
7776 }
7777 NEXT
7778 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7779 min, max);
7781 } else if (CUR == '?') {
7782 NEXT
7783 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7784 0, 1);
7786 } else if (CUR == '+') {
7787 NEXT
7788 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7789 1, -1);
7791 } else if (CUR == '*') {
7792 NEXT
7793 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7794 0, -1);
7796 }
7797 return(ret);
7798}
7799
7800
7801static xmlExpNodePtr
7802xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
7803 xmlExpNodePtr ret, right;
7804
7805 ret = xmlExpParseOr(ctxt);
7807 while (CUR == '|') {
7808 NEXT
7809 right = xmlExpParseOr(ctxt);
7810 if (right == NULL) {
7811 xmlExpFree(ctxt, ret);
7812 return(NULL);
7813 }
7814 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
7815 if (ret == NULL)
7816 return(NULL);
7817 }
7818 return(ret);
7819}
7820
7821static xmlExpNodePtr
7822xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
7823 xmlExpNodePtr ret, right;
7824
7825 ret = xmlExpParseSeq(ctxt);
7827 while (CUR == ',') {
7828 NEXT
7829 right = xmlExpParseSeq(ctxt);
7830 if (right == NULL) {
7831 xmlExpFree(ctxt, ret);
7832 return(NULL);
7833 }
7834 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
7835 if (ret == NULL)
7836 return(NULL);
7837 }
7838 return(ret);
7839}
7840
7858xmlExpNodePtr
7859xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
7860 xmlExpNodePtr ret;
7861
7862 ctxt->expr = expr;
7863 ctxt->cur = expr;
7864
7865 ret = xmlExpParseExpr(ctxt);
7867 if (*ctxt->cur != 0) {
7868 xmlExpFree(ctxt, ret);
7869 return(NULL);
7870 }
7871 return(ret);
7872}
7873
7874static void
7875xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
7876 xmlExpNodePtr c;
7877
7878 if (expr == NULL) return;
7879 if (glob) xmlBufferWriteChar(buf, "(");
7880 switch (expr->type) {
7881 case XML_EXP_EMPTY:
7882 xmlBufferWriteChar(buf, "empty");
7883 break;
7884 case XML_EXP_FORBID:
7885 xmlBufferWriteChar(buf, "forbidden");
7886 break;
7887 case XML_EXP_ATOM:
7888 xmlBufferWriteCHAR(buf, expr->exp_str);
7889 break;
7890 case XML_EXP_SEQ:
7891 c = expr->exp_left;
7892 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7893 xmlExpDumpInt(buf, c, 1);
7894 else
7895 xmlExpDumpInt(buf, c, 0);
7896 xmlBufferWriteChar(buf, " , ");
7897 c = expr->exp_right;
7898 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7899 xmlExpDumpInt(buf, c, 1);
7900 else
7901 xmlExpDumpInt(buf, c, 0);
7902 break;
7903 case XML_EXP_OR:
7904 c = expr->exp_left;
7905 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7906 xmlExpDumpInt(buf, c, 1);
7907 else
7908 xmlExpDumpInt(buf, c, 0);
7909 xmlBufferWriteChar(buf, " | ");
7910 c = expr->exp_right;
7911 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7912 xmlExpDumpInt(buf, c, 1);
7913 else
7914 xmlExpDumpInt(buf, c, 0);
7915 break;
7916 case XML_EXP_COUNT: {
7917 char rep[40];
7918
7919 c = expr->exp_left;
7920 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7921 xmlExpDumpInt(buf, c, 1);
7922 else
7923 xmlExpDumpInt(buf, c, 0);
7924 if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
7925 rep[0] = '?';
7926 rep[1] = 0;
7927 } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
7928 rep[0] = '*';
7929 rep[1] = 0;
7930 } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
7931 rep[0] = '+';
7932 rep[1] = 0;
7933 } else if (expr->exp_max == expr->exp_min) {
7934 snprintf(rep, 39, "{%d}", expr->exp_min);
7935 } else if (expr->exp_max < 0) {
7936 snprintf(rep, 39, "{%d,inf}", expr->exp_min);
7937 } else {
7938 snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
7939 }
7940 rep[39] = 0;
7941 xmlBufferWriteChar(buf, rep);
7942 break;
7943 }
7944 default:
7945 fprintf(stderr, "Error in tree\n");
7946 }
7947 if (glob)
7948 xmlBufferWriteChar(buf, ")");
7949}
7957void
7958xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
7959 if ((buf == NULL) || (expr == NULL))
7960 return;
7961 xmlExpDumpInt(buf, expr, 0);
7962}
7963
7972int
7973xmlExpMaxToken(xmlExpNodePtr expr) {
7974 if (expr == NULL)
7975 return(-1);
7976 return(expr->c_max);
7977}
7978
7987int
7988xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
7989 if (ctxt == NULL)
7990 return(-1);
7991 return(ctxt->nb_nodes);
7992}
7993
8002int
8003xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
8004 if (ctxt == NULL)
8005 return(-1);
8006 return(ctxt->nb_cons);
8007}
8008
8009#endif /* LIBXML_EXPR_ENABLED */
8010
8011#endif /* LIBXML_REGEXP_ENABLED */
#define TODO
Definition: SAX2.c:44
struct outqueuenode * tail
Definition: adnsresfilter.c:66
static int state
Definition: maze.c:121
#define ok(value,...)
Definition: atltest.h:57
#define index(s, c)
Definition: various.h:29
INT copy(TCHAR source[MAX_PATH], TCHAR dest[MAX_PATH], INT append, DWORD lpdwFlags, BOOL bTouch)
Definition: copy.c:51
_In_ fcb _In_ chunk _In_ uint64_t _In_ uint64_t _In_ bool _In_opt_ void _In_opt_ PIRP _In_ LIST_ENTRY * rollback
Definition: btrfs_drv.h:1365
cd_progress_ptr progress
Definition: cdjpeg.h:152
Definition: list.h:37
#define NULL
Definition: types.h:112
#define IS_BLANK(c)
Definition: attributes.c:45
#define SKIP_BLANKS
Definition: pattern.c:1186
#define NXT(val)
Definition: pattern.c:1183
#define CUR
Definition: pattern.c:1181
unsigned int idx
Definition: utils.c:41
content
Definition: atl_ax.c:994
static WCHAR no[MAX_STRING_RESOURCE_LEN]
Definition: object.c:2340
unsigned char ch[4][2]
Definition: console.c:118
int WINAPIV fprintf(FILE *file, const char *format,...)
Definition: file.c:5549
#define stderr
#define SIZE_MAX
Definition: limits.h:49
#define INT_MAX
Definition: limits.h:26
_ACRTIMP size_t __cdecl strlen(const char *)
Definition: string.c:1592
unsigned short(__cdecl typeof(TIFFCurrentDirectory))(struct tiff *)
Definition: typeof.h:94
return ret
Definition: mutex.c:146
#define L(x)
Definition: resources.c:13
#define ERROR(name)
Definition: error_private.h:53
char ** glob(const char *v)
Definition: fake.c:36
#define printf
Definition: freeldr.h:104
#define NEXT(n, i)
FxCollectionEntry * cur
GLuint start
Definition: gl.h:1545
GLint GLint GLsizei GLsizei GLsizei depth
Definition: gl.h:1546
GLuint GLuint GLsizei count
Definition: gl.h:1545
GLuint GLuint GLsizei GLenum type
Definition: gl.h:1545
GLuint GLuint end
Definition: gl.h:1545
GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * data
Definition: gl.h:1950
GLdouble GLdouble t
Definition: gl.h:2047
GLuint res
Definition: glext.h:9613
GLsizeiptr size
Definition: glext.h:5919
const GLubyte * c
Definition: glext.h:8905
GLdouble GLdouble GLdouble GLdouble top
Definition: glext.h:10859
GLdouble GLdouble right
Definition: glext.h:10859
GLenum GLint * range
Definition: glext.h:7539
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glext.h:7751
GLint left
Definition: glext.h:7726
GLboolean GLenum GLenum GLvoid * values
Definition: glext.h:5666
GLbitfield flags
Definition: glext.h:7161
GLint GLint GLsizei GLuint * counters
Definition: glext.h:11114
GLuint GLfloat * val
Definition: glext.h:7180
GLenum GLsizei len
Definition: glext.h:6722
GLenum GLenum GLenum input
Definition: glext.h:9031
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat token
Definition: glfuncs.h:210
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
Definition: glfuncs.h:250
@ extra
Definition: id3.c:95
uint32_t entry
Definition: isohybrid.c:63
#define c
Definition: ke_i.h:80
#define error(str)
Definition: mkdosfs.c:1605
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
#define for
Definition: utility.h:88
char string[160]
Definition: util.h:11
static IPrintDialogCallback callback
Definition: printdlg.c:326
static DNS_RECORDW r1
Definition: record.c:37
static DNS_RECORDW r2
Definition: record.c:38
static UINT UINT last
Definition: font.c:45
DWORD exp
Definition: msg.c:16058
#define min(a, b)
Definition: monoChain.cc:55
#define IS_COMBINING(c)
#define IS_DIGIT(c)
#define IS_CHAR(c)
#define IS_EXTENDER(c)
#define IS_LETTER(c)
#define long
Definition: qsort.c:33
static unsigned __int64 next
Definition: rand_nt.c:6
#define err(...)
const WCHAR * str
static calc_node_t temp
Definition: rpn_ieee.c:38
void xmlDictFree(xmlDictPtr dict)
Definition: dict.c:333
const xmlChar * xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len)
Definition: dict.c:872
int xmlDictReference(xmlDictPtr dict)
Definition: dict.c:317
xmlDictPtr xmlDictCreate(void)
Definition: dict.c:262
const xmlChar * xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len)
Definition: dict.c:824
xmlReallocFunc xmlRealloc
Definition: globals.c:214
xmlFreeFunc xmlFree
Definition: globals.c:184
xmlMallocFunc xmlMalloc
Definition: globals.c:193
xmlMallocFunc xmlMallocAtomic
Definition: globals.c:204
XML_HIDDEN void __xmlRaiseError(xmlStructuredErrorFunc schannel, xmlGenericErrorFunc channel, void *data, void *ctx, void *nod, int domain, int code, xmlErrorLevel level, const char *file, int line, const char *str1, const char *str2, const char *str3, int int1, int col, const char *msg,...) LIBXML_ATTR_FORMAT(16
XML_HIDDEN void xmlAutomataSetFlags(xmlAutomataPtr am, int flags)
#define NEXTL(l)
Definition: parser.c:2289
#define CUR_SCHAR(s, l)
Definition: parser.c:2297
#define memset(x, y, z)
Definition: compat.h:39
SOCKET WSAAPI accept(IN SOCKET s, OUT LPSOCKADDR addr, OUT INT FAR *addrlen)
Definition: socklife.c:23
CardRegion * from
Definition: spigame.cpp:19
Definition: dict.c:59
DWORD status
Definition: pdh_main.c:108
struct define * next
Definition: compiler.c:65
Definition: query.h:86
int type
Definition: query.h:87
Definition: parser.c:44
Definition: copy.c:22
Definition: name.c:39
Definition: send.c:48
Definition: ps.c:97
Definition: tools.h:99
#define max(a, b)
Definition: svc.c:63
Definition: pdh_main.c:96
static struct wctab tab[]
#define snprintf
Definition: wintirpc.h:48
@ XML_ERR_FATAL
Definition: xmlerror.h:28
@ XML_FROM_REGEXP
Definition: xmlerror.h:51
@ XML_REGEXP_COMPILE_ERROR
Definition: xmlerror.h:418
@ XML_ERR_NO_MEMORY
Definition: xmlerror.h:102
XMLPUBFUN xmlChar * xmlStrndup(const xmlChar *cur, int len)
Definition: xmlstring.c:45
XMLPUBFUN int XMLPUBFUN int XMLPUBFUN int xmlGetUTF8Char(const unsigned char *utf, int *len)
Definition: xmlstring.c:708
#define BAD_CAST
Definition: xmlstring.h:35
XMLPUBFUN int xmlStrEqual(const xmlChar *str1, const xmlChar *str2)
Definition: xmlstring.c:162
XMLPUBFUN const xmlChar * xmlStrchr(const xmlChar *str, xmlChar val)
Definition: xmlstring.c:327
unsigned char xmlChar
Definition: xmlstring.h:28
XMLPUBFUN xmlChar * xmlStrdup(const xmlChar *cur)
Definition: xmlstring.c:69