ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

xmlregexp.c
Go to the documentation of this file.
00001 /*
00002  * regexp.c: generic and extensible Regular Expression engine
00003  *
00004  * Basically designed with the purpose of compiling regexps for 
00005  * the variety of validation/shemas mechanisms now available in
00006  * XML related specifications these include:
00007  *    - XML-1.0 DTD validation
00008  *    - XML Schemas structure part 1
00009  *    - XML Schemas Datatypes part 2 especially Appendix F
00010  *    - RELAX-NG/TREX i.e. the counter proposal
00011  *
00012  * See Copyright for the status of this software.
00013  *
00014  * Daniel Veillard <veillard@redhat.com>
00015  */
00016 
00017 #define IN_LIBXML
00018 #include "libxml.h"
00019 
00020 #ifdef LIBXML_REGEXP_ENABLED
00021 
00022 /* #define DEBUG_ERR */
00023 
00024 #include <stdio.h>
00025 #include <string.h>
00026 #ifdef HAVE_LIMITS_H
00027 #include <limits.h>
00028 #endif
00029 
00030 #include <libxml/tree.h>
00031 #include <libxml/parserInternals.h>
00032 #include <libxml/xmlregexp.h>
00033 #include <libxml/xmlautomata.h>
00034 #include <libxml/xmlunicode.h>
00035 
00036 #ifndef INT_MAX
00037 #define INT_MAX 123456789 /* easy to flag and big enough for our needs */
00038 #endif
00039 
00040 /* #define DEBUG_REGEXP_GRAPH */
00041 /* #define DEBUG_REGEXP_EXEC */
00042 /* #define DEBUG_PUSH */
00043 /* #define DEBUG_COMPACTION */
00044 
00045 #define MAX_PUSH 10000000
00046 
00047 #define ERROR(str)                          \
00048     ctxt->error = XML_REGEXP_COMPILE_ERROR;             \
00049     xmlRegexpErrCompile(ctxt, str);
00050 #define NEXT ctxt->cur++
00051 #define CUR (*(ctxt->cur))
00052 #define NXT(index) (ctxt->cur[index])
00053 
00054 #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
00055 #define NEXTL(l) ctxt->cur += l;
00056 #define XML_REG_STRING_SEPARATOR '|'
00057 /*
00058  * Need PREV to check on a '-' within a Character Group. May only be used
00059  * when it's guaranteed that cur is not at the beginning of ctxt->string!
00060  */
00061 #define PREV (ctxt->cur[-1])
00062 
00068 #define TODO                                \
00069     xmlGenericError(xmlGenericErrorContext,             \
00070         "Unimplemented block at %s:%d\n",               \
00071             __FILE__, __LINE__);
00072 
00073 /************************************************************************
00074  *                                  *
00075  *          Datatypes and structures            *
00076  *                                  *
00077  ************************************************************************/
00078 
00079 /*
00080  * Note: the order of the enums below is significant, do not shuffle
00081  */
00082 typedef enum {
00083     XML_REGEXP_EPSILON = 1,
00084     XML_REGEXP_CHARVAL,
00085     XML_REGEXP_RANGES,
00086     XML_REGEXP_SUBREG,  /* used for () sub regexps */
00087     XML_REGEXP_STRING,
00088     XML_REGEXP_ANYCHAR, /* . */
00089     XML_REGEXP_ANYSPACE, /* \s */
00090     XML_REGEXP_NOTSPACE, /* \S */
00091     XML_REGEXP_INITNAME, /* \l */
00092     XML_REGEXP_NOTINITNAME, /* \L */
00093     XML_REGEXP_NAMECHAR, /* \c */
00094     XML_REGEXP_NOTNAMECHAR, /* \C */
00095     XML_REGEXP_DECIMAL, /* \d */
00096     XML_REGEXP_NOTDECIMAL, /* \D */
00097     XML_REGEXP_REALCHAR, /* \w */
00098     XML_REGEXP_NOTREALCHAR, /* \W */
00099     XML_REGEXP_LETTER = 100,
00100     XML_REGEXP_LETTER_UPPERCASE,
00101     XML_REGEXP_LETTER_LOWERCASE,
00102     XML_REGEXP_LETTER_TITLECASE,
00103     XML_REGEXP_LETTER_MODIFIER,
00104     XML_REGEXP_LETTER_OTHERS,
00105     XML_REGEXP_MARK,
00106     XML_REGEXP_MARK_NONSPACING,
00107     XML_REGEXP_MARK_SPACECOMBINING,
00108     XML_REGEXP_MARK_ENCLOSING,
00109     XML_REGEXP_NUMBER,
00110     XML_REGEXP_NUMBER_DECIMAL,
00111     XML_REGEXP_NUMBER_LETTER,
00112     XML_REGEXP_NUMBER_OTHERS,
00113     XML_REGEXP_PUNCT,
00114     XML_REGEXP_PUNCT_CONNECTOR,
00115     XML_REGEXP_PUNCT_DASH,
00116     XML_REGEXP_PUNCT_OPEN,
00117     XML_REGEXP_PUNCT_CLOSE,
00118     XML_REGEXP_PUNCT_INITQUOTE,
00119     XML_REGEXP_PUNCT_FINQUOTE,
00120     XML_REGEXP_PUNCT_OTHERS,
00121     XML_REGEXP_SEPAR,
00122     XML_REGEXP_SEPAR_SPACE,
00123     XML_REGEXP_SEPAR_LINE,
00124     XML_REGEXP_SEPAR_PARA,
00125     XML_REGEXP_SYMBOL,
00126     XML_REGEXP_SYMBOL_MATH,
00127     XML_REGEXP_SYMBOL_CURRENCY,
00128     XML_REGEXP_SYMBOL_MODIFIER,
00129     XML_REGEXP_SYMBOL_OTHERS,
00130     XML_REGEXP_OTHER,
00131     XML_REGEXP_OTHER_CONTROL,
00132     XML_REGEXP_OTHER_FORMAT,
00133     XML_REGEXP_OTHER_PRIVATE,
00134     XML_REGEXP_OTHER_NA,
00135     XML_REGEXP_BLOCK_NAME
00136 } xmlRegAtomType;
00137 
00138 typedef enum {
00139     XML_REGEXP_QUANT_EPSILON = 1,
00140     XML_REGEXP_QUANT_ONCE,
00141     XML_REGEXP_QUANT_OPT,
00142     XML_REGEXP_QUANT_MULT,
00143     XML_REGEXP_QUANT_PLUS,
00144     XML_REGEXP_QUANT_ONCEONLY,
00145     XML_REGEXP_QUANT_ALL,
00146     XML_REGEXP_QUANT_RANGE
00147 } xmlRegQuantType;
00148 
00149 typedef enum {
00150     XML_REGEXP_START_STATE = 1,
00151     XML_REGEXP_FINAL_STATE,
00152     XML_REGEXP_TRANS_STATE,
00153     XML_REGEXP_SINK_STATE,
00154     XML_REGEXP_UNREACH_STATE
00155 } xmlRegStateType;
00156 
00157 typedef enum {
00158     XML_REGEXP_MARK_NORMAL = 0,
00159     XML_REGEXP_MARK_START,
00160     XML_REGEXP_MARK_VISITED
00161 } xmlRegMarkedType;
00162 
00163 typedef struct _xmlRegRange xmlRegRange;
00164 typedef xmlRegRange *xmlRegRangePtr;
00165 
00166 struct _xmlRegRange {
00167     int neg;        /* 0 normal, 1 not, 2 exclude */
00168     xmlRegAtomType type;
00169     int start;
00170     int end;
00171     xmlChar *blockName;
00172 };
00173 
00174 typedef struct _xmlRegAtom xmlRegAtom;
00175 typedef xmlRegAtom *xmlRegAtomPtr;
00176 
00177 typedef struct _xmlAutomataState xmlRegState;
00178 typedef xmlRegState *xmlRegStatePtr;
00179 
00180 struct _xmlRegAtom {
00181     int no;
00182     xmlRegAtomType type;
00183     xmlRegQuantType quant;
00184     int min;
00185     int max;
00186 
00187     void *valuep;
00188     void *valuep2;
00189     int neg;
00190     int codepoint;
00191     xmlRegStatePtr start;
00192     xmlRegStatePtr start0;
00193     xmlRegStatePtr stop;
00194     int maxRanges;
00195     int nbRanges;
00196     xmlRegRangePtr *ranges;
00197     void *data;
00198 };
00199 
00200 typedef struct _xmlRegCounter xmlRegCounter;
00201 typedef xmlRegCounter *xmlRegCounterPtr;
00202 
00203 struct _xmlRegCounter {
00204     int min;
00205     int max;
00206 };
00207 
00208 typedef struct _xmlRegTrans xmlRegTrans;
00209 typedef xmlRegTrans *xmlRegTransPtr;
00210 
00211 struct _xmlRegTrans {
00212     xmlRegAtomPtr atom;
00213     int to;
00214     int counter;
00215     int count;
00216     int nd;
00217 };
00218 
00219 struct _xmlAutomataState {
00220     xmlRegStateType type;
00221     xmlRegMarkedType mark;
00222     xmlRegMarkedType reached;
00223     int no;
00224     int maxTrans;
00225     int nbTrans;
00226     xmlRegTrans *trans;
00227     /*  knowing states ponting to us can speed things up */
00228     int maxTransTo;
00229     int nbTransTo;
00230     int *transTo;
00231 };
00232 
00233 typedef struct _xmlAutomata xmlRegParserCtxt;
00234 typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
00235 
00236 #define AM_AUTOMATA_RNG 1
00237 
00238 struct _xmlAutomata {
00239     xmlChar *string;
00240     xmlChar *cur;
00241 
00242     int error;
00243     int neg;
00244 
00245     xmlRegStatePtr start;
00246     xmlRegStatePtr end;
00247     xmlRegStatePtr state;
00248 
00249     xmlRegAtomPtr atom;
00250 
00251     int maxAtoms;
00252     int nbAtoms;
00253     xmlRegAtomPtr *atoms;
00254 
00255     int maxStates;
00256     int nbStates;
00257     xmlRegStatePtr *states;
00258 
00259     int maxCounters;
00260     int nbCounters;
00261     xmlRegCounter *counters;
00262 
00263     int determinist;
00264     int negs;
00265     int flags;
00266 };
00267 
00268 struct _xmlRegexp {
00269     xmlChar *string;
00270     int nbStates;
00271     xmlRegStatePtr *states;
00272     int nbAtoms;
00273     xmlRegAtomPtr *atoms;
00274     int nbCounters;
00275     xmlRegCounter *counters;
00276     int determinist;
00277     int flags;
00278     /*
00279      * That's the compact form for determinists automatas
00280      */
00281     int nbstates;
00282     int *compact;
00283     void **transdata;
00284     int nbstrings;
00285     xmlChar **stringMap;
00286 };
00287 
00288 typedef struct _xmlRegExecRollback xmlRegExecRollback;
00289 typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
00290 
00291 struct _xmlRegExecRollback {
00292     xmlRegStatePtr state;/* the current state */
00293     int index;      /* the index in the input stack */
00294     int nextbranch; /* the next transition to explore in that state */
00295     int *counts;    /* save the automata state if it has some */
00296 };
00297 
00298 typedef struct _xmlRegInputToken xmlRegInputToken;
00299 typedef xmlRegInputToken *xmlRegInputTokenPtr;
00300 
00301 struct _xmlRegInputToken {
00302     xmlChar *value;
00303     void *data;
00304 };
00305 
00306 struct _xmlRegExecCtxt {
00307     int status;     /* execution status != 0 indicate an error */
00308     int determinist;    /* did we find an indeterministic behaviour */
00309     xmlRegexpPtr comp;  /* the compiled regexp */
00310     xmlRegExecCallbacks callback;
00311     void *data;
00312 
00313     xmlRegStatePtr state;/* the current state */
00314     int transno;    /* the current transition on that state */
00315     int transcount; /* the number of chars in char counted transitions */
00316 
00317     /*
00318      * A stack of rollback states
00319      */
00320     int maxRollbacks;
00321     int nbRollbacks;
00322     xmlRegExecRollback *rollbacks;
00323 
00324     /*
00325      * The state of the automata if any
00326      */
00327     int *counts;
00328 
00329     /*
00330      * The input stack
00331      */
00332     int inputStackMax;
00333     int inputStackNr;
00334     int index;
00335     int *charStack;
00336     const xmlChar *inputString; /* when operating on characters */
00337     xmlRegInputTokenPtr inputStack;/* when operating on strings */
00338 
00339     /*
00340      * error handling
00341      */
00342     int errStateNo;     /* the error state number */
00343     xmlRegStatePtr errState;    /* the error state */
00344     xmlChar *errString;     /* the string raising the error */
00345     int *errCounts;     /* counters at the error state */
00346     int nbPush;
00347 };
00348 
00349 #define REGEXP_ALL_COUNTER  0x123456
00350 #define REGEXP_ALL_LAX_COUNTER  0x123457
00351 
00352 static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
00353 static void xmlRegFreeState(xmlRegStatePtr state);
00354 static void xmlRegFreeAtom(xmlRegAtomPtr atom);
00355 static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
00356 static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
00357 static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
00358                   int neg, int start, int end, const xmlChar *blockName);
00359 
00360 void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
00361 
00362 /************************************************************************
00363  *                                  *
00364  *      Regexp memory error handler             *
00365  *                                  *
00366  ************************************************************************/
00373 static void
00374 xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
00375 {
00376     const char *regexp = NULL;
00377     if (ctxt != NULL) {
00378         regexp = (const char *) ctxt->string;
00379     ctxt->error = XML_ERR_NO_MEMORY;
00380     }
00381     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
00382             XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
00383             regexp, NULL, 0, 0,
00384             "Memory allocation failed : %s\n", extra);
00385 }
00386 
00393 static void
00394 xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
00395 {
00396     const char *regexp = NULL;
00397     int idx = 0;
00398 
00399     if (ctxt != NULL) {
00400         regexp = (const char *) ctxt->string;
00401     idx = ctxt->cur - ctxt->string;
00402     ctxt->error = XML_REGEXP_COMPILE_ERROR;
00403     }
00404     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
00405             XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
00406             regexp, NULL, idx, 0,
00407             "failed to compile: %s\n", extra);
00408 }
00409 
00410 /************************************************************************
00411  *                                  *
00412  *          Allocation/Deallocation             *
00413  *                                  *
00414  ************************************************************************/
00415 
00416 static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
00425 static xmlRegexpPtr
00426 xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
00427     xmlRegexpPtr ret;
00428 
00429     ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
00430     if (ret == NULL) {
00431     xmlRegexpErrMemory(ctxt, "compiling regexp");
00432     return(NULL);
00433     }
00434     memset(ret, 0, sizeof(xmlRegexp));
00435     ret->string = ctxt->string;
00436     ret->nbStates = ctxt->nbStates;
00437     ret->states = ctxt->states;
00438     ret->nbAtoms = ctxt->nbAtoms;
00439     ret->atoms = ctxt->atoms;
00440     ret->nbCounters = ctxt->nbCounters;
00441     ret->counters = ctxt->counters;
00442     ret->determinist = ctxt->determinist;
00443     ret->flags = ctxt->flags;
00444     if (ret->determinist == -1) {
00445         xmlRegexpIsDeterminist(ret);
00446     }
00447 
00448     if ((ret->determinist != 0) &&
00449     (ret->nbCounters == 0) &&
00450     (ctxt->negs == 0) &&
00451     (ret->atoms != NULL) &&
00452     (ret->atoms[0] != NULL) &&
00453     (ret->atoms[0]->type == XML_REGEXP_STRING)) {
00454     int i, j, nbstates = 0, nbatoms = 0;
00455     int *stateRemap;
00456     int *stringRemap;
00457     int *transitions;
00458     void **transdata;
00459     xmlChar **stringMap;
00460         xmlChar *value;
00461 
00462     /*
00463      * Switch to a compact representation
00464      * 1/ counting the effective number of states left
00465      * 2/ counting the unique number of atoms, and check that
00466      *    they are all of the string type
00467      * 3/ build a table state x atom for the transitions
00468      */
00469 
00470     stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
00471     if (stateRemap == NULL) {
00472         xmlRegexpErrMemory(ctxt, "compiling regexp");
00473         xmlFree(ret);
00474         return(NULL);
00475     }
00476     for (i = 0;i < ret->nbStates;i++) {
00477         if (ret->states[i] != NULL) {
00478         stateRemap[i] = nbstates;
00479         nbstates++;
00480         } else {
00481         stateRemap[i] = -1;
00482         }
00483     }
00484 #ifdef DEBUG_COMPACTION
00485     printf("Final: %d states\n", nbstates);
00486 #endif
00487     stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
00488     if (stringMap == NULL) {
00489         xmlRegexpErrMemory(ctxt, "compiling regexp");
00490         xmlFree(stateRemap);
00491         xmlFree(ret);
00492         return(NULL);
00493     }
00494     stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
00495     if (stringRemap == NULL) {
00496         xmlRegexpErrMemory(ctxt, "compiling regexp");
00497         xmlFree(stringMap);
00498         xmlFree(stateRemap);
00499         xmlFree(ret);
00500         return(NULL);
00501     }
00502     for (i = 0;i < ret->nbAtoms;i++) {
00503         if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
00504         (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
00505         value = ret->atoms[i]->valuep;
00506                 for (j = 0;j < nbatoms;j++) {
00507             if (xmlStrEqual(stringMap[j], value)) {
00508             stringRemap[i] = j;
00509             break;
00510             }
00511         }
00512         if (j >= nbatoms) {
00513             stringRemap[i] = nbatoms;
00514             stringMap[nbatoms] = xmlStrdup(value);
00515             if (stringMap[nbatoms] == NULL) {
00516             for (i = 0;i < nbatoms;i++)
00517                 xmlFree(stringMap[i]);
00518             xmlFree(stringRemap);
00519             xmlFree(stringMap);
00520             xmlFree(stateRemap);
00521             xmlFree(ret);
00522             return(NULL);
00523             }
00524             nbatoms++;
00525         }
00526         } else {
00527         xmlFree(stateRemap);
00528         xmlFree(stringRemap);
00529         for (i = 0;i < nbatoms;i++)
00530             xmlFree(stringMap[i]);
00531         xmlFree(stringMap);
00532         xmlFree(ret);
00533         return(NULL);
00534         }
00535     }
00536 #ifdef DEBUG_COMPACTION
00537     printf("Final: %d atoms\n", nbatoms);
00538 #endif
00539     transitions = (int *) xmlMalloc((nbstates + 1) *
00540                                     (nbatoms + 1) * sizeof(int));
00541     if (transitions == NULL) {
00542         xmlFree(stateRemap);
00543         xmlFree(stringRemap);
00544         xmlFree(stringMap);
00545         xmlFree(ret);
00546         return(NULL);
00547     }
00548     memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int));
00549 
00550     /*
00551      * Allocate the transition table. The first entry for each
00552      * state corresponds to the state type.
00553      */
00554     transdata = NULL;
00555 
00556     for (i = 0;i < ret->nbStates;i++) {
00557         int stateno, atomno, targetno, prev;
00558         xmlRegStatePtr state;
00559         xmlRegTransPtr trans;
00560 
00561         stateno = stateRemap[i];
00562         if (stateno == -1)
00563         continue;
00564         state = ret->states[i];
00565 
00566         transitions[stateno * (nbatoms + 1)] = state->type;
00567 
00568         for (j = 0;j < state->nbTrans;j++) {
00569         trans = &(state->trans[j]);
00570         if ((trans->to == -1) || (trans->atom == NULL))
00571             continue;
00572                 atomno = stringRemap[trans->atom->no];
00573         if ((trans->atom->data != NULL) && (transdata == NULL)) {
00574             transdata = (void **) xmlMalloc(nbstates * nbatoms *
00575                                         sizeof(void *));
00576             if (transdata != NULL)
00577             memset(transdata, 0,
00578                    nbstates * nbatoms * sizeof(void *));
00579             else {
00580             xmlRegexpErrMemory(ctxt, "compiling regexp");
00581             break;
00582             }
00583         }
00584         targetno = stateRemap[trans->to];
00585         /*
00586          * if the same atom can generate transitions to 2 different
00587          * states then it means the automata is not determinist and
00588          * the compact form can't be used !
00589          */
00590         prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
00591         if (prev != 0) {
00592             if (prev != targetno + 1) {
00593             ret->determinist = 0;
00594 #ifdef DEBUG_COMPACTION
00595             printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
00596                    i, j, trans->atom->no, trans->to, atomno, targetno);
00597             printf("       previous to is %d\n", prev);
00598 #endif
00599             if (transdata != NULL)
00600                 xmlFree(transdata);
00601             xmlFree(transitions);
00602             xmlFree(stateRemap);
00603             xmlFree(stringRemap);
00604             for (i = 0;i < nbatoms;i++)
00605                 xmlFree(stringMap[i]);
00606             xmlFree(stringMap);
00607             goto not_determ;
00608             }
00609         } else {
00610 #if 0
00611             printf("State %d trans %d: atom %d to %d : %d to %d\n",
00612                i, j, trans->atom->no, trans->to, atomno, targetno);
00613 #endif
00614             transitions[stateno * (nbatoms + 1) + atomno + 1] =
00615             targetno + 1; /* to avoid 0 */
00616             if (transdata != NULL)
00617             transdata[stateno * nbatoms + atomno] =
00618                 trans->atom->data;
00619         }
00620         }
00621     }
00622     ret->determinist = 1;
00623 #ifdef DEBUG_COMPACTION
00624     /*
00625      * Debug
00626      */
00627     for (i = 0;i < nbstates;i++) {
00628         for (j = 0;j < nbatoms + 1;j++) {
00629                 printf("%02d ", transitions[i * (nbatoms + 1) + j]);
00630         }
00631         printf("\n");
00632     }
00633     printf("\n");
00634 #endif
00635     /*
00636      * Cleanup of the old data
00637      */
00638     if (ret->states != NULL) {
00639         for (i = 0;i < ret->nbStates;i++)
00640         xmlRegFreeState(ret->states[i]);
00641         xmlFree(ret->states);
00642     }
00643     ret->states = NULL;
00644     ret->nbStates = 0;
00645     if (ret->atoms != NULL) {
00646         for (i = 0;i < ret->nbAtoms;i++)
00647         xmlRegFreeAtom(ret->atoms[i]);
00648         xmlFree(ret->atoms);
00649     }
00650     ret->atoms = NULL;
00651     ret->nbAtoms = 0;
00652 
00653     ret->compact = transitions;
00654     ret->transdata = transdata;
00655     ret->stringMap = stringMap;
00656     ret->nbstrings = nbatoms;
00657     ret->nbstates = nbstates;
00658     xmlFree(stateRemap);
00659     xmlFree(stringRemap);
00660     }
00661 not_determ:
00662     ctxt->string = NULL;
00663     ctxt->nbStates = 0;
00664     ctxt->states = NULL;
00665     ctxt->nbAtoms = 0;
00666     ctxt->atoms = NULL;
00667     ctxt->nbCounters = 0;
00668     ctxt->counters = NULL;
00669     return(ret);
00670 }
00671 
00680 static xmlRegParserCtxtPtr
00681 xmlRegNewParserCtxt(const xmlChar *string) {
00682     xmlRegParserCtxtPtr ret;
00683 
00684     ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
00685     if (ret == NULL)
00686     return(NULL);
00687     memset(ret, 0, sizeof(xmlRegParserCtxt));
00688     if (string != NULL)
00689     ret->string = xmlStrdup(string);
00690     ret->cur = ret->string;
00691     ret->neg = 0;
00692     ret->negs = 0;
00693     ret->error = 0;
00694     ret->determinist = -1;
00695     return(ret);
00696 }
00697 
00710 static xmlRegRangePtr
00711 xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
00712            int neg, xmlRegAtomType type, int start, int end) {
00713     xmlRegRangePtr ret;
00714 
00715     ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
00716     if (ret == NULL) {
00717     xmlRegexpErrMemory(ctxt, "allocating range");
00718     return(NULL);
00719     }
00720     ret->neg = neg;
00721     ret->type = type;
00722     ret->start = start;
00723     ret->end = end;
00724     return(ret);
00725 }
00726 
00733 static void
00734 xmlRegFreeRange(xmlRegRangePtr range) {
00735     if (range == NULL)
00736     return;
00737 
00738     if (range->blockName != NULL)
00739     xmlFree(range->blockName);
00740     xmlFree(range);
00741 }
00742 
00751 static xmlRegRangePtr
00752 xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
00753     xmlRegRangePtr ret;
00754 
00755     if (range == NULL)
00756     return(NULL);
00757 
00758     ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
00759                          range->end);
00760     if (ret == NULL)
00761         return(NULL);
00762     if (range->blockName != NULL) {
00763     ret->blockName = xmlStrdup(range->blockName);
00764     if (ret->blockName == NULL) {
00765         xmlRegexpErrMemory(ctxt, "allocating range");
00766         xmlRegFreeRange(ret);
00767         return(NULL);
00768     }
00769     }
00770     return(ret);
00771 }
00772 
00782 static xmlRegAtomPtr
00783 xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
00784     xmlRegAtomPtr ret;
00785 
00786     ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
00787     if (ret == NULL) {
00788     xmlRegexpErrMemory(ctxt, "allocating atom");
00789     return(NULL);
00790     }
00791     memset(ret, 0, sizeof(xmlRegAtom));
00792     ret->type = type;
00793     ret->quant = XML_REGEXP_QUANT_ONCE;
00794     ret->min = 0;
00795     ret->max = 0;
00796     return(ret);
00797 }
00798 
00805 static void
00806 xmlRegFreeAtom(xmlRegAtomPtr atom) {
00807     int i;
00808 
00809     if (atom == NULL)
00810     return;
00811 
00812     for (i = 0;i < atom->nbRanges;i++)
00813     xmlRegFreeRange(atom->ranges[i]);
00814     if (atom->ranges != NULL)
00815     xmlFree(atom->ranges);
00816     if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
00817     xmlFree(atom->valuep);
00818     if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
00819     xmlFree(atom->valuep2);
00820     if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
00821     xmlFree(atom->valuep);
00822     xmlFree(atom);
00823 }
00824 
00834 static xmlRegAtomPtr
00835 xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
00836     xmlRegAtomPtr ret;
00837 
00838     ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
00839     if (ret == NULL) {
00840     xmlRegexpErrMemory(ctxt, "copying atom");
00841     return(NULL);
00842     }
00843     memset(ret, 0, sizeof(xmlRegAtom));
00844     ret->type = atom->type;
00845     ret->quant = atom->quant;
00846     ret->min = atom->min;
00847     ret->max = atom->max;
00848     if (atom->nbRanges > 0) {
00849         int i;
00850 
00851         ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
00852                                                atom->nbRanges);
00853     if (ret->ranges == NULL) {
00854         xmlRegexpErrMemory(ctxt, "copying atom");
00855         goto error;
00856     }
00857     for (i = 0;i < atom->nbRanges;i++) {
00858         ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
00859         if (ret->ranges[i] == NULL)
00860             goto error;
00861         ret->nbRanges = i + 1;
00862     }
00863     }
00864     return(ret);
00865 
00866 error:
00867     xmlRegFreeAtom(ret);
00868     return(NULL);
00869 }
00870 
00871 static xmlRegStatePtr
00872 xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
00873     xmlRegStatePtr ret;
00874 
00875     ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
00876     if (ret == NULL) {
00877     xmlRegexpErrMemory(ctxt, "allocating state");
00878     return(NULL);
00879     }
00880     memset(ret, 0, sizeof(xmlRegState));
00881     ret->type = XML_REGEXP_TRANS_STATE;
00882     ret->mark = XML_REGEXP_MARK_NORMAL;
00883     return(ret);
00884 }
00885 
00892 static void
00893 xmlRegFreeState(xmlRegStatePtr state) {
00894     if (state == NULL)
00895     return;
00896 
00897     if (state->trans != NULL)
00898     xmlFree(state->trans);
00899     if (state->transTo != NULL)
00900     xmlFree(state->transTo);
00901     xmlFree(state);
00902 }
00903 
00910 static void
00911 xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
00912     int i;
00913     if (ctxt == NULL)
00914     return;
00915 
00916     if (ctxt->string != NULL)
00917     xmlFree(ctxt->string);
00918     if (ctxt->states != NULL) {
00919     for (i = 0;i < ctxt->nbStates;i++)
00920         xmlRegFreeState(ctxt->states[i]);
00921     xmlFree(ctxt->states);
00922     }
00923     if (ctxt->atoms != NULL) {
00924     for (i = 0;i < ctxt->nbAtoms;i++)
00925         xmlRegFreeAtom(ctxt->atoms[i]);
00926     xmlFree(ctxt->atoms);
00927     }
00928     if (ctxt->counters != NULL)
00929     xmlFree(ctxt->counters);
00930     xmlFree(ctxt);
00931 }
00932 
00933 /************************************************************************
00934  *                                  *
00935  *          Display of Data structures          *
00936  *                                  *
00937  ************************************************************************/
00938 
00939 static void
00940 xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
00941     switch (type) {
00942         case XML_REGEXP_EPSILON:
00943         fprintf(output, "epsilon "); break;
00944         case XML_REGEXP_CHARVAL:
00945         fprintf(output, "charval "); break;
00946         case XML_REGEXP_RANGES:
00947         fprintf(output, "ranges "); break;
00948         case XML_REGEXP_SUBREG:
00949         fprintf(output, "subexpr "); break;
00950         case XML_REGEXP_STRING:
00951         fprintf(output, "string "); break;
00952         case XML_REGEXP_ANYCHAR:
00953         fprintf(output, "anychar "); break;
00954         case XML_REGEXP_ANYSPACE:
00955         fprintf(output, "anyspace "); break;
00956         case XML_REGEXP_NOTSPACE:
00957         fprintf(output, "notspace "); break;
00958         case XML_REGEXP_INITNAME:
00959         fprintf(output, "initname "); break;
00960         case XML_REGEXP_NOTINITNAME:
00961         fprintf(output, "notinitname "); break;
00962         case XML_REGEXP_NAMECHAR:
00963         fprintf(output, "namechar "); break;
00964         case XML_REGEXP_NOTNAMECHAR:
00965         fprintf(output, "notnamechar "); break;
00966         case XML_REGEXP_DECIMAL:
00967         fprintf(output, "decimal "); break;
00968         case XML_REGEXP_NOTDECIMAL:
00969         fprintf(output, "notdecimal "); break;
00970         case XML_REGEXP_REALCHAR:
00971         fprintf(output, "realchar "); break;
00972         case XML_REGEXP_NOTREALCHAR:
00973         fprintf(output, "notrealchar "); break;
00974         case XML_REGEXP_LETTER:
00975             fprintf(output, "LETTER "); break;
00976         case XML_REGEXP_LETTER_UPPERCASE:
00977             fprintf(output, "LETTER_UPPERCASE "); break;
00978         case XML_REGEXP_LETTER_LOWERCASE:
00979             fprintf(output, "LETTER_LOWERCASE "); break;
00980         case XML_REGEXP_LETTER_TITLECASE:
00981             fprintf(output, "LETTER_TITLECASE "); break;
00982         case XML_REGEXP_LETTER_MODIFIER:
00983             fprintf(output, "LETTER_MODIFIER "); break;
00984         case XML_REGEXP_LETTER_OTHERS:
00985             fprintf(output, "LETTER_OTHERS "); break;
00986         case XML_REGEXP_MARK:
00987             fprintf(output, "MARK "); break;
00988         case XML_REGEXP_MARK_NONSPACING:
00989             fprintf(output, "MARK_NONSPACING "); break;
00990         case XML_REGEXP_MARK_SPACECOMBINING:
00991             fprintf(output, "MARK_SPACECOMBINING "); break;
00992         case XML_REGEXP_MARK_ENCLOSING:
00993             fprintf(output, "MARK_ENCLOSING "); break;
00994         case XML_REGEXP_NUMBER:
00995             fprintf(output, "NUMBER "); break;
00996         case XML_REGEXP_NUMBER_DECIMAL:
00997             fprintf(output, "NUMBER_DECIMAL "); break;
00998         case XML_REGEXP_NUMBER_LETTER:
00999             fprintf(output, "NUMBER_LETTER "); break;
01000         case XML_REGEXP_NUMBER_OTHERS:
01001             fprintf(output, "NUMBER_OTHERS "); break;
01002         case XML_REGEXP_PUNCT:
01003             fprintf(output, "PUNCT "); break;
01004         case XML_REGEXP_PUNCT_CONNECTOR:
01005             fprintf(output, "PUNCT_CONNECTOR "); break;
01006         case XML_REGEXP_PUNCT_DASH:
01007             fprintf(output, "PUNCT_DASH "); break;
01008         case XML_REGEXP_PUNCT_OPEN:
01009             fprintf(output, "PUNCT_OPEN "); break;
01010         case XML_REGEXP_PUNCT_CLOSE:
01011             fprintf(output, "PUNCT_CLOSE "); break;
01012         case XML_REGEXP_PUNCT_INITQUOTE:
01013             fprintf(output, "PUNCT_INITQUOTE "); break;
01014         case XML_REGEXP_PUNCT_FINQUOTE:
01015             fprintf(output, "PUNCT_FINQUOTE "); break;
01016         case XML_REGEXP_PUNCT_OTHERS:
01017             fprintf(output, "PUNCT_OTHERS "); break;
01018         case XML_REGEXP_SEPAR:
01019             fprintf(output, "SEPAR "); break;
01020         case XML_REGEXP_SEPAR_SPACE:
01021             fprintf(output, "SEPAR_SPACE "); break;
01022         case XML_REGEXP_SEPAR_LINE:
01023             fprintf(output, "SEPAR_LINE "); break;
01024         case XML_REGEXP_SEPAR_PARA:
01025             fprintf(output, "SEPAR_PARA "); break;
01026         case XML_REGEXP_SYMBOL:
01027             fprintf(output, "SYMBOL "); break;
01028         case XML_REGEXP_SYMBOL_MATH:
01029             fprintf(output, "SYMBOL_MATH "); break;
01030         case XML_REGEXP_SYMBOL_CURRENCY:
01031             fprintf(output, "SYMBOL_CURRENCY "); break;
01032         case XML_REGEXP_SYMBOL_MODIFIER:
01033             fprintf(output, "SYMBOL_MODIFIER "); break;
01034         case XML_REGEXP_SYMBOL_OTHERS:
01035             fprintf(output, "SYMBOL_OTHERS "); break;
01036         case XML_REGEXP_OTHER:
01037             fprintf(output, "OTHER "); break;
01038         case XML_REGEXP_OTHER_CONTROL:
01039             fprintf(output, "OTHER_CONTROL "); break;
01040         case XML_REGEXP_OTHER_FORMAT:
01041             fprintf(output, "OTHER_FORMAT "); break;
01042         case XML_REGEXP_OTHER_PRIVATE:
01043             fprintf(output, "OTHER_PRIVATE "); break;
01044         case XML_REGEXP_OTHER_NA:
01045             fprintf(output, "OTHER_NA "); break;
01046         case XML_REGEXP_BLOCK_NAME:
01047         fprintf(output, "BLOCK "); break;
01048     }
01049 }
01050 
01051 static void
01052 xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
01053     switch (type) {
01054         case XML_REGEXP_QUANT_EPSILON:
01055         fprintf(output, "epsilon "); break;
01056         case XML_REGEXP_QUANT_ONCE:
01057         fprintf(output, "once "); break;
01058         case XML_REGEXP_QUANT_OPT:
01059         fprintf(output, "? "); break;
01060         case XML_REGEXP_QUANT_MULT:
01061         fprintf(output, "* "); break;
01062         case XML_REGEXP_QUANT_PLUS:
01063         fprintf(output, "+ "); break;
01064     case XML_REGEXP_QUANT_RANGE:
01065         fprintf(output, "range "); break;
01066     case XML_REGEXP_QUANT_ONCEONLY:
01067         fprintf(output, "onceonly "); break;
01068     case XML_REGEXP_QUANT_ALL:
01069         fprintf(output, "all "); break;
01070     }
01071 }
01072 static void
01073 xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
01074     fprintf(output, "  range: ");
01075     if (range->neg)
01076     fprintf(output, "negative ");
01077     xmlRegPrintAtomType(output, range->type);
01078     fprintf(output, "%c - %c\n", range->start, range->end);
01079 }
01080 
01081 static void
01082 xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
01083     fprintf(output, " atom: ");
01084     if (atom == NULL) {
01085     fprintf(output, "NULL\n");
01086     return;
01087     }
01088     if (atom->neg)
01089         fprintf(output, "not ");
01090     xmlRegPrintAtomType(output, atom->type);
01091     xmlRegPrintQuantType(output, atom->quant);
01092     if (atom->quant == XML_REGEXP_QUANT_RANGE)
01093     fprintf(output, "%d-%d ", atom->min, atom->max);
01094     if (atom->type == XML_REGEXP_STRING)
01095     fprintf(output, "'%s' ", (char *) atom->valuep);
01096     if (atom->type == XML_REGEXP_CHARVAL)
01097     fprintf(output, "char %c\n", atom->codepoint);
01098     else if (atom->type == XML_REGEXP_RANGES) {
01099     int i;
01100     fprintf(output, "%d entries\n", atom->nbRanges);
01101     for (i = 0; i < atom->nbRanges;i++)
01102         xmlRegPrintRange(output, atom->ranges[i]);
01103     } else if (atom->type == XML_REGEXP_SUBREG) {
01104     fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
01105     } else {
01106     fprintf(output, "\n");
01107     }
01108 }
01109 
01110 static void
01111 xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
01112     fprintf(output, "  trans: ");
01113     if (trans == NULL) {
01114     fprintf(output, "NULL\n");
01115     return;
01116     }
01117     if (trans->to < 0) {
01118     fprintf(output, "removed\n");
01119     return;
01120     }
01121     if (trans->nd != 0) {
01122     if (trans->nd == 2)
01123         fprintf(output, "last not determinist, ");
01124     else
01125         fprintf(output, "not determinist, ");
01126     }
01127     if (trans->counter >= 0) {
01128     fprintf(output, "counted %d, ", trans->counter);
01129     }
01130     if (trans->count == REGEXP_ALL_COUNTER) {
01131     fprintf(output, "all transition, ");
01132     } else if (trans->count >= 0) {
01133     fprintf(output, "count based %d, ", trans->count);
01134     }
01135     if (trans->atom == NULL) {
01136     fprintf(output, "epsilon to %d\n", trans->to);
01137     return;
01138     }
01139     if (trans->atom->type == XML_REGEXP_CHARVAL)
01140     fprintf(output, "char %c ", trans->atom->codepoint);
01141     fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
01142 }
01143     
01144 static void
01145 xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
01146     int i;
01147 
01148     fprintf(output, " state: ");
01149     if (state == NULL) {
01150     fprintf(output, "NULL\n");
01151     return;
01152     }
01153     if (state->type == XML_REGEXP_START_STATE)
01154     fprintf(output, "START ");
01155     if (state->type == XML_REGEXP_FINAL_STATE)
01156     fprintf(output, "FINAL ");
01157     
01158     fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
01159     for (i = 0;i < state->nbTrans; i++) {
01160     xmlRegPrintTrans(output, &(state->trans[i]));
01161     }
01162 }
01163 
01164 #ifdef DEBUG_REGEXP_GRAPH
01165 static void
01166 xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
01167     int i;
01168 
01169     fprintf(output, " ctxt: ");
01170     if (ctxt == NULL) {
01171     fprintf(output, "NULL\n");
01172     return;
01173     }
01174     fprintf(output, "'%s' ", ctxt->string);
01175     if (ctxt->error)
01176     fprintf(output, "error ");
01177     if (ctxt->neg)
01178     fprintf(output, "neg ");
01179     fprintf(output, "\n");
01180     fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
01181     for (i = 0;i < ctxt->nbAtoms; i++) {
01182     fprintf(output, " %02d ", i);
01183     xmlRegPrintAtom(output, ctxt->atoms[i]);
01184     }
01185     if (ctxt->atom != NULL) {
01186     fprintf(output, "current atom:\n");
01187     xmlRegPrintAtom(output, ctxt->atom);
01188     }
01189     fprintf(output, "%d states:", ctxt->nbStates);
01190     if (ctxt->start != NULL)
01191     fprintf(output, " start: %d", ctxt->start->no);
01192     if (ctxt->end != NULL)
01193     fprintf(output, " end: %d", ctxt->end->no);
01194     fprintf(output, "\n");
01195     for (i = 0;i < ctxt->nbStates; i++) {
01196     xmlRegPrintState(output, ctxt->states[i]);
01197     }
01198     fprintf(output, "%d counters:\n", ctxt->nbCounters);
01199     for (i = 0;i < ctxt->nbCounters; i++) {
01200     fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
01201                                         ctxt->counters[i].max);
01202     }
01203 }
01204 #endif
01205 
01206 /************************************************************************
01207  *                                  *
01208  *       Finite Automata structures manipulations       *
01209  *                                  *
01210  ************************************************************************/
01211 
01212 static void 
01213 xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
01214                int neg, xmlRegAtomType type, int start, int end,
01215            xmlChar *blockName) {
01216     xmlRegRangePtr range;
01217 
01218     if (atom == NULL) {
01219     ERROR("add range: atom is NULL");
01220     return;
01221     }
01222     if (atom->type != XML_REGEXP_RANGES) {
01223     ERROR("add range: atom is not ranges");
01224     return;
01225     }
01226     if (atom->maxRanges == 0) {
01227     atom->maxRanges = 4;
01228     atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
01229                                      sizeof(xmlRegRangePtr));
01230     if (atom->ranges == NULL) {
01231         xmlRegexpErrMemory(ctxt, "adding ranges");
01232         atom->maxRanges = 0;
01233         return;
01234     }
01235     } else if (atom->nbRanges >= atom->maxRanges) {
01236     xmlRegRangePtr *tmp;
01237     atom->maxRanges *= 2;
01238     tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
01239                                      sizeof(xmlRegRangePtr));
01240     if (tmp == NULL) {
01241         xmlRegexpErrMemory(ctxt, "adding ranges");
01242         atom->maxRanges /= 2;
01243         return;
01244     }
01245     atom->ranges = tmp;
01246     }
01247     range = xmlRegNewRange(ctxt, neg, type, start, end);
01248     if (range == NULL)
01249     return;
01250     range->blockName = blockName;
01251     atom->ranges[atom->nbRanges++] = range;
01252     
01253 }
01254 
01255 static int
01256 xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
01257     if (ctxt->maxCounters == 0) {
01258     ctxt->maxCounters = 4;
01259     ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
01260                                      sizeof(xmlRegCounter));
01261     if (ctxt->counters == NULL) {
01262         xmlRegexpErrMemory(ctxt, "allocating counter");
01263         ctxt->maxCounters = 0;
01264         return(-1);
01265     }
01266     } else if (ctxt->nbCounters >= ctxt->maxCounters) {
01267     xmlRegCounter *tmp;
01268     ctxt->maxCounters *= 2;
01269     tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
01270                                    sizeof(xmlRegCounter));
01271     if (tmp == NULL) {
01272         xmlRegexpErrMemory(ctxt, "allocating counter");
01273         ctxt->maxCounters /= 2;
01274         return(-1);
01275     }
01276     ctxt->counters = tmp;
01277     }
01278     ctxt->counters[ctxt->nbCounters].min = -1;
01279     ctxt->counters[ctxt->nbCounters].max = -1;
01280     return(ctxt->nbCounters++);
01281 }
01282 
01283 static int 
01284 xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
01285     if (atom == NULL) {
01286     ERROR("atom push: atom is NULL");
01287     return(-1);
01288     }
01289     if (ctxt->maxAtoms == 0) {
01290     ctxt->maxAtoms = 4;
01291     ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
01292                                      sizeof(xmlRegAtomPtr));
01293     if (ctxt->atoms == NULL) {
01294         xmlRegexpErrMemory(ctxt, "pushing atom");
01295         ctxt->maxAtoms = 0;
01296         return(-1);
01297     }
01298     } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
01299     xmlRegAtomPtr *tmp;
01300     ctxt->maxAtoms *= 2;
01301     tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
01302                                      sizeof(xmlRegAtomPtr));
01303     if (tmp == NULL) {
01304         xmlRegexpErrMemory(ctxt, "allocating counter");
01305         ctxt->maxAtoms /= 2;
01306         return(-1);
01307     }
01308     ctxt->atoms = tmp;
01309     }
01310     atom->no = ctxt->nbAtoms;
01311     ctxt->atoms[ctxt->nbAtoms++] = atom;
01312     return(0);
01313 }
01314 
01315 static void 
01316 xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
01317                       int from) {
01318     if (target->maxTransTo == 0) {
01319     target->maxTransTo = 8;
01320     target->transTo = (int *) xmlMalloc(target->maxTransTo *
01321                                      sizeof(int));
01322     if (target->transTo == NULL) {
01323         xmlRegexpErrMemory(ctxt, "adding transition");
01324         target->maxTransTo = 0;
01325         return;
01326     }
01327     } else if (target->nbTransTo >= target->maxTransTo) {
01328     int *tmp;
01329     target->maxTransTo *= 2;
01330     tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
01331                                      sizeof(int));
01332     if (tmp == NULL) {
01333         xmlRegexpErrMemory(ctxt, "adding transition");
01334         target->maxTransTo /= 2;
01335         return;
01336     }
01337     target->transTo = tmp;
01338     }
01339     target->transTo[target->nbTransTo] = from;
01340     target->nbTransTo++;
01341 }
01342 
01343 static void 
01344 xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
01345                 xmlRegAtomPtr atom, xmlRegStatePtr target,
01346             int counter, int count) {
01347 
01348     int nrtrans;
01349 
01350     if (state == NULL) {
01351     ERROR("add state: state is NULL");
01352     return;
01353     }
01354     if (target == NULL) {
01355     ERROR("add state: target is NULL");
01356     return;
01357     }
01358     /*
01359      * Other routines follow the philosophy 'When in doubt, add a transition'
01360      * so we check here whether such a transition is already present and, if
01361      * so, silently ignore this request.
01362      */
01363 
01364     for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
01365     xmlRegTransPtr trans = &(state->trans[nrtrans]);
01366     if ((trans->atom == atom) &&
01367         (trans->to == target->no) &&
01368         (trans->counter == counter) &&
01369         (trans->count == count)) {
01370 #ifdef DEBUG_REGEXP_GRAPH
01371         printf("Ignoring duplicate transition from %d to %d\n",
01372             state->no, target->no);
01373 #endif
01374         return;
01375     }
01376     }
01377 
01378     if (state->maxTrans == 0) {
01379     state->maxTrans = 8;
01380     state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
01381                                      sizeof(xmlRegTrans));
01382     if (state->trans == NULL) {
01383         xmlRegexpErrMemory(ctxt, "adding transition");
01384         state->maxTrans = 0;
01385         return;
01386     }
01387     } else if (state->nbTrans >= state->maxTrans) {
01388     xmlRegTrans *tmp;
01389     state->maxTrans *= 2;
01390     tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
01391                                      sizeof(xmlRegTrans));
01392     if (tmp == NULL) {
01393         xmlRegexpErrMemory(ctxt, "adding transition");
01394         state->maxTrans /= 2;
01395         return;
01396     }
01397     state->trans = tmp;
01398     }
01399 #ifdef DEBUG_REGEXP_GRAPH
01400     printf("Add trans from %d to %d ", state->no, target->no);
01401     if (count == REGEXP_ALL_COUNTER)
01402     printf("all transition\n");
01403     else if (count >= 0)
01404     printf("count based %d\n", count);
01405     else if (counter >= 0)
01406     printf("counted %d\n", counter);
01407     else if (atom == NULL)
01408     printf("epsilon transition\n");
01409     else if (atom != NULL) 
01410         xmlRegPrintAtom(stdout, atom);
01411 #endif
01412 
01413     state->trans[state->nbTrans].atom = atom;
01414     state->trans[state->nbTrans].to = target->no;
01415     state->trans[state->nbTrans].counter = counter;
01416     state->trans[state->nbTrans].count = count;
01417     state->trans[state->nbTrans].nd = 0;
01418     state->nbTrans++;
01419     xmlRegStateAddTransTo(ctxt, target, state->no);
01420 }
01421 
01422 static int
01423 xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
01424     if (state == NULL) return(-1);
01425     if (ctxt->maxStates == 0) {
01426     ctxt->maxStates = 4;
01427     ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
01428                                      sizeof(xmlRegStatePtr));
01429     if (ctxt->states == NULL) {
01430         xmlRegexpErrMemory(ctxt, "adding state");
01431         ctxt->maxStates = 0;
01432         return(-1);
01433     }
01434     } else if (ctxt->nbStates >= ctxt->maxStates) {
01435     xmlRegStatePtr *tmp;
01436     ctxt->maxStates *= 2;
01437     tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
01438                                      sizeof(xmlRegStatePtr));
01439     if (tmp == NULL) {
01440         xmlRegexpErrMemory(ctxt, "adding state");
01441         ctxt->maxStates /= 2;
01442         return(-1);
01443     }
01444     ctxt->states = tmp;
01445     }
01446     state->no = ctxt->nbStates;
01447     ctxt->states[ctxt->nbStates++] = state;
01448     return(0);
01449 }
01450 
01459 static void
01460 xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
01461                xmlRegStatePtr from, xmlRegStatePtr to,
01462                int lax) {
01463     if (to == NULL) {
01464     to = xmlRegNewState(ctxt);
01465     xmlRegStatePush(ctxt, to);
01466     ctxt->state = to;
01467     }
01468     if (lax)
01469     xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
01470     else
01471     xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
01472 }
01473 
01481 static void
01482 xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
01483                    xmlRegStatePtr from, xmlRegStatePtr to) {
01484     if (to == NULL) {
01485     to = xmlRegNewState(ctxt);
01486     xmlRegStatePush(ctxt, to);
01487     ctxt->state = to;
01488     }
01489     xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
01490 }
01491 
01500 static void
01501 xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
01502         xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
01503     if (to == NULL) {
01504     to = xmlRegNewState(ctxt);
01505     xmlRegStatePush(ctxt, to);
01506     ctxt->state = to;
01507     }
01508     xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
01509 }
01510 
01519 static void
01520 xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
01521         xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
01522     if (to == NULL) {
01523     to = xmlRegNewState(ctxt);
01524     xmlRegStatePush(ctxt, to);
01525     ctxt->state = to;
01526     }
01527     xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
01528 }
01529 
01539 static int
01540 xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
01541                      xmlRegStatePtr to, xmlRegAtomPtr atom) {
01542     xmlRegStatePtr end;
01543 
01544     if (atom == NULL) {
01545     ERROR("genrate transition: atom == NULL");
01546     return(-1);
01547     }
01548     if (atom->type == XML_REGEXP_SUBREG) {
01549     /*
01550      * this is a subexpression handling one should not need to
01551      * create a new node except for XML_REGEXP_QUANT_RANGE.
01552      */
01553     if (xmlRegAtomPush(ctxt, atom) < 0) {
01554         return(-1);
01555     }
01556     if ((to != NULL) && (atom->stop != to) &&
01557         (atom->quant != XML_REGEXP_QUANT_RANGE)) {
01558         /*
01559          * Generate an epsilon transition to link to the target
01560          */
01561         xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
01562 #ifdef DV
01563     } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && 
01564            (atom->quant != XML_REGEXP_QUANT_ONCE)) {
01565         to = xmlRegNewState(ctxt);
01566         xmlRegStatePush(ctxt, to);
01567         ctxt->state = to;
01568         xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
01569 #endif
01570     }
01571     switch (atom->quant) {
01572         case XML_REGEXP_QUANT_OPT:
01573         atom->quant = XML_REGEXP_QUANT_ONCE;
01574         /*
01575          * transition done to the state after end of atom.
01576          *      1. set transition from atom start to new state
01577          *      2. set transition from atom end to this state. 
01578          */
01579                 if (to == NULL) {
01580                     xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
01581                     xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
01582                                                    ctxt->state);
01583                 } else {
01584                     xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
01585                 }
01586         break;
01587         case XML_REGEXP_QUANT_MULT:
01588         atom->quant = XML_REGEXP_QUANT_ONCE;
01589         xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
01590         xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
01591         break;
01592         case XML_REGEXP_QUANT_PLUS:
01593         atom->quant = XML_REGEXP_QUANT_ONCE;
01594         xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
01595         break;
01596         case XML_REGEXP_QUANT_RANGE: {
01597         int counter;
01598         xmlRegStatePtr inter, newstate;
01599 
01600         /*
01601          * create the final state now if needed
01602          */
01603         if (to != NULL) {
01604             newstate = to;
01605         } else {
01606             newstate = xmlRegNewState(ctxt);
01607             xmlRegStatePush(ctxt, newstate);
01608         }
01609 
01610         /*
01611          * The principle here is to use counted transition
01612          * to avoid explosion in the number of states in the
01613          * graph. This is clearly more complex but should not
01614          * be exploitable at runtime.
01615          */
01616         if ((atom->min == 0) && (atom->start0 == NULL)) {
01617             xmlRegAtomPtr copy;
01618             /*
01619              * duplicate a transition based on atom to count next
01620              * occurences after 1. We cannot loop to atom->start
01621              * directly because we need an epsilon transition to 
01622              * newstate.
01623              */
01624              /* ???? For some reason it seems we never reach that
01625                 case, I suppose this got optimized out before when
01626             building the automata */
01627             copy = xmlRegCopyAtom(ctxt, atom);
01628             if (copy == NULL)
01629                 return(-1);
01630             copy->quant = XML_REGEXP_QUANT_ONCE;
01631             copy->min = 0;
01632             copy->max = 0;
01633 
01634             if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
01635                 < 0)
01636             return(-1);
01637             inter = ctxt->state;
01638             counter = xmlRegGetCounter(ctxt);
01639             ctxt->counters[counter].min = atom->min - 1;
01640             ctxt->counters[counter].max = atom->max - 1;
01641             /* count the number of times we see it again */
01642             xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
01643                            atom->stop, counter);
01644             /* allow a way out based on the count */
01645             xmlFAGenerateCountedTransition(ctxt, inter,
01646                                        newstate, counter);
01647             /* and also allow a direct exit for 0 */
01648             xmlFAGenerateEpsilonTransition(ctxt, atom->start,
01649                                            newstate);
01650         } else {
01651             /*
01652              * either we need the atom at least once or there
01653              * is an atom->start0 allowing to easilly plug the
01654              * epsilon transition.
01655              */
01656             counter = xmlRegGetCounter(ctxt);
01657             ctxt->counters[counter].min = atom->min - 1;
01658             ctxt->counters[counter].max = atom->max - 1;
01659             /* count the number of times we see it again */
01660             xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
01661                            atom->start, counter);
01662             /* allow a way out based on the count */
01663             xmlFAGenerateCountedTransition(ctxt, atom->stop,
01664                                        newstate, counter);
01665             /* and if needed allow a direct exit for 0 */
01666             if (atom->min == 0)
01667             xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
01668                                newstate);
01669 
01670         }
01671         atom->min = 0;
01672         atom->max = 0;
01673         atom->quant = XML_REGEXP_QUANT_ONCE;
01674         ctxt->state = newstate;
01675         }
01676         default:
01677         break;
01678     }
01679     return(0);
01680     } 
01681     if ((atom->min == 0) && (atom->max == 0) &&
01682                (atom->quant == XML_REGEXP_QUANT_RANGE)) {
01683         /*
01684      * we can discard the atom and generate an epsilon transition instead
01685      */
01686     if (to == NULL) {
01687         to = xmlRegNewState(ctxt);
01688         if (to != NULL)
01689         xmlRegStatePush(ctxt, to);
01690         else {
01691         return(-1);
01692         }
01693     }
01694     xmlFAGenerateEpsilonTransition(ctxt, from, to);
01695     ctxt->state = to;
01696     xmlRegFreeAtom(atom);
01697     return(0);
01698     }
01699     if (to == NULL) {
01700     to = xmlRegNewState(ctxt);
01701     if (to != NULL)
01702         xmlRegStatePush(ctxt, to);
01703     else {
01704         return(-1);
01705     }
01706     } 
01707     end = to;
01708     if ((atom->quant == XML_REGEXP_QUANT_MULT) || 
01709         (atom->quant == XML_REGEXP_QUANT_PLUS)) {
01710     /*
01711      * Do not pollute the target state by adding transitions from
01712      * it as it is likely to be the shared target of multiple branches.
01713      * So isolate with an epsilon transition.
01714      */
01715         xmlRegStatePtr tmp;
01716     
01717     tmp = xmlRegNewState(ctxt);
01718     if (tmp != NULL)
01719         xmlRegStatePush(ctxt, tmp);
01720     else {
01721         return(-1);
01722     }
01723     xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
01724     to = tmp;
01725     }
01726     if (xmlRegAtomPush(ctxt, atom) < 0) {
01727     return(-1);
01728     }
01729     xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
01730     ctxt->state = end;
01731     switch (atom->quant) {
01732     case XML_REGEXP_QUANT_OPT:
01733         atom->quant = XML_REGEXP_QUANT_ONCE;
01734         xmlFAGenerateEpsilonTransition(ctxt, from, to);
01735         break;
01736     case XML_REGEXP_QUANT_MULT:
01737         atom->quant = XML_REGEXP_QUANT_ONCE;
01738         xmlFAGenerateEpsilonTransition(ctxt, from, to);
01739         xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
01740         break;
01741     case XML_REGEXP_QUANT_PLUS:
01742         atom->quant = XML_REGEXP_QUANT_ONCE;
01743         xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
01744         break;
01745     case XML_REGEXP_QUANT_RANGE: 
01746 #if DV_test
01747         if (atom->min == 0) {
01748         xmlFAGenerateEpsilonTransition(ctxt, from, to);
01749         }
01750 #endif
01751         break;
01752     default:
01753         break;
01754     }
01755     return(0);
01756 }
01757 
01766 static void
01767 xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
01768                           int tonr, int counter) {
01769     int transnr;
01770     xmlRegStatePtr from;
01771     xmlRegStatePtr to;
01772 
01773 #ifdef DEBUG_REGEXP_GRAPH
01774     printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
01775 #endif
01776     from = ctxt->states[fromnr];
01777     if (from == NULL)
01778     return;
01779     to = ctxt->states[tonr];
01780     if (to == NULL)
01781     return;
01782     if ((to->mark == XML_REGEXP_MARK_START) ||
01783     (to->mark == XML_REGEXP_MARK_VISITED))
01784     return;
01785 
01786     to->mark = XML_REGEXP_MARK_VISITED;
01787     if (to->type == XML_REGEXP_FINAL_STATE) {
01788 #ifdef DEBUG_REGEXP_GRAPH
01789     printf("State %d is final, so %d becomes final\n", tonr, fromnr);
01790 #endif
01791     from->type = XML_REGEXP_FINAL_STATE;
01792     }
01793     for (transnr = 0;transnr < to->nbTrans;transnr++) {
01794         if (to->trans[transnr].to < 0)
01795         continue;
01796     if (to->trans[transnr].atom == NULL) {
01797         /*
01798          * Don't remove counted transitions
01799          * Don't loop either
01800          */
01801         if (to->trans[transnr].to != fromnr) {
01802         if (to->trans[transnr].count >= 0) {
01803             int newto = to->trans[transnr].to;
01804 
01805             xmlRegStateAddTrans(ctxt, from, NULL,
01806                     ctxt->states[newto], 
01807                     -1, to->trans[transnr].count);
01808         } else {
01809 #ifdef DEBUG_REGEXP_GRAPH
01810             printf("Found epsilon trans %d from %d to %d\n",
01811                transnr, tonr, to->trans[transnr].to);
01812 #endif
01813             if (to->trans[transnr].counter >= 0) {
01814             xmlFAReduceEpsilonTransitions(ctxt, fromnr,
01815                           to->trans[transnr].to,
01816                           to->trans[transnr].counter);
01817             } else {
01818             xmlFAReduceEpsilonTransitions(ctxt, fromnr,
01819                           to->trans[transnr].to,
01820                           counter);
01821             }
01822         }
01823         }
01824     } else {
01825         int newto = to->trans[transnr].to;
01826 
01827         if (to->trans[transnr].counter >= 0) {
01828         xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
01829                     ctxt->states[newto], 
01830                     to->trans[transnr].counter, -1);
01831         } else {
01832         xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
01833                     ctxt->states[newto], counter, -1);
01834         }
01835     }
01836     }
01837     to->mark = XML_REGEXP_MARK_NORMAL;
01838 }
01839 
01855 static void
01856 xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
01857     int statenr, i, j, newto;
01858     xmlRegStatePtr state, tmp;
01859 
01860     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
01861     state = ctxt->states[statenr];
01862     if (state == NULL)
01863         continue;
01864     if (state->nbTrans != 1)
01865         continue;
01866     if (state->type == XML_REGEXP_UNREACH_STATE)
01867         continue;
01868     /* is the only transition out a basic transition */
01869     if ((state->trans[0].atom == NULL) &&
01870         (state->trans[0].to >= 0) &&
01871         (state->trans[0].to != statenr) &&
01872         (state->trans[0].counter < 0) &&
01873         (state->trans[0].count < 0)) {
01874         newto = state->trans[0].to;
01875 
01876             if (state->type == XML_REGEXP_START_STATE) {
01877 #ifdef DEBUG_REGEXP_GRAPH
01878         printf("Found simple epsilon trans from start %d to %d\n",
01879                statenr, newto);
01880 #endif     
01881             } else {
01882 #ifdef DEBUG_REGEXP_GRAPH
01883         printf("Found simple epsilon trans from %d to %d\n",
01884                statenr, newto);
01885 #endif     
01886             for (i = 0;i < state->nbTransTo;i++) {
01887             tmp = ctxt->states[state->transTo[i]];
01888             for (j = 0;j < tmp->nbTrans;j++) {
01889             if (tmp->trans[j].to == statenr) {
01890 #ifdef DEBUG_REGEXP_GRAPH
01891                 printf("Changed transition %d on %d to go to %d\n",
01892                    j, tmp->no, newto);
01893 #endif     
01894                 tmp->trans[j].to = -1;
01895                 xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
01896                             ctxt->states[newto],
01897                             tmp->trans[j].counter,
01898                         tmp->trans[j].count);
01899             }
01900             }
01901         }
01902         if (state->type == XML_REGEXP_FINAL_STATE)
01903             ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
01904         /* eliminate the transition completely */
01905         state->nbTrans = 0;
01906 
01907                 state->type = XML_REGEXP_UNREACH_STATE;
01908 
01909         }
01910             
01911     }
01912     }
01913 }
01919 static void
01920 xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
01921     int statenr, transnr;
01922     xmlRegStatePtr state;
01923     int has_epsilon;
01924 
01925     if (ctxt->states == NULL) return;
01926 
01927     /*
01928      * Eliminate simple epsilon transition and the associated unreachable
01929      * states.
01930      */
01931     xmlFAEliminateSimpleEpsilonTransitions(ctxt);
01932     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
01933     state = ctxt->states[statenr];
01934     if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
01935 #ifdef DEBUG_REGEXP_GRAPH
01936         printf("Removed unreachable state %d\n", statenr);
01937 #endif
01938         xmlRegFreeState(state);
01939         ctxt->states[statenr] = NULL;
01940     }
01941     }
01942 
01943     has_epsilon = 0;
01944 
01945     /*
01946      * Build the completed transitions bypassing the epsilons
01947      * Use a marking algorithm to avoid loops
01948      * Mark sink states too.
01949      * Process from the latests states backward to the start when
01950      * there is long cascading epsilon chains this minimize the
01951      * recursions and transition compares when adding the new ones
01952      */
01953     for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
01954     state = ctxt->states[statenr];
01955     if (state == NULL)
01956         continue;
01957     if ((state->nbTrans == 0) &&
01958         (state->type != XML_REGEXP_FINAL_STATE)) {
01959         state->type = XML_REGEXP_SINK_STATE;
01960     }
01961     for (transnr = 0;transnr < state->nbTrans;transnr++) {
01962         if ((state->trans[transnr].atom == NULL) &&
01963         (state->trans[transnr].to >= 0)) {
01964         if (state->trans[transnr].to == statenr) {
01965             state->trans[transnr].to = -1;
01966 #ifdef DEBUG_REGEXP_GRAPH
01967             printf("Removed loopback epsilon trans %d on %d\n",
01968                transnr, statenr);
01969 #endif
01970         } else if (state->trans[transnr].count < 0) {
01971             int newto = state->trans[transnr].to;
01972 
01973 #ifdef DEBUG_REGEXP_GRAPH
01974             printf("Found epsilon trans %d from %d to %d\n",
01975                transnr, statenr, newto);
01976 #endif
01977             has_epsilon = 1;
01978             state->trans[transnr].to = -2;
01979             state->mark = XML_REGEXP_MARK_START;
01980             xmlFAReduceEpsilonTransitions(ctxt, statenr,
01981                       newto, state->trans[transnr].counter);
01982             state->mark = XML_REGEXP_MARK_NORMAL;
01983 #ifdef DEBUG_REGEXP_GRAPH
01984         } else {
01985             printf("Found counted transition %d on %d\n",
01986                transnr, statenr);
01987 #endif
01988             }
01989         }
01990     }
01991     }
01992     /*
01993      * Eliminate the epsilon transitions
01994      */
01995     if (has_epsilon) {
01996     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
01997         state = ctxt->states[statenr];
01998         if (state == NULL)
01999         continue;
02000         for (transnr = 0;transnr < state->nbTrans;transnr++) {
02001         xmlRegTransPtr trans = &(state->trans[transnr]);
02002         if ((trans->atom == NULL) &&
02003             (trans->count < 0) &&
02004             (trans->to >= 0)) {
02005             trans->to = -1;
02006         }
02007         }
02008     }
02009     }
02010 
02011     /*
02012      * Use this pass to detect unreachable states too
02013      */
02014     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
02015     state = ctxt->states[statenr];
02016     if (state != NULL)
02017         state->reached = XML_REGEXP_MARK_NORMAL;
02018     }
02019     state = ctxt->states[0];
02020     if (state != NULL)
02021     state->reached = XML_REGEXP_MARK_START;
02022     while (state != NULL) {
02023     xmlRegStatePtr target = NULL;
02024     state->reached = XML_REGEXP_MARK_VISITED;
02025     /*
02026      * Mark all states reachable from the current reachable state
02027      */
02028     for (transnr = 0;transnr < state->nbTrans;transnr++) {
02029         if ((state->trans[transnr].to >= 0) &&
02030         ((state->trans[transnr].atom != NULL) ||
02031          (state->trans[transnr].count >= 0))) {
02032         int newto = state->trans[transnr].to;
02033 
02034         if (ctxt->states[newto] == NULL)
02035             continue;
02036         if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
02037             ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
02038             target = ctxt->states[newto];
02039         }
02040         }
02041     }
02042 
02043     /*
02044      * find the next accessible state not explored
02045      */
02046     if (target == NULL) {
02047         for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
02048         state = ctxt->states[statenr];
02049         if ((state != NULL) && (state->reached ==
02050             XML_REGEXP_MARK_START)) {
02051             target = state;
02052             break;
02053         }
02054         }
02055     }
02056     state = target;
02057     }
02058     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
02059     state = ctxt->states[statenr];
02060     if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
02061 #ifdef DEBUG_REGEXP_GRAPH
02062         printf("Removed unreachable state %d\n", statenr);
02063 #endif
02064         xmlRegFreeState(state);
02065         ctxt->states[statenr] = NULL;
02066     }
02067     }
02068 
02069 }
02070 
02071 static int
02072 xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
02073     int ret = 0;
02074 
02075     if ((range1->type == XML_REGEXP_RANGES) ||
02076         (range2->type == XML_REGEXP_RANGES) ||
02077         (range2->type == XML_REGEXP_SUBREG) ||
02078         (range1->type == XML_REGEXP_SUBREG) ||
02079         (range1->type == XML_REGEXP_STRING) ||
02080         (range2->type == XML_REGEXP_STRING))
02081     return(-1);
02082 
02083     /* put them in order */
02084     if (range1->type > range2->type) {
02085         xmlRegRangePtr tmp;
02086 
02087     tmp = range1;
02088     range1 = range2;
02089     range2 = tmp;
02090     }
02091     if ((range1->type == XML_REGEXP_ANYCHAR) ||
02092         (range2->type == XML_REGEXP_ANYCHAR)) {
02093     ret = 1;
02094     } else if ((range1->type == XML_REGEXP_EPSILON) ||
02095                (range2->type == XML_REGEXP_EPSILON)) {
02096     return(0);
02097     } else if (range1->type == range2->type) {
02098         if (range1->type != XML_REGEXP_CHARVAL)
02099             ret = 1;
02100         else if ((range1->end < range2->start) ||
02101              (range2->end < range1->start))
02102         ret = 0;
02103     else
02104         ret = 1;
02105     } else if (range1->type == XML_REGEXP_CHARVAL) {
02106         int codepoint;
02107     int neg = 0;
02108 
02109     /*
02110      * just check all codepoints in the range for acceptance,
02111      * this is usually way cheaper since done only once at
02112      * compilation than testing over and over at runtime or 
02113      * pushing too many states when evaluating.
02114      */
02115     if (((range1->neg == 0) && (range2->neg != 0)) ||
02116         ((range1->neg != 0) && (range2->neg == 0)))
02117         neg = 1;
02118 
02119     for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
02120         ret = xmlRegCheckCharacterRange(range2->type, codepoint,
02121                         0, range2->start, range2->end,
02122                         range2->blockName);
02123         if (ret < 0)
02124             return(-1);
02125         if (((neg == 1) && (ret == 0)) ||
02126             ((neg == 0) && (ret == 1)))
02127         return(1);
02128     }
02129     return(0);
02130     } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
02131                (range2->type == XML_REGEXP_BLOCK_NAME)) {
02132     if (range1->type == range2->type) {
02133         ret = xmlStrEqual(range1->blockName, range2->blockName);
02134     } else {
02135         /*
02136          * comparing a block range with anything else is way
02137          * too costly, and maintining the table is like too much
02138          * memory too, so let's force the automata to save state
02139          * here.
02140          */
02141         return(1);
02142     }
02143     } else if ((range1->type < XML_REGEXP_LETTER) ||
02144                (range2->type < XML_REGEXP_LETTER)) {
02145     if ((range1->type == XML_REGEXP_ANYSPACE) &&
02146         (range2->type == XML_REGEXP_NOTSPACE))
02147         ret = 0;
02148     else if ((range1->type == XML_REGEXP_INITNAME) &&
02149              (range2->type == XML_REGEXP_NOTINITNAME))
02150         ret = 0;
02151     else if ((range1->type == XML_REGEXP_NAMECHAR) &&
02152              (range2->type == XML_REGEXP_NOTNAMECHAR))
02153         ret = 0;
02154     else if ((range1->type == XML_REGEXP_DECIMAL) &&
02155              (range2->type == XML_REGEXP_NOTDECIMAL))
02156         ret = 0;
02157     else if ((range1->type == XML_REGEXP_REALCHAR) &&
02158              (range2->type == XML_REGEXP_NOTREALCHAR))
02159         ret = 0;
02160     else {
02161         /* same thing to limit complexity */
02162         return(1);
02163     }
02164     } else {
02165         ret = 0;
02166         /* range1->type < range2->type here */
02167         switch (range1->type) {
02168         case XML_REGEXP_LETTER:
02169              /* all disjoint except in the subgroups */
02170              if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
02171              (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
02172              (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
02173              (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
02174              (range2->type == XML_REGEXP_LETTER_OTHERS))
02175              ret = 1;
02176          break;
02177         case XML_REGEXP_MARK:
02178              if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
02179              (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
02180              (range2->type == XML_REGEXP_MARK_ENCLOSING))
02181              ret = 1;
02182          break;
02183         case XML_REGEXP_NUMBER:
02184              if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
02185              (range2->type == XML_REGEXP_NUMBER_LETTER) ||
02186              (range2->type == XML_REGEXP_NUMBER_OTHERS))
02187              ret = 1;
02188          break;
02189         case XML_REGEXP_PUNCT:
02190              if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
02191              (range2->type == XML_REGEXP_PUNCT_DASH) ||
02192              (range2->type == XML_REGEXP_PUNCT_OPEN) ||
02193              (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
02194              (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
02195              (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
02196              (range2->type == XML_REGEXP_PUNCT_OTHERS))
02197              ret = 1;
02198          break;
02199         case XML_REGEXP_SEPAR:
02200              if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
02201              (range2->type == XML_REGEXP_SEPAR_LINE) ||
02202              (range2->type == XML_REGEXP_SEPAR_PARA))
02203              ret = 1;
02204          break;
02205         case XML_REGEXP_SYMBOL:
02206              if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
02207              (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
02208              (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
02209              (range2->type == XML_REGEXP_SYMBOL_OTHERS))
02210              ret = 1;
02211          break;
02212         case XML_REGEXP_OTHER:
02213              if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
02214              (range2->type == XML_REGEXP_OTHER_FORMAT) ||
02215              (range2->type == XML_REGEXP_OTHER_PRIVATE))
02216              ret = 1;
02217          break;
02218             default:
02219              if ((range2->type >= XML_REGEXP_LETTER) &&
02220              (range2->type < XML_REGEXP_BLOCK_NAME))
02221              ret = 0;
02222          else {
02223              /* safety net ! */
02224              return(1);
02225          }
02226     }
02227     }
02228     if (((range1->neg == 0) && (range2->neg != 0)) ||
02229         ((range1->neg != 0) && (range2->neg == 0)))
02230     ret = !ret;
02231     return(ret);
02232 }
02233 
02244 static int
02245 xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
02246     if ((type1 == XML_REGEXP_EPSILON) ||
02247         (type1 == XML_REGEXP_CHARVAL) ||
02248     (type1 == XML_REGEXP_RANGES) ||
02249     (type1 == XML_REGEXP_SUBREG) ||
02250     (type1 == XML_REGEXP_STRING) ||
02251     (type1 == XML_REGEXP_ANYCHAR))
02252     return(1);
02253     if ((type2 == XML_REGEXP_EPSILON) ||
02254         (type2 == XML_REGEXP_CHARVAL) ||
02255     (type2 == XML_REGEXP_RANGES) ||
02256     (type2 == XML_REGEXP_SUBREG) ||
02257     (type2 == XML_REGEXP_STRING) ||
02258     (type2 == XML_REGEXP_ANYCHAR))
02259     return(1);
02260 
02261     if (type1 == type2) return(1);
02262 
02263     /* simplify subsequent compares by making sure type1 < type2 */
02264     if (type1 > type2) {
02265         xmlRegAtomType tmp = type1;
02266     type1 = type2;
02267     type2 = tmp;
02268     }
02269     switch (type1) {
02270         case XML_REGEXP_ANYSPACE: /* \s */
02271         /* can't be a letter, number, mark, pontuation, symbol */
02272         if ((type2 == XML_REGEXP_NOTSPACE) ||
02273         ((type2 >= XML_REGEXP_LETTER) &&
02274          (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
02275             ((type2 >= XML_REGEXP_NUMBER) &&
02276          (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
02277             ((type2 >= XML_REGEXP_MARK) &&
02278          (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
02279             ((type2 >= XML_REGEXP_PUNCT) &&
02280          (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
02281             ((type2 >= XML_REGEXP_SYMBOL) &&
02282          (type2 <= XML_REGEXP_SYMBOL_OTHERS))
02283             ) return(0);
02284         break;
02285         case XML_REGEXP_NOTSPACE: /* \S */
02286         break;
02287         case XML_REGEXP_INITNAME: /* \l */
02288         /* can't be a number, mark, separator, pontuation, symbol or other */
02289         if ((type2 == XML_REGEXP_NOTINITNAME) ||
02290             ((type2 >= XML_REGEXP_NUMBER) &&
02291          (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
02292             ((type2 >= XML_REGEXP_MARK) &&
02293          (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
02294             ((type2 >= XML_REGEXP_SEPAR) &&
02295          (type2 <= XML_REGEXP_SEPAR_PARA)) ||
02296             ((type2 >= XML_REGEXP_PUNCT) &&
02297          (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
02298             ((type2 >= XML_REGEXP_SYMBOL) &&
02299          (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
02300             ((type2 >= XML_REGEXP_OTHER) &&
02301          (type2 <= XML_REGEXP_OTHER_NA))
02302         ) return(0);
02303         break;
02304         case XML_REGEXP_NOTINITNAME: /* \L */
02305         break;
02306         case XML_REGEXP_NAMECHAR: /* \c */
02307         /* can't be a mark, separator, pontuation, symbol or other */
02308         if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
02309             ((type2 >= XML_REGEXP_MARK) &&
02310          (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
02311             ((type2 >= XML_REGEXP_PUNCT) &&
02312          (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
02313             ((type2 >= XML_REGEXP_SEPAR) &&
02314          (type2 <= XML_REGEXP_SEPAR_PARA)) ||
02315             ((type2 >= XML_REGEXP_SYMBOL) &&
02316          (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
02317             ((type2 >= XML_REGEXP_OTHER) &&
02318          (type2 <= XML_REGEXP_OTHER_NA))
02319         ) return(0);
02320         break;
02321         case XML_REGEXP_NOTNAMECHAR: /* \C */
02322         break;
02323         case XML_REGEXP_DECIMAL: /* \d */
02324         /* can't be a letter, mark, separator, pontuation, symbol or other */
02325         if ((type2 == XML_REGEXP_NOTDECIMAL) ||
02326             (type2 == XML_REGEXP_REALCHAR) ||
02327         ((type2 >= XML_REGEXP_LETTER) &&
02328          (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
02329             ((type2 >= XML_REGEXP_MARK) &&
02330          (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
02331             ((type2 >= XML_REGEXP_PUNCT) &&
02332          (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
02333             ((type2 >= XML_REGEXP_SEPAR) &&
02334          (type2 <= XML_REGEXP_SEPAR_PARA)) ||
02335             ((type2 >= XML_REGEXP_SYMBOL) &&
02336          (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
02337             ((type2 >= XML_REGEXP_OTHER) &&
02338          (type2 <= XML_REGEXP_OTHER_NA))
02339         )return(0);
02340         break;
02341         case XML_REGEXP_NOTDECIMAL: /* \D */
02342         break;
02343         case XML_REGEXP_REALCHAR: /* \w */
02344         /* can't be a mark, separator, pontuation, symbol or other */
02345         if ((type2 == XML_REGEXP_NOTDECIMAL) ||
02346             ((type2 >= XML_REGEXP_MARK) &&
02347          (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
02348             ((type2 >= XML_REGEXP_PUNCT) &&
02349          (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
02350             ((type2 >= XML_REGEXP_SEPAR) &&
02351          (type2 <= XML_REGEXP_SEPAR_PARA)) ||
02352             ((type2 >= XML_REGEXP_SYMBOL) &&
02353          (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
02354             ((type2 >= XML_REGEXP_OTHER) &&
02355          (type2 <= XML_REGEXP_OTHER_NA))
02356         )return(0);
02357         break;
02358         case XML_REGEXP_NOTREALCHAR: /* \W */
02359         break;
02360     /*
02361      * at that point we know both type 1 and type2 are from
02362      * character categories are ordered and are different,
02363      * it becomes simple because this is a partition
02364      */
02365         case XML_REGEXP_LETTER:
02366         if (type2 <= XML_REGEXP_LETTER_OTHERS)
02367             return(1);
02368         return(0);
02369         case XML_REGEXP_LETTER_UPPERCASE:
02370         case XML_REGEXP_LETTER_LOWERCASE:
02371         case XML_REGEXP_LETTER_TITLECASE:
02372         case XML_REGEXP_LETTER_MODIFIER:
02373         case XML_REGEXP_LETTER_OTHERS:
02374         return(0);
02375         case XML_REGEXP_MARK:
02376         if (type2 <= XML_REGEXP_MARK_ENCLOSING)
02377             return(1);
02378         return(0);
02379         case XML_REGEXP_MARK_NONSPACING:
02380         case XML_REGEXP_MARK_SPACECOMBINING:
02381         case XML_REGEXP_MARK_ENCLOSING:
02382         return(0);
02383         case XML_REGEXP_NUMBER:
02384         if (type2 <= XML_REGEXP_NUMBER_OTHERS)
02385             return(1);
02386         return(0);
02387         case XML_REGEXP_NUMBER_DECIMAL:
02388         case XML_REGEXP_NUMBER_LETTER:
02389         case XML_REGEXP_NUMBER_OTHERS:
02390         return(0);
02391         case XML_REGEXP_PUNCT:
02392         if (type2 <= XML_REGEXP_PUNCT_OTHERS)
02393             return(1);
02394         return(0);
02395         case XML_REGEXP_PUNCT_CONNECTOR:
02396         case XML_REGEXP_PUNCT_DASH:
02397         case XML_REGEXP_PUNCT_OPEN:
02398         case XML_REGEXP_PUNCT_CLOSE:
02399         case XML_REGEXP_PUNCT_INITQUOTE:
02400         case XML_REGEXP_PUNCT_FINQUOTE:
02401         case XML_REGEXP_PUNCT_OTHERS:
02402         return(0);
02403         case XML_REGEXP_SEPAR:
02404         if (type2 <= XML_REGEXP_SEPAR_PARA)
02405             return(1);
02406         return(0);
02407         case XML_REGEXP_SEPAR_SPACE:
02408         case XML_REGEXP_SEPAR_LINE:
02409         case XML_REGEXP_SEPAR_PARA:
02410         return(0);
02411         case XML_REGEXP_SYMBOL:
02412         if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
02413             return(1);
02414         return(0);
02415         case XML_REGEXP_SYMBOL_MATH:
02416         case XML_REGEXP_SYMBOL_CURRENCY:
02417         case XML_REGEXP_SYMBOL_MODIFIER:
02418         case XML_REGEXP_SYMBOL_OTHERS:
02419         return(0);
02420         case XML_REGEXP_OTHER:
02421         if (type2 <= XML_REGEXP_OTHER_NA)
02422             return(1);
02423         return(0);
02424         case XML_REGEXP_OTHER_CONTROL:
02425         case XML_REGEXP_OTHER_FORMAT:
02426         case XML_REGEXP_OTHER_PRIVATE:
02427         case XML_REGEXP_OTHER_NA:
02428         return(0);
02429     default:
02430         break;
02431     }
02432     return(1);
02433 }
02434 
02446 static int
02447 xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
02448     int ret = 0;
02449 
02450     if (atom1 == atom2)
02451     return(1);
02452     if ((atom1 == NULL) || (atom2 == NULL))
02453     return(0);
02454 
02455     if (atom1->type != atom2->type)
02456         return(0);
02457     switch (atom1->type) {
02458         case XML_REGEXP_EPSILON:
02459         ret = 0;
02460         break;
02461         case XML_REGEXP_STRING:
02462             if (!deep)
02463                 ret = (atom1->valuep == atom2->valuep);
02464             else
02465                 ret = xmlStrEqual((xmlChar *)atom1->valuep,
02466                                   (xmlChar *)atom2->valuep);
02467         break;
02468         case XML_REGEXP_CHARVAL:
02469         ret = (atom1->codepoint == atom2->codepoint);
02470         break;
02471     case XML_REGEXP_RANGES:
02472         /* too hard to do in the general case */
02473         ret = 0;
02474     default:
02475         break;
02476     }
02477     return(ret);
02478 }
02479 
02491 static int
02492 xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
02493     int ret = 1;
02494 
02495     if (atom1 == atom2)
02496     return(1);
02497     if ((atom1 == NULL) || (atom2 == NULL))
02498     return(0);
02499 
02500     if ((atom1->type == XML_REGEXP_ANYCHAR) ||
02501         (atom2->type == XML_REGEXP_ANYCHAR))
02502     return(1);
02503 
02504     if (atom1->type > atom2->type) {
02505     xmlRegAtomPtr tmp;
02506     tmp = atom1;
02507     atom1 = atom2;
02508     atom2 = tmp;
02509     }
02510     if (atom1->type != atom2->type) {
02511         ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
02512     /* if they can't intersect at the type level break now */
02513     if (ret == 0)
02514         return(0);
02515     }
02516     switch (atom1->type) {
02517         case XML_REGEXP_STRING:
02518             if (!deep)
02519                 ret = (atom1->valuep != atom2->valuep);
02520             else
02521                 ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
02522                                              (xmlChar *)atom2->valuep);
02523         break;
02524         case XML_REGEXP_EPSILON:
02525         goto not_determinist;
02526         case XML_REGEXP_CHARVAL:
02527         if (atom2->type == XML_REGEXP_CHARVAL) {
02528         ret = (atom1->codepoint == atom2->codepoint);
02529         } else {
02530             ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
02531         if (ret < 0)
02532             ret = 1;
02533         }
02534         break;
02535         case XML_REGEXP_RANGES:
02536         if (atom2->type == XML_REGEXP_RANGES) {
02537             int i, j, res;
02538         xmlRegRangePtr r1, r2;
02539 
02540         /*
02541          * need to check that none of the ranges eventually matches
02542          */
02543         for (i = 0;i < atom1->nbRanges;i++) {
02544             for (j = 0;j < atom2->nbRanges;j++) {
02545             r1 = atom1->ranges[i];
02546             r2 = atom2->ranges[j];
02547             res = xmlFACompareRanges(r1, r2);
02548             if (res == 1) {
02549                 ret = 1;
02550                 goto done;
02551             }
02552             }
02553         }
02554         ret = 0;
02555         }
02556         break;
02557     default:
02558         goto not_determinist;
02559     }
02560 done:
02561     if (atom1->neg != atom2->neg) {
02562         ret = !ret;
02563     }
02564     if (ret == 0)
02565         return(0);
02566 not_determinist:
02567     return(1);
02568 }
02569 
02578 static int
02579 xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
02580                      int to, xmlRegAtomPtr atom) {
02581     int ret = 1;
02582     int res;
02583     int transnr, nbTrans;
02584     xmlRegTransPtr t1;
02585     int deep = 1;
02586 
02587     if (state == NULL)
02588     return(ret);
02589 
02590     if (ctxt->flags & AM_AUTOMATA_RNG)
02591         deep = 0;
02592 
02593     /*
02594      * don't recurse on transitions potentially added in the course of
02595      * the elimination.
02596      */
02597     nbTrans = state->nbTrans;
02598     for (transnr = 0;transnr < nbTrans;transnr++) {
02599     t1 = &(state->trans[transnr]);
02600     /*
02601      * check transitions conflicting with the one looked at
02602      */
02603     if (t1->atom == NULL) {
02604         if (t1->to < 0)
02605         continue;
02606         res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
02607                                    to, atom);
02608         if (res == 0) {
02609             ret = 0;
02610         /* t1->nd = 1; */
02611         }
02612         continue;
02613     }
02614     if (t1->to != to)
02615         continue;
02616     if (xmlFACompareAtoms(t1->atom, atom, deep)) {
02617         ret = 0;
02618         /* mark the transition as non-deterministic */
02619         t1->nd = 1;
02620     }
02621     }
02622     return(ret);
02623 }
02624 
02633 static int
02634 xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
02635     int statenr, transnr;
02636     xmlRegStatePtr state;
02637     xmlRegTransPtr t1, t2, last;
02638     int i;
02639     int ret = 1;
02640     int deep = 1;
02641 
02642 #ifdef DEBUG_REGEXP_GRAPH
02643     printf("xmlFAComputesDeterminism\n");
02644     xmlRegPrintCtxt(stdout, ctxt);
02645 #endif
02646     if (ctxt->determinist != -1)
02647     return(ctxt->determinist);
02648 
02649     if (ctxt->flags & AM_AUTOMATA_RNG)
02650         deep = 0;
02651 
02652     /*
02653      * First cleanup the automata removing cancelled transitions
02654      */
02655     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
02656     state = ctxt->states[statenr];
02657     if (state == NULL)
02658         continue;
02659     if (state->nbTrans < 2)
02660         continue;
02661     for (transnr = 0;transnr < state->nbTrans;transnr++) {
02662         t1 = &(state->trans[transnr]);
02663         /*
02664          * Determinism checks in case of counted or all transitions
02665          * will have to be handled separately
02666          */
02667         if (t1->atom == NULL) {
02668         /* t1->nd = 1; */
02669         continue;
02670         }
02671         if (t1->to == -1) /* eliminated */
02672         continue;
02673         for (i = 0;i < transnr;i++) {
02674         t2 = &(state->trans[i]);
02675         if (t2->to == -1) /* eliminated */
02676             continue;
02677         if (t2->atom != NULL) {
02678             if (t1->to == t2->to) {
02679                         /*
02680                          * Here we use deep because we want to keep the
02681                          * transitions which indicate a conflict
02682                          */
02683             if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
02684                             (t1->counter == t2->counter) &&
02685                             (t1->count == t2->count))
02686                 t2->to = -1; /* eliminated */
02687             }
02688         }
02689         }
02690     }
02691     }
02692 
02693     /*
02694      * Check for all states that there aren't 2 transitions
02695      * with the same atom and a different target.
02696      */
02697     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
02698     state = ctxt->states[statenr];
02699     if (state == NULL)
02700         continue;
02701     if (state->nbTrans < 2)
02702         continue;
02703     last = NULL;
02704     for (transnr = 0;transnr < state->nbTrans;transnr++) {
02705         t1 = &(state->trans[transnr]);
02706         /*
02707          * Determinism checks in case of counted or all transitions
02708          * will have to be handled separately
02709          */
02710         if (t1->atom == NULL) {
02711         continue;
02712         }
02713         if (t1->to == -1) /* eliminated */
02714         continue;
02715         for (i = 0;i < transnr;i++) {
02716         t2 = &(state->trans[i]);
02717         if (t2->to == -1) /* eliminated */
02718             continue;
02719         if (t2->atom != NULL) {
02720                     /*
02721                      * But here we don't use deep because we want to
02722                      * find transitions which indicate a conflict
02723                      */
02724             if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
02725             ret = 0;
02726             /* mark the transitions as non-deterministic ones */
02727             t1->nd = 1;
02728             t2->nd = 1;
02729             last = t1;
02730             }
02731         } else if (t1->to != -1) {
02732             /*
02733              * do the closure in case of remaining specific
02734              * epsilon transitions like choices or all
02735              */
02736             ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
02737                            t2->to, t2->atom);
02738             /* don't shortcut the computation so all non deterministic
02739                transition get marked down
02740             if (ret == 0)
02741             return(0);
02742              */
02743             if (ret == 0) {
02744             t1->nd = 1;
02745             /* t2->nd = 1; */
02746             last = t1;
02747             }
02748         }
02749         }
02750         /* don't shortcut the computation so all non deterministic
02751            transition get marked down
02752         if (ret == 0)
02753         break; */
02754     }
02755 
02756     /*
02757      * mark specifically the last non-deterministic transition
02758      * from a state since there is no need to set-up rollback
02759      * from it
02760      */
02761     if (last != NULL) {
02762         last->nd = 2;
02763     }
02764 
02765     /* don't shortcut the computation so all non deterministic
02766        transition get marked down
02767     if (ret == 0)
02768         break; */
02769     }
02770 
02771     ctxt->determinist = ret;
02772     return(ret);
02773 }
02774 
02775 /************************************************************************
02776  *                                  *
02777  *  Routines to check input against transition atoms        *
02778  *                                  *
02779  ************************************************************************/
02780 
02781 static int
02782 xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
02783                       int start, int end, const xmlChar *blockName) {
02784     int ret = 0;
02785 
02786     switch (type) {
02787         case XML_REGEXP_STRING:
02788         case XML_REGEXP_SUBREG:
02789         case XML_REGEXP_RANGES:
02790         case XML_REGEXP_EPSILON:
02791         return(-1);
02792         case XML_REGEXP_ANYCHAR:
02793         ret = ((codepoint != '\n') && (codepoint != '\r'));
02794         break;
02795         case XML_REGEXP_CHARVAL:
02796         ret = ((codepoint >= start) && (codepoint <= end));
02797         break;
02798         case XML_REGEXP_NOTSPACE:
02799         neg = !neg;
02800         case XML_REGEXP_ANYSPACE:
02801         ret = ((codepoint == '\n') || (codepoint == '\r') ||
02802            (codepoint == '\t') || (codepoint == ' '));
02803         break;
02804         case XML_REGEXP_NOTINITNAME:
02805         neg = !neg;
02806         case XML_REGEXP_INITNAME:
02807         ret = (IS_LETTER(codepoint) || 
02808            (codepoint == '_') || (codepoint == ':'));
02809         break;
02810         case XML_REGEXP_NOTNAMECHAR:
02811         neg = !neg;
02812         case XML_REGEXP_NAMECHAR:
02813         ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
02814            (codepoint == '.') || (codepoint == '-') ||
02815            (codepoint == '_') || (codepoint == ':') ||
02816            IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
02817         break;
02818         case XML_REGEXP_NOTDECIMAL:
02819         neg = !neg;
02820         case XML_REGEXP_DECIMAL:
02821         ret = xmlUCSIsCatNd(codepoint);
02822         break;
02823         case XML_REGEXP_REALCHAR:
02824         neg = !neg;
02825         case XML_REGEXP_NOTREALCHAR:
02826         ret = xmlUCSIsCatP(codepoint);
02827         if (ret == 0)
02828         ret = xmlUCSIsCatZ(codepoint);
02829         if (ret == 0)
02830         ret = xmlUCSIsCatC(codepoint);
02831         break;
02832         case XML_REGEXP_LETTER:
02833         ret = xmlUCSIsCatL(codepoint);
02834         break;
02835         case XML_REGEXP_LETTER_UPPERCASE:
02836         ret = xmlUCSIsCatLu(codepoint);
02837         break;
02838         case XML_REGEXP_LETTER_LOWERCASE:
02839         ret = xmlUCSIsCatLl(codepoint);
02840         break;
02841         case XML_REGEXP_LETTER_TITLECASE:
02842         ret = xmlUCSIsCatLt(codepoint);
02843         break;
02844         case XML_REGEXP_LETTER_MODIFIER:
02845         ret = xmlUCSIsCatLm(codepoint);
02846         break;
02847         case XML_REGEXP_LETTER_OTHERS:
02848         ret = xmlUCSIsCatLo(codepoint);
02849         break;
02850         case XML_REGEXP_MARK:
02851         ret = xmlUCSIsCatM(codepoint);
02852         break;
02853         case XML_REGEXP_MARK_NONSPACING:
02854         ret = xmlUCSIsCatMn(codepoint);
02855         break;
02856         case XML_REGEXP_MARK_SPACECOMBINING:
02857         ret = xmlUCSIsCatMc(codepoint);
02858         break;
02859         case XML_REGEXP_MARK_ENCLOSING:
02860         ret = xmlUCSIsCatMe(codepoint);
02861         break;
02862         case XML_REGEXP_NUMBER:
02863         ret = xmlUCSIsCatN(codepoint);
02864         break;
02865         case XML_REGEXP_NUMBER_DECIMAL:
02866         ret = xmlUCSIsCatNd(codepoint);
02867         break;
02868         case XML_REGEXP_NUMBER_LETTER:
02869         ret = xmlUCSIsCatNl(codepoint);
02870         break;
02871         case XML_REGEXP_NUMBER_OTHERS:
02872         ret = xmlUCSIsCatNo(codepoint);
02873         break;
02874         case XML_REGEXP_PUNCT:
02875         ret = xmlUCSIsCatP(codepoint);
02876         break;
02877         case XML_REGEXP_PUNCT_CONNECTOR:
02878         ret = xmlUCSIsCatPc(codepoint);
02879         break;
02880         case XML_REGEXP_PUNCT_DASH:
02881         ret = xmlUCSIsCatPd(codepoint);
02882         break;
02883         case XML_REGEXP_PUNCT_OPEN:
02884         ret = xmlUCSIsCatPs(codepoint);
02885         break;
02886         case XML_REGEXP_PUNCT_CLOSE:
02887         ret = xmlUCSIsCatPe(codepoint);
02888         break;
02889         case XML_REGEXP_PUNCT_INITQUOTE:
02890         ret = xmlUCSIsCatPi(codepoint);
02891         break;
02892         case XML_REGEXP_PUNCT_FINQUOTE:
02893         ret = xmlUCSIsCatPf(codepoint);
02894         break;
02895         case XML_REGEXP_PUNCT_OTHERS:
02896         ret = xmlUCSIsCatPo(codepoint);
02897         break;
02898         case XML_REGEXP_SEPAR:
02899         ret = xmlUCSIsCatZ(codepoint);
02900         break;
02901         case XML_REGEXP_SEPAR_SPACE:
02902         ret = xmlUCSIsCatZs(codepoint);
02903         break;
02904         case XML_REGEXP_SEPAR_LINE:
02905         ret = xmlUCSIsCatZl(codepoint);
02906         break;
02907         case XML_REGEXP_SEPAR_PARA:
02908         ret = xmlUCSIsCatZp(codepoint);
02909         break;
02910         case XML_REGEXP_SYMBOL:
02911         ret = xmlUCSIsCatS(codepoint);
02912         break;
02913         case XML_REGEXP_SYMBOL_MATH:
02914         ret = xmlUCSIsCatSm(codepoint);
02915         break;
02916         case XML_REGEXP_SYMBOL_CURRENCY:
02917         ret = xmlUCSIsCatSc(codepoint);
02918         break;
02919         case XML_REGEXP_SYMBOL_MODIFIER:
02920         ret = xmlUCSIsCatSk(codepoint);
02921         break;
02922         case XML_REGEXP_SYMBOL_OTHERS:
02923         ret = xmlUCSIsCatSo(codepoint);
02924         break;
02925         case XML_REGEXP_OTHER:
02926         ret = xmlUCSIsCatC(codepoint);
02927         break;
02928         case XML_REGEXP_OTHER_CONTROL:
02929         ret = xmlUCSIsCatCc(codepoint);
02930         break;
02931         case XML_REGEXP_OTHER_FORMAT:
02932         ret = xmlUCSIsCatCf(codepoint);
02933         break;
02934         case XML_REGEXP_OTHER_PRIVATE:
02935         ret = xmlUCSIsCatCo(codepoint);
02936         break;
02937         case XML_REGEXP_OTHER_NA:
02938         /* ret = xmlUCSIsCatCn(codepoint); */
02939         /* Seems it doesn't exist anymore in recent Unicode releases */
02940         ret = 0;
02941         break;
02942         case XML_REGEXP_BLOCK_NAME:
02943         ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
02944         break;
02945     }
02946     if (neg)
02947     return(!ret);
02948     return(ret);
02949 }
02950 
02951 static int
02952 xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
02953     int i, ret = 0;
02954     xmlRegRangePtr range;
02955 
02956     if ((atom == NULL) || (!IS_CHAR(codepoint)))
02957     return(-1);
02958 
02959     switch (atom->type) {
02960         case XML_REGEXP_SUBREG:
02961         case XML_REGEXP_EPSILON:
02962         return(-1);
02963         case XML_REGEXP_CHARVAL:
02964             return(codepoint == atom->codepoint);
02965         case XML_REGEXP_RANGES: {
02966         int accept = 0;
02967 
02968         for (i = 0;i < atom->nbRanges;i++) {
02969         range = atom->ranges[i];
02970         if (range->neg == 2) {
02971             ret = xmlRegCheckCharacterRange(range->type, codepoint,
02972                         0, range->start, range->end,
02973                         range->blockName);
02974             if (ret != 0)
02975             return(0); /* excluded char */
02976         } else if (range->neg) {
02977             ret = xmlRegCheckCharacterRange(range->type, codepoint,
02978                         0, range->start, range->end,
02979                         range->blockName);
02980             if (ret == 0)
02981                 accept = 1;
02982             else
02983                 return(0);
02984         } else {
02985             ret = xmlRegCheckCharacterRange(range->type, codepoint,
02986                         0, range->start, range->end,
02987                         range->blockName);
02988             if (ret != 0)
02989             accept = 1; /* might still be excluded */
02990         }
02991         }
02992         return(accept);
02993     }
02994         case XML_REGEXP_STRING:
02995         printf("TODO: XML_REGEXP_STRING\n");
02996         return(-1);
02997         case XML_REGEXP_ANYCHAR:
02998         case XML_REGEXP_ANYSPACE:
02999         case XML_REGEXP_NOTSPACE:
03000         case XML_REGEXP_INITNAME:
03001         case XML_REGEXP_NOTINITNAME:
03002         case XML_REGEXP_NAMECHAR:
03003         case XML_REGEXP_NOTNAMECHAR:
03004         case XML_REGEXP_DECIMAL:
03005         case XML_REGEXP_NOTDECIMAL:
03006         case XML_REGEXP_REALCHAR:
03007         case XML_REGEXP_NOTREALCHAR:
03008         case XML_REGEXP_LETTER:
03009         case XML_REGEXP_LETTER_UPPERCASE:
03010         case XML_REGEXP_LETTER_LOWERCASE:
03011         case XML_REGEXP_LETTER_TITLECASE:
03012         case XML_REGEXP_LETTER_MODIFIER:
03013         case XML_REGEXP_LETTER_OTHERS:
03014         case XML_REGEXP_MARK:
03015         case XML_REGEXP_MARK_NONSPACING:
03016         case XML_REGEXP_MARK_SPACECOMBINING:
03017         case XML_REGEXP_MARK_ENCLOSING:
03018         case XML_REGEXP_NUMBER:
03019         case XML_REGEXP_NUMBER_DECIMAL:
03020         case XML_REGEXP_NUMBER_LETTER:
03021         case XML_REGEXP_NUMBER_OTHERS:
03022         case XML_REGEXP_PUNCT:
03023         case XML_REGEXP_PUNCT_CONNECTOR:
03024         case XML_REGEXP_PUNCT_DASH:
03025         case XML_REGEXP_PUNCT_OPEN:
03026         case XML_REGEXP_PUNCT_CLOSE:
03027         case XML_REGEXP_PUNCT_INITQUOTE:
03028         case XML_REGEXP_PUNCT_FINQUOTE:
03029         case XML_REGEXP_PUNCT_OTHERS:
03030         case XML_REGEXP_SEPAR:
03031         case XML_REGEXP_SEPAR_SPACE:
03032         case XML_REGEXP_SEPAR_LINE:
03033         case XML_REGEXP_SEPAR_PARA:
03034         case XML_REGEXP_SYMBOL:
03035         case XML_REGEXP_SYMBOL_MATH:
03036         case XML_REGEXP_SYMBOL_CURRENCY:
03037         case XML_REGEXP_SYMBOL_MODIFIER:
03038         case XML_REGEXP_SYMBOL_OTHERS:
03039         case XML_REGEXP_OTHER:
03040         case XML_REGEXP_OTHER_CONTROL:
03041         case XML_REGEXP_OTHER_FORMAT:
03042         case XML_REGEXP_OTHER_PRIVATE:
03043         case XML_REGEXP_OTHER_NA:
03044     case XML_REGEXP_BLOCK_NAME:
03045         ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
03046                                     (const xmlChar *)atom->valuep);
03047         if (atom->neg)
03048         ret = !ret;
03049         break;
03050     }
03051     return(ret);
03052 }
03053 
03054 /************************************************************************
03055  *                                  *
03056  *  Saving and restoring state of an execution context      *
03057  *                                  *
03058  ************************************************************************/
03059 
03060 #ifdef DEBUG_REGEXP_EXEC
03061 static void
03062 xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
03063     printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
03064     if (exec->inputStack != NULL) {
03065     int i;
03066     printf(": ");
03067     for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
03068         printf("%s ", (const char *)
03069                exec->inputStack[exec->inputStackNr - (i + 1)].value);
03070     } else {
03071     printf(": %s", &(exec->inputString[exec->index]));
03072     }
03073     printf("\n");
03074 }
03075 #endif
03076 
03077 static void
03078 xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
03079 #ifdef DEBUG_REGEXP_EXEC
03080     printf("saving ");
03081     exec->transno++;
03082     xmlFARegDebugExec(exec);
03083     exec->transno--;
03084 #endif
03085 #ifdef MAX_PUSH
03086     if (exec->nbPush > MAX_PUSH) {
03087         return;
03088     }
03089     exec->nbPush++;
03090 #endif
03091 
03092     if (exec->maxRollbacks == 0) {
03093     exec->maxRollbacks = 4;
03094     exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
03095                                      sizeof(xmlRegExecRollback));
03096     if (exec->rollbacks == NULL) {
03097         xmlRegexpErrMemory(NULL, "saving regexp");
03098         exec->maxRollbacks = 0;
03099         return;
03100     }
03101     memset(exec->rollbacks, 0,
03102            exec->maxRollbacks * sizeof(xmlRegExecRollback));
03103     } else if (exec->nbRollbacks >= exec->maxRollbacks) {
03104     xmlRegExecRollback *tmp;
03105     int len = exec->maxRollbacks;
03106 
03107     exec->maxRollbacks *= 2;
03108     tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
03109             exec->maxRollbacks * sizeof(xmlRegExecRollback));
03110     if (tmp == NULL) {
03111         xmlRegexpErrMemory(NULL, "saving regexp");
03112         exec->maxRollbacks /= 2;
03113         return;
03114     }
03115     exec->rollbacks = tmp;
03116     tmp = &exec->rollbacks[len];
03117     memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
03118     }
03119     exec->rollbacks[exec->nbRollbacks].state = exec->state;
03120     exec->rollbacks[exec->nbRollbacks].index = exec->index;
03121     exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
03122     if (exec->comp->nbCounters > 0) {
03123     if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
03124         exec->rollbacks[exec->nbRollbacks].counts = (int *)
03125         xmlMalloc(exec->comp->nbCounters * sizeof(int));
03126         if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
03127         xmlRegexpErrMemory(NULL, "saving regexp");
03128         exec->status = -5;
03129         return;
03130         }
03131     }
03132     memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
03133            exec->comp->nbCounters * sizeof(int));
03134     }
03135     exec->nbRollbacks++;
03136 }
03137 
03138 static void
03139 xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
03140     if (exec->nbRollbacks <= 0) {
03141     exec->status = -1;
03142 #ifdef DEBUG_REGEXP_EXEC
03143     printf("rollback failed on empty stack\n");
03144 #endif
03145     return;
03146     }
03147     exec->nbRollbacks--;
03148     exec->state = exec->rollbacks[exec->nbRollbacks].state;
03149     exec->index = exec->rollbacks[exec->nbRollbacks].index;
03150     exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
03151     if (exec->comp->nbCounters > 0) {
03152     if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
03153         fprintf(stderr, "exec save: allocation failed");
03154         exec->status = -6;
03155         return;
03156     }
03157     memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
03158            exec->comp->nbCounters * sizeof(int));
03159     }
03160 
03161 #ifdef DEBUG_REGEXP_EXEC
03162     printf("restored ");
03163     xmlFARegDebugExec(exec);
03164 #endif
03165 }
03166 
03167 /************************************************************************
03168  *                                  *
03169  *  Verifier, running an input against a compiled regexp        *
03170  *                                  *
03171  ************************************************************************/
03172 
03173 static int
03174 xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
03175     xmlRegExecCtxt execval;
03176     xmlRegExecCtxtPtr exec = &execval;
03177     int ret, codepoint = 0, len, deter;
03178 
03179     exec->inputString = content;
03180     exec->index = 0;
03181     exec->nbPush = 0;
03182     exec->determinist = 1;
03183     exec->maxRollbacks = 0;
03184     exec->nbRollbacks = 0;
03185     exec->rollbacks = NULL;
03186     exec->status = 0;
03187     exec->comp = comp;
03188     exec->state = comp->states[0];
03189     exec->transno = 0;
03190     exec->transcount = 0;
03191     exec->inputStack = NULL;
03192     exec->inputStackMax = 0;
03193     if (comp->nbCounters > 0) {
03194     exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
03195     if (exec->counts == NULL) {
03196         xmlRegexpErrMemory(NULL, "running regexp");
03197         return(-1);
03198     }
03199         memset(exec->counts, 0, comp->nbCounters * sizeof(int));
03200     } else
03201     exec->counts = NULL;
03202     while ((exec->status == 0) &&
03203        ((exec->inputString[exec->index] != 0) ||
03204         ((exec->state != NULL) &&
03205          (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
03206     xmlRegTransPtr trans;
03207     xmlRegAtomPtr atom;
03208 
03209     /*
03210      * If end of input on non-terminal state, rollback, however we may
03211      * still have epsilon like transition for counted transitions
03212      * on counters, in that case don't break too early.  Additionally,
03213      * if we are working on a range like "AB{0,2}", where B is not present,
03214      * we don't want to break.
03215      */
03216     len = 1;
03217     if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
03218         /*
03219          * if there is a transition, we must check if
03220          *  atom allows minOccurs of 0
03221          */
03222         if (exec->transno < exec->state->nbTrans) {
03223             trans = &exec->state->trans[exec->transno];
03224         if (trans->to >=0) {
03225             atom = trans->atom;
03226             if (!((atom->min == 0) && (atom->max > 0)))
03227                 goto rollback;
03228         }
03229         } else
03230             goto rollback;
03231     }
03232 
03233     exec->transcount = 0;
03234     for (;exec->transno < exec->state->nbTrans;exec->transno++) {
03235         trans = &exec->state->trans[exec->transno];
03236         if (trans->to < 0)
03237         continue;
03238         atom = trans->atom;
03239         ret = 0;
03240         deter = 1;
03241         if (trans->count >= 0) {
03242         int count;
03243         xmlRegCounterPtr counter;
03244 
03245         if (exec->counts == NULL) {
03246             exec->status = -1;
03247             goto error;
03248         }
03249         /*
03250          * A counted transition.
03251          */
03252 
03253         count = exec->counts[trans->count];
03254         counter = &exec->comp->counters[trans->count];
03255 #ifdef DEBUG_REGEXP_EXEC
03256         printf("testing count %d: val %d, min %d, max %d\n",
03257                trans->count, count, counter->min,  counter->max);
03258 #endif
03259         ret = ((count >= counter->min) && (count <= counter->max));
03260         if ((ret) && (counter->min != counter->max))
03261             deter = 0;
03262         } else if (atom == NULL) {
03263         fprintf(stderr, "epsilon transition left at runtime\n");
03264         exec->status = -2;
03265         break;
03266         } else if (exec->inputString[exec->index] != 0) {
03267                 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
03268         ret = xmlRegCheckCharacter(atom, codepoint);
03269         if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
03270             xmlRegStatePtr to = comp->states[trans->to];
03271 
03272             /*
03273              * this is a multiple input sequence
03274              * If there is a counter associated increment it now.
03275              * before potentially saving and rollback
03276              * do not increment if the counter is already over the
03277              * maximum limit in which case get to next transition
03278              */
03279             if (trans->counter >= 0) {
03280             xmlRegCounterPtr counter;
03281 
03282             if ((exec->counts == NULL) ||
03283                 (exec->comp == NULL) ||
03284                 (exec->comp->counters == NULL)) {
03285                 exec->status = -1;
03286                 goto error;
03287             }
03288             counter = &exec->comp->counters[trans->counter];
03289             if (exec->counts[trans->counter] >= counter->max)
03290                 continue; /* for loop on transitions */
03291 
03292 #ifdef DEBUG_REGEXP_EXEC
03293             printf("Increasing count %d\n", trans->counter);
03294 #endif
03295             exec->counts[trans->counter]++;
03296             }
03297             if (exec->state->nbTrans > exec->transno + 1) {
03298             xmlFARegExecSave(exec);
03299             }
03300             exec->transcount = 1;
03301             do {
03302             /*
03303              * Try to progress as much as possible on the input
03304              */
03305             if (exec->transcount == atom->max) {
03306                 break;
03307             }
03308             exec->index += len;
03309             /*
03310              * End of input: stop here
03311              */
03312             if (exec->inputString[exec->index] == 0) {
03313                 exec->index -= len;
03314                 break;
03315             }
03316             if (exec->transcount >= atom->min) {
03317                 int transno = exec->transno;
03318                 xmlRegStatePtr state = exec->state;
03319 
03320                 /*
03321                  * The transition is acceptable save it
03322                  */
03323                 exec->transno = -1; /* trick */
03324                 exec->state = to;
03325                 xmlFARegExecSave(exec);
03326                 exec->transno = transno;
03327                 exec->state = state;
03328             }
03329             codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
03330                               len);
03331             ret = xmlRegCheckCharacter(atom, codepoint);
03332             exec->transcount++;
03333             } while (ret == 1);
03334             if (exec->transcount < atom->min)
03335             ret = 0;
03336 
03337             /*
03338              * If the last check failed but one transition was found
03339              * possible, rollback
03340              */
03341             if (ret < 0)
03342             ret = 0;
03343             if (ret == 0) {
03344             goto rollback;
03345             }
03346             if (trans->counter >= 0) {
03347             if (exec->counts == NULL) {
03348                 exec->status = -1;
03349                 goto error;
03350             }
03351 #ifdef DEBUG_REGEXP_EXEC
03352             printf("Decreasing count %d\n", trans->counter);
03353 #endif
03354             exec->counts[trans->counter]--;
03355             }
03356         } else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
03357             /*
03358              * we don't match on the codepoint, but minOccurs of 0
03359              * says that's ok.  Setting len to 0 inhibits stepping
03360              * over the codepoint.
03361              */
03362             exec->transcount = 1;
03363             len = 0;
03364             ret = 1;
03365         }
03366         } else if ((atom->min == 0) && (atom->max > 0)) {
03367             /* another spot to match when minOccurs is 0 */
03368         exec->transcount = 1;
03369         len = 0;
03370         ret = 1;
03371         }
03372         if (ret == 1) {
03373         if ((trans->nd == 1) ||
03374             ((trans->count >= 0) && (deter == 0) &&
03375              (exec->state->nbTrans > exec->transno + 1))) {
03376 #ifdef DEBUG_REGEXP_EXEC
03377             if (trans->nd == 1)
03378                 printf("Saving on nd transition atom %d for %c at %d\n",
03379                    trans->atom->no, codepoint, exec->index);
03380             else
03381                 printf("Saving on counted transition count %d for %c at %d\n",
03382                    trans->count, codepoint, exec->index);
03383 #endif
03384             xmlFARegExecSave(exec);
03385         }
03386         if (trans->counter >= 0) {
03387             xmlRegCounterPtr counter;
03388 
03389                     /* make sure we don't go over the counter maximum value */
03390             if ((exec->counts == NULL) ||
03391             (exec->comp == NULL) ||
03392             (exec->comp->counters == NULL)) {
03393             exec->status = -1;
03394             goto error;
03395             }
03396             counter = &exec->comp->counters[trans->counter];
03397             if (exec->counts[trans->counter] >= counter->max)
03398             continue; /* for loop on transitions */
03399 #ifdef DEBUG_REGEXP_EXEC
03400             printf("Increasing count %d\n", trans->counter);
03401 #endif
03402             exec->counts[trans->counter]++;
03403         }
03404         if ((trans->count >= 0) &&
03405             (trans->count < REGEXP_ALL_COUNTER)) {
03406             if (exec->counts == NULL) {
03407                 exec->status = -1;
03408             goto error;
03409             }
03410 #ifdef DEBUG_REGEXP_EXEC
03411             printf("resetting count %d on transition\n",
03412                    trans->count);
03413 #endif
03414             exec->counts[trans->count] = 0;
03415         }
03416 #ifdef DEBUG_REGEXP_EXEC
03417         printf("entering state %d\n", trans->to);
03418 #endif
03419         exec->state = comp->states[trans->to];
03420         exec->transno = 0;
03421         if (trans->atom != NULL) {
03422             exec->index += len;
03423         }
03424         goto progress;
03425         } else if (ret < 0) {
03426         exec->status = -4;
03427         break;
03428         }
03429     }
03430     if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
03431 rollback:
03432         /*
03433          * Failed to find a way out
03434          */
03435         exec->determinist = 0;
03436 #ifdef DEBUG_REGEXP_EXEC
03437         printf("rollback from state %d on %d:%c\n", exec->state->no,
03438                codepoint,codepoint);
03439 #endif
03440         xmlFARegExecRollBack(exec);
03441     }
03442 progress:
03443     continue;
03444     }
03445 error:
03446     if (exec->rollbacks != NULL) {
03447     if (exec->counts != NULL) {
03448         int i;
03449 
03450         for (i = 0;i < exec->maxRollbacks;i++)
03451         if (exec->rollbacks[i].counts != NULL)
03452             xmlFree(exec->rollbacks[i].counts);
03453     }
03454     xmlFree(exec->rollbacks);
03455     }
03456     if (exec->counts != NULL)
03457     xmlFree(exec->counts);
03458     if (exec->status == 0)
03459     return(1);
03460     if (exec->status == -1) {
03461     if (exec->nbPush > MAX_PUSH)
03462         return(-1);
03463     return(0);
03464     }
03465     return(exec->status);
03466 }
03467 
03468 /************************************************************************
03469  *                                  *
03470  *  Progressive interface to the verifier one atom at a time    *
03471  *                                  *
03472  ************************************************************************/
03473 #ifdef DEBUG_ERR
03474 static void testerr(xmlRegExecCtxtPtr exec);
03475 #endif
03476 
03488 xmlRegExecCtxtPtr
03489 xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
03490     xmlRegExecCtxtPtr exec;
03491 
03492     if (comp == NULL)
03493     return(NULL);
03494     if ((comp->compact == NULL) && (comp->states == NULL))
03495         return(NULL);
03496     exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
03497     if (exec == NULL) {
03498     xmlRegexpErrMemory(NULL, "creating execution context");
03499     return(NULL);
03500     }
03501     memset(exec, 0, sizeof(xmlRegExecCtxt));
03502     exec->inputString = NULL;
03503     exec->index = 0;
03504     exec->determinist = 1;
03505     exec->maxRollbacks = 0;
03506     exec->nbRollbacks = 0;
03507     exec->rollbacks = NULL;
03508     exec->status = 0;
03509     exec->comp = comp;
03510     if (comp->compact == NULL)
03511     exec->state = comp->states[0];
03512     exec->transno = 0;
03513     exec->transcount = 0;
03514     exec->callback = callback;
03515     exec->data = data;
03516     if (comp->nbCounters > 0) {
03517         /*
03518      * For error handling, exec->counts is allocated twice the size
03519      * the second half is used to store the data in case of rollback
03520      */
03521     exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
03522                                      * 2);
03523     if (exec->counts == NULL) {
03524         xmlRegexpErrMemory(NULL, "creating execution context");
03525         xmlFree(exec);
03526         return(NULL);
03527     }
03528         memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
03529     exec->errCounts = &exec->counts[comp->nbCounters];
03530     } else {
03531     exec->counts = NULL;
03532     exec->errCounts = NULL;
03533     }
03534     exec->inputStackMax = 0;
03535     exec->inputStackNr = 0;
03536     exec->inputStack = NULL;
03537     exec->errStateNo = -1;
03538     exec->errString = NULL;
03539     exec->nbPush = 0;
03540     return(exec);
03541 }
03542 
03549 void
03550 xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
03551     if (exec == NULL)
03552     return;
03553 
03554     if (exec->rollbacks != NULL) {
03555     if (exec->counts != NULL) {
03556         int i;
03557 
03558         for (i = 0;i < exec->maxRollbacks;i++)
03559         if (exec->rollbacks[i].counts != NULL)
03560             xmlFree(exec->rollbacks[i].counts);
03561     }
03562     xmlFree(exec->rollbacks);
03563     }
03564     if (exec->counts != NULL)
03565     xmlFree(exec->counts);
03566     if (exec->inputStack != NULL) {
03567     int i;
03568 
03569     for (i = 0;i < exec->inputStackNr;i++) {
03570         if (exec->inputStack[i].value != NULL)
03571         xmlFree(exec->inputStack[i].value);
03572     }
03573     xmlFree(exec->inputStack);
03574     }
03575     if (exec->errString != NULL)
03576         xmlFree(exec->errString);
03577     xmlFree(exec);
03578 }
03579 
03580 static void
03581 xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
03582                         void *data) {
03583 #ifdef DEBUG_PUSH
03584     printf("saving value: %d:%s\n", exec->inputStackNr, value);
03585 #endif
03586     if (exec->inputStackMax == 0) {
03587     exec->inputStackMax = 4;
03588     exec->inputStack = (xmlRegInputTokenPtr) 
03589         xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
03590     if (exec->inputStack == NULL) {
03591         xmlRegexpErrMemory(NULL, "pushing input string");
03592         exec->inputStackMax = 0;
03593         return;
03594     }
03595     } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
03596     xmlRegInputTokenPtr tmp;
03597 
03598     exec->inputStackMax *= 2;
03599     tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
03600             exec->inputStackMax * sizeof(xmlRegInputToken));
03601     if (tmp == NULL) {
03602         xmlRegexpErrMemory(NULL, "pushing input string");
03603         exec->inputStackMax /= 2;
03604         return;
03605     }
03606     exec->inputStack = tmp;
03607     }
03608     exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
03609     exec->inputStack[exec->inputStackNr].data = data;
03610     exec->inputStackNr++;
03611     exec->inputStack[exec->inputStackNr].value = NULL;
03612     exec->inputStack[exec->inputStackNr].data = NULL;
03613 }
03614 
03628 static int
03629 xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
03630     if (expStr == valStr) return(1);
03631     if (expStr == NULL) return(0);
03632     if (valStr == NULL) return(0);
03633     do {
03634     /*
03635     * Eval if we have a wildcard for the current item.
03636     */
03637         if (*expStr != *valStr) {
03638         /* if one of them starts with a wildcard make valStr be it */
03639         if (*valStr == '*') {
03640             const xmlChar *tmp;
03641 
03642         tmp = valStr;
03643         valStr = expStr;
03644         expStr = tmp;
03645         }
03646         if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
03647         do {
03648             if (*valStr == XML_REG_STRING_SEPARATOR)
03649             break;
03650             valStr++;
03651         } while (*valStr != 0);
03652         continue;
03653         } else
03654         return(0);
03655     }
03656     expStr++;
03657     valStr++;
03658     } while (*valStr != 0);
03659     if (*expStr != 0)
03660     return (0);
03661     else
03662     return (1);
03663 }
03664 
03677 static int
03678 xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
03679                     xmlRegexpPtr comp,
03680                     const xmlChar *value,
03681                     void *data) {
03682     int state = exec->index;
03683     int i, target;
03684 
03685     if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
03686     return(-1);
03687     
03688     if (value == NULL) {
03689     /*
03690      * are we at a final state ?
03691      */
03692     if (comp->compact[state * (comp->nbstrings + 1)] ==
03693             XML_REGEXP_FINAL_STATE)
03694         return(1);
03695     return(0);
03696     }
03697 
03698 #ifdef DEBUG_PUSH
03699     printf("value pushed: %s\n", value);
03700 #endif
03701 
03702     /*
03703      * Examine all outside transitions from current state
03704      */
03705     for (i = 0;i < comp->nbstrings;i++) {
03706     target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
03707     if ((target > 0) && (target <= comp->nbstates)) {
03708         target--; /* to avoid 0 */    
03709         if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
03710         exec->index = target;       
03711         if ((exec->callback != NULL) && (comp->transdata != NULL)) {
03712             exec->callback(exec->data, value,
03713               comp->transdata[state * comp->nbstrings + i], data);
03714         }
03715 #ifdef DEBUG_PUSH
03716         printf("entering state %d\n", target);
03717 #endif
03718         if (comp->compact[target * (comp->nbstrings + 1)] ==
03719             XML_REGEXP_SINK_STATE)
03720             goto error;
03721 
03722         if (comp->compact[target * (comp->nbstrings + 1)] ==
03723             XML_REGEXP_FINAL_STATE)
03724             return(1);
03725         return(0);
03726         }
03727     }
03728     }
03729     /*
03730      * Failed to find an exit transition out from current state for the
03731      * current token
03732      */
03733 #ifdef DEBUG_PUSH
03734     printf("failed to find a transition for %s on state %d\n", value, state);
03735 #endif
03736 error:
03737     if (exec->errString != NULL)
03738         xmlFree(exec->errString);
03739     exec->errString = xmlStrdup(value);
03740     exec->errStateNo = state;
03741     exec->status = -1;
03742 #ifdef DEBUG_ERR
03743     testerr(exec);
03744 #endif
03745     return(-1);
03746 }
03747 
03760 static int
03761 xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
03762                          void *data, int compound) {
03763     xmlRegTransPtr trans;
03764     xmlRegAtomPtr atom;
03765     int ret;
03766     int final = 0;
03767     int progress = 1;
03768 
03769     if (exec == NULL)
03770     return(-1);
03771     if (exec->comp == NULL)
03772     return(-1);
03773     if (exec->status != 0)
03774     return(exec->status);
03775 
03776     if (exec->comp->compact != NULL)
03777     return(xmlRegCompactPushString(exec, exec->comp, value, data));
03778 
03779     if (value == NULL) {
03780         if (exec->state->type == XML_REGEXP_FINAL_STATE)
03781         return(1);
03782     final = 1;
03783     }
03784 
03785 #ifdef DEBUG_PUSH
03786     printf("value pushed: %s\n", value);
03787 #endif
03788     /*
03789      * If we have an active rollback stack push the new value there
03790      * and get back to where we were left
03791      */
03792     if ((value != NULL) && (exec->inputStackNr > 0)) {
03793     xmlFARegExecSaveInputString(exec, value, data);
03794     value = exec->inputStack[exec->index].value;
03795     data = exec->inputStack[exec->index].data;
03796 #ifdef DEBUG_PUSH
03797     printf("value loaded: %s\n", value);
03798 #endif
03799     }
03800 
03801     while ((exec->status == 0) &&
03802        ((value != NULL) ||
03803         ((final == 1) &&
03804          (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
03805 
03806     /*
03807      * End of input on non-terminal state, rollback, however we may
03808      * still have epsilon like transition for counted transitions
03809      * on counters, in that case don't break too early.
03810      */
03811     if ((value == NULL) && (exec->counts == NULL))
03812         goto rollback;
03813 
03814     exec->transcount = 0;
03815     for (;exec->transno < exec->state->nbTrans;exec->transno++) {
03816         trans = &exec->state->trans[exec->transno];
03817         if (trans->to < 0)
03818         continue;
03819         atom = trans->atom;
03820         ret = 0;
03821         if (trans->count == REGEXP_ALL_LAX_COUNTER) {
03822         int i;
03823         int count;
03824         xmlRegTransPtr t;
03825         xmlRegCounterPtr counter;
03826 
03827         ret = 0;
03828 
03829 #ifdef DEBUG_PUSH
03830         printf("testing all lax %d\n", trans->count);
03831 #endif
03832         /*
03833          * Check all counted transitions from the current state
03834          */
03835         if ((value == NULL) && (final)) {
03836             ret = 1;
03837         } else if (value != NULL) {
03838             for (i = 0;i < exec->state->nbTrans;i++) {
03839             t = &exec->state->trans[i];
03840             if ((t->counter < 0) || (t == trans))
03841                 continue;
03842             counter = &exec->comp->counters[t->counter];
03843             count = exec->counts[t->counter];
03844             if ((count < counter->max) && 
03845                     (t->atom != NULL) &&
03846                 (xmlStrEqual(value, t->atom->valuep))) {
03847                 ret = 0;
03848                 break;
03849             }
03850             if ((count >= counter->min) &&
03851                 (count < counter->max) &&
03852                 (t->atom != NULL) &&
03853                 (xmlStrEqual(value, t->atom->valuep))) {
03854                 ret = 1;
03855                 break;
03856             }
03857             }
03858         }
03859         } else if (trans->count == REGEXP_ALL_COUNTER) {
03860         int i;
03861         int count;
03862         xmlRegTransPtr t;
03863         xmlRegCounterPtr counter;
03864 
03865         ret = 1;
03866 
03867 #ifdef DEBUG_PUSH
03868         printf("testing all %d\n", trans->count);
03869 #endif
03870         /*
03871          * Check all counted transitions from the current state
03872          */
03873         for (i = 0;i < exec->state->nbTrans;i++) {
03874                     t = &exec->state->trans[i];
03875             if ((t->counter < 0) || (t == trans))
03876             continue;
03877                     counter = &exec->comp->counters[t->counter];
03878             count = exec->counts[t->counter];
03879             if ((count < counter->min) || (count > counter->max)) {
03880             ret = 0;
03881             break;
03882             }
03883         }
03884         } else if (trans->count >= 0) {
03885         int count;
03886         xmlRegCounterPtr counter;
03887 
03888         /*
03889          * A counted transition.
03890          */
03891 
03892         count = exec->counts[trans->count];
03893         counter = &exec->comp->counters[trans->count];
03894 #ifdef DEBUG_PUSH
03895         printf("testing count %d: val %d, min %d, max %d\n",
03896                trans->count, count, counter->min,  counter->max);
03897 #endif
03898         ret = ((count >= counter->min) && (count <= counter->max));
03899         } else if (atom == NULL) {
03900         fprintf(stderr, "epsilon transition left at runtime\n");
03901         exec->status = -2;
03902         break;
03903         } else if (value != NULL) {
03904         ret = xmlRegStrEqualWildcard(atom->valuep, value);
03905         if (atom->neg) {
03906             ret = !ret;
03907             if (!compound)
03908                 ret = 0;
03909         }
03910         if ((ret == 1) && (trans->counter >= 0)) {
03911             xmlRegCounterPtr counter;
03912             int count;
03913 
03914             count = exec->counts[trans->counter];
03915             counter = &exec->comp->counters[trans->counter];
03916             if (count >= counter->max)
03917             ret = 0;
03918         }
03919 
03920         if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
03921             xmlRegStatePtr to = exec->comp->states[trans->to];
03922 
03923             /*
03924              * this is a multiple input sequence
03925              */
03926             if (exec->state->nbTrans > exec->transno + 1) {
03927             if (exec->inputStackNr <= 0) {
03928                 xmlFARegExecSaveInputString(exec, value, data);
03929             }
03930             xmlFARegExecSave(exec);
03931             }
03932             exec->transcount = 1;
03933             do {
03934             /*
03935              * Try to progress as much as possible on the input
03936              */
03937             if (exec->transcount == atom->max) {
03938                 break;
03939             }
03940             exec->index++;
03941             value = exec->inputStack[exec->index].value;
03942             data = exec->inputStack[exec->index].data;
03943 #ifdef DEBUG_PUSH
03944             printf("value loaded: %s\n", value);
03945 #endif
03946 
03947             /*
03948              * End of input: stop here
03949              */
03950             if (value == NULL) {
03951                 exec->index --;
03952                 break;
03953             }
03954             if (exec->transcount >= atom->min) {
03955                 int transno = exec->transno;
03956                 xmlRegStatePtr state = exec->state;
03957 
03958                 /*
03959                  * The transition is acceptable save it
03960                  */
03961                 exec->transno = -1; /* trick */
03962                 exec->state = to;
03963                 if (exec->inputStackNr <= 0) {
03964                 xmlFARegExecSaveInputString(exec, value, data);
03965                 }
03966                 xmlFARegExecSave(exec);
03967                 exec->transno = transno;
03968                 exec->state = state;
03969             }
03970             ret = xmlStrEqual(value, atom->valuep);
03971             exec->transcount++;
03972             } while (ret == 1);
03973             if (exec->transcount < atom->min)
03974             ret = 0;
03975 
03976             /*
03977              * If the last check failed but one transition was found
03978              * possible, rollback
03979              */
03980             if (ret < 0)
03981             ret = 0;
03982             if (ret == 0) {
03983             goto rollback;
03984             }
03985         }
03986         }
03987         if (ret == 1) {
03988         if ((exec->callback != NULL) && (atom != NULL) &&
03989             (data != NULL)) {
03990             exec->callback(exec->data, atom->valuep,
03991                        atom->data, data);
03992         }
03993         if (exec->state->nbTrans > exec->transno + 1) {
03994             if (exec->inputStackNr <= 0) {
03995             xmlFARegExecSaveInputString(exec, value, data);
03996             }
03997             xmlFARegExecSave(exec);
03998         }
03999         if (trans->counter >= 0) {
04000 #ifdef DEBUG_PUSH
04001             printf("Increasing count %d\n", trans->counter);
04002 #endif
04003             exec->counts[trans->counter]++;
04004         }
04005         if ((trans->count >= 0) &&
04006             (trans->count < REGEXP_ALL_COUNTER)) {
04007 #ifdef DEBUG_REGEXP_EXEC
04008             printf("resetting count %d on transition\n",
04009                    trans->count);
04010 #endif
04011             exec->counts[trans->count] = 0;
04012         }
04013 #ifdef DEBUG_PUSH
04014         printf("entering state %d\n", trans->to);
04015 #endif
04016                 if ((exec->comp->states[trans->to] != NULL) &&
04017             (exec->comp->states[trans->to]->type ==
04018              XML_REGEXP_SINK_STATE)) {
04019             /*
04020              * entering a sink state, save the current state as error
04021              * state.
04022              */
04023             if (exec->errString != NULL)
04024             xmlFree(exec->errString);
04025             exec->errString = xmlStrdup(value);
04026             exec->errState = exec->state;
04027             memcpy(exec->errCounts, exec->counts,
04028                exec->comp->nbCounters * sizeof(int));
04029         }
04030         exec->state = exec->comp->states[trans->to];
04031         exec->transno = 0;
04032         if (trans->atom != NULL) {
04033             if (exec->inputStack != NULL) {
04034             exec->index++;
04035             if (exec->index < exec->inputStackNr) {
04036                 value = exec->inputStack[exec->index].value;
04037                 data = exec->inputStack[exec->index].data;
04038 #ifdef DEBUG_PUSH
04039                 printf("value loaded: %s\n", value);
04040 #endif
04041             } else {
04042                 value = NULL;
04043                 data = NULL;
04044 #ifdef DEBUG_PUSH
04045                 printf("end of input\n");
04046 #endif
04047             }
04048             } else {
04049             value = NULL;
04050             data = NULL;
04051 #ifdef DEBUG_PUSH
04052             printf("end of input\n");
04053 #endif
04054             }
04055         }
04056         goto progress;
04057         } else if (ret < 0) {
04058         exec->status = -4;
04059         break;
04060         }
04061     }
04062     if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
04063 rollback:
04064             /*
04065          * if we didn't yet rollback on the current input
04066          * store the current state as the error state.
04067          */
04068         if ((progress) && (exec->state != NULL) &&
04069             (exec->state->type != XML_REGEXP_SINK_STATE)) {
04070             progress = 0;
04071         if (exec->errString != NULL)
04072             xmlFree(exec->errString);
04073         exec->errString = xmlStrdup(value);
04074         exec->errState = exec->state;
04075         memcpy(exec->errCounts, exec->counts,
04076                exec->comp->nbCounters * sizeof(int));
04077         }
04078 
04079         /*
04080          * Failed to find a way out
04081          */
04082         exec->determinist = 0;
04083         xmlFARegExecRollBack(exec);
04084         if (exec->status == 0) {
04085         value = exec->inputStack[exec->index].value;
04086         data = exec->inputStack[exec->index].data;
04087 #ifdef DEBUG_PUSH
04088         printf("value loaded: %s\n", value);
04089 #endif
04090         }
04091     }
04092     continue;
04093 progress:
04094         progress = 1;
04095     continue;
04096     }
04097     if (exec->status == 0) {
04098         return(exec->state->type == XML_REGEXP_FINAL_STATE);
04099     }
04100 #ifdef DEBUG_ERR
04101     if (exec->status < 0) {
04102     testerr(exec);
04103     }
04104 #endif
04105     return(exec->status);
04106 }
04107 
04119 int
04120 xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
04121                  void *data) {
04122     return(xmlRegExecPushStringInternal(exec, value, data, 0));
04123 }
04124 
04137 int
04138 xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
04139                       const xmlChar *value2, void *data) {
04140     xmlChar buf[150];
04141     int lenn, lenp, ret;
04142     xmlChar *str;
04143 
04144     if (exec == NULL)
04145     return(-1);
04146     if (exec->comp == NULL)
04147     return(-1);
04148     if (exec->status != 0)
04149     return(exec->status);
04150 
04151     if (value2 == NULL)
04152         return(xmlRegExecPushString(exec, value, data));
04153 
04154     lenn = strlen((char *) value2);
04155     lenp = strlen((char *) value);
04156 
04157     if (150 < lenn + lenp + 2) {
04158     str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
04159     if (str == NULL) {
04160         exec->status = -1;
04161         return(-1);
04162     }
04163     } else {
04164     str = buf;
04165     }
04166     memcpy(&str[0], value, lenp);
04167     str[lenp] = XML_REG_STRING_SEPARATOR;
04168     memcpy(&str[lenp + 1], value2, lenn);
04169     str[lenn + lenp + 1] = 0;
04170 
04171     if (exec->comp->compact != NULL)
04172     ret = xmlRegCompactPushString(exec, exec->comp, str, data);
04173     else
04174         ret = xmlRegExecPushStringInternal(exec, str, data, 1);
04175 
04176     if (str != buf)
04177         xmlFree(str);
04178     return(ret);
04179 }
04180 
04195 static int
04196 xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
04197                     int *nbval, int *nbneg,
04198             xmlChar **values, int *terminal) {
04199     int maxval;
04200     int nb = 0;
04201 
04202     if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || 
04203         (values == NULL) || (*nbval <= 0))
04204         return(-1);
04205 
04206     maxval = *nbval;
04207     *nbval = 0;
04208     *nbneg = 0;
04209     if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
04210         xmlRegexpPtr comp;
04211     int target, i, state;
04212 
04213         comp = exec->comp;
04214 
04215     if (err) {
04216         if (exec->errStateNo == -1) return(-1);
04217         state = exec->errStateNo;
04218     } else {
04219         state = exec->index;
04220     }
04221     if (terminal != NULL) {
04222         if (comp->compact[state * (comp->nbstrings + 1)] ==
04223             XML_REGEXP_FINAL_STATE)
04224         *terminal = 1;
04225         else
04226         *terminal = 0;
04227     }
04228     for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
04229         target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
04230         if ((target > 0) && (target <= comp->nbstates) &&
04231             (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
04232          XML_REGEXP_SINK_STATE)) {
04233             values[nb++] = comp->stringMap[i];
04234         (*nbval)++;
04235         }
04236     }
04237     for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
04238         target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
04239         if ((target > 0) && (target <= comp->nbstates) &&
04240             (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
04241          XML_REGEXP_SINK_STATE)) {
04242             values[nb++] = comp->stringMap[i];
04243         (*nbneg)++;
04244         }
04245     }
04246     } else {
04247         int transno;
04248     xmlRegTransPtr trans;
04249     xmlRegAtomPtr atom;
04250     xmlRegStatePtr state;
04251 
04252     if (terminal != NULL) {
04253         if (exec->state->type == XML_REGEXP_FINAL_STATE)
04254         *terminal = 1;
04255         else
04256         *terminal = 0;
04257     }
04258 
04259     if (err) {
04260         if (exec->errState == NULL) return(-1);
04261         state = exec->errState;
04262     } else {
04263         if (exec->state == NULL) return(-1);
04264         state = exec->state;
04265     }
04266     for (transno = 0;
04267          (transno < state->nbTrans) && (nb < maxval);
04268          transno++) {
04269         trans = &state->trans[transno];
04270         if (trans->to < 0)
04271         continue;
04272         atom = trans->atom;
04273         if ((atom == NULL) || (atom->valuep == NULL))
04274         continue;
04275         if (trans->count == REGEXP_ALL_LAX_COUNTER) {
04276             /* this should not be reached but ... */
04277             TODO;
04278         } else if (trans->count == REGEXP_ALL_COUNTER) {
04279             /* this should not be reached but ... */
04280             TODO;
04281         } else if (trans->counter >= 0) {
04282         xmlRegCounterPtr counter = NULL;
04283         int count;
04284 
04285         if (err)
04286             count = exec->errCounts[trans->counter];
04287         else
04288             count = exec->counts[trans->counter];
04289         if (exec->comp != NULL)
04290             counter = &exec->comp->counters[trans->counter];
04291         if ((counter == NULL) || (count < counter->max)) {
04292             if (atom->neg)
04293             values[nb++] = (xmlChar *) atom->valuep2;
04294             else
04295             values[nb++] = (xmlChar *) atom->valuep;
04296             (*nbval)++;
04297         }
04298         } else {
04299                 if ((exec->comp->states[trans->to] != NULL) &&
04300             (exec->comp->states[trans->to]->type !=
04301              XML_REGEXP_SINK_STATE)) {
04302             if (atom->neg)
04303             values[nb++] = (xmlChar *) atom->valuep2;
04304             else
04305             values[nb++] = (xmlChar *) atom->valuep;
04306             (*nbval)++;
04307         }
04308         } 
04309     }
04310     for (transno = 0;
04311          (transno < state->nbTrans) && (nb < maxval);
04312          transno++) {
04313         trans = &state->trans[transno];
04314         if (trans->to < 0)
04315         continue;
04316         atom = trans->atom;
04317         if ((atom == NULL) || (atom->valuep == NULL))
04318         continue;
04319         if (trans->count == REGEXP_ALL_LAX_COUNTER) {
04320             continue;
04321         } else if (trans->count == REGEXP_ALL_COUNTER) {
04322             continue;
04323         } else if (trans->counter >= 0) {
04324             continue;
04325         } else {
04326                 if ((exec->comp->states[trans->to] != NULL) &&
04327             (exec->comp->states[trans->to]->type ==
04328              XML_REGEXP_SINK_STATE)) {
04329             if (atom->neg)
04330             values[nb++] = (xmlChar *) atom->valuep2;
04331             else
04332             values[nb++] = (xmlChar *) atom->valuep;
04333             (*nbneg)++;
04334         }
04335         } 
04336     }
04337     }
04338     return(0);
04339 }
04340 
04358 int
04359 xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
04360                      xmlChar **values, int *terminal) {
04361     return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
04362 }
04363 
04383 int
04384 xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
04385                   int *nbval, int *nbneg, xmlChar **values, int *terminal) {
04386     if (exec == NULL)
04387         return(-1);
04388     if (string != NULL) {
04389         if (exec->status != 0)
04390         *string = exec->errString;
04391     else
04392         *string = NULL;
04393     }
04394     return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
04395 }
04396 
04397 #ifdef DEBUG_ERR
04398 static void testerr(xmlRegExecCtxtPtr exec) {
04399     const xmlChar *string;
04400     xmlChar *values[5];
04401     int nb = 5;
04402     int nbneg;
04403     int terminal;
04404     xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
04405 }
04406 #endif
04407 
04408 #if 0
04409 static int
04410 xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
04411     xmlRegTransPtr trans;
04412     xmlRegAtomPtr atom;
04413     int ret;
04414     int codepoint, len;
04415 
04416     if (exec == NULL)
04417     return(-1);
04418     if (exec->status != 0)
04419     return(exec->status);
04420 
04421     while ((exec->status == 0) &&
04422        ((exec->inputString[exec->index] != 0) ||
04423         (exec->state->type != XML_REGEXP_FINAL_STATE))) {
04424 
04425     /*
04426      * End of input on non-terminal state, rollback, however we may
04427      * still have epsilon like transition for counted transitions
04428      * on counters, in that case don't break too early.
04429      */
04430     if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
04431         goto rollback;
04432 
04433     exec->transcount = 0;
04434     for (;exec->transno < exec->state->nbTrans;exec->transno++) {
04435         trans = &exec->state->trans[exec->transno];
04436         if (trans->to < 0)
04437         continue;
04438         atom = trans->atom;
04439         ret = 0;
04440         if (trans->count >= 0) {
04441         int count;
04442         xmlRegCounterPtr counter;
04443 
04444         /*
04445          * A counted transition.
04446          */
04447 
04448         count = exec->counts[trans->count];
04449         counter = &exec->comp->counters[trans->count];
04450 #ifdef DEBUG_REGEXP_EXEC
04451         printf("testing count %d: val %d, min %d, max %d\n",
04452                trans->count, count, counter->min,  counter->max);
04453 #endif
04454         ret = ((count >= counter->min) && (count <= counter->max));
04455         } else if (atom == NULL) {
04456         fprintf(stderr, "epsilon transition left at runtime\n");
04457         exec->status = -2;
04458         break;
04459         } else if (exec->inputString[exec->index] != 0) {
04460                 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
04461         ret = xmlRegCheckCharacter(atom, codepoint);
04462         if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
04463             xmlRegStatePtr to = exec->comp->states[trans->to];
04464 
04465             /*
04466              * this is a multiple input sequence
04467              */
04468             if (exec->state->nbTrans > exec->transno + 1) {
04469             xmlFARegExecSave(exec);
04470             }
04471             exec->transcount = 1;
04472             do {
04473             /*
04474              * Try to progress as much as possible on the input
04475              */
04476             if (exec->transcount == atom->max) {
04477                 break;
04478             }
04479             exec->index += len;
04480             /*
04481              * End of input: stop here
04482              */
04483             if (exec->inputString[exec->index] == 0) {
04484                 exec->index -= len;
04485                 break;
04486             }
04487             if (exec->transcount >= atom->min) {
04488                 int transno = exec->transno;
04489                 xmlRegStatePtr state = exec->state;
04490 
04491                 /*
04492                  * The transition is acceptable save it
04493                  */
04494                 exec->transno = -1; /* trick */
04495                 exec->state = to;
04496                 xmlFARegExecSave(exec);
04497                 exec->transno = transno;
04498                 exec->state = state;
04499             }
04500             codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
04501                               len);
04502             ret = xmlRegCheckCharacter(atom, codepoint);
04503             exec->transcount++;
04504             } while (ret == 1);
04505             if (exec->transcount < atom->min)
04506             ret = 0;
04507 
04508             /*
04509              * If the last check failed but one transition was found
04510              * possible, rollback
04511              */
04512             if (ret < 0)
04513             ret = 0;
04514             if (ret == 0) {
04515             goto rollback;
04516             }
04517         }
04518         }
04519         if (ret == 1) {
04520         if (exec->state->nbTrans > exec->transno + 1) {
04521             xmlFARegExecSave(exec);
04522         }
04523         /*
04524          * restart count for expressions like this ((abc){2})*
04525          */
04526         if (trans->count >= 0) {
04527 #ifdef DEBUG_REGEXP_EXEC
04528             printf("Reset count %d\n", trans->count);
04529 #endif
04530             exec->counts[trans->count] = 0;
04531         }
04532         if (trans->counter >= 0) {
04533 #ifdef DEBUG_REGEXP_EXEC
04534             printf("Increasing count %d\n", trans->counter);
04535 #endif
04536             exec->counts[trans->counter]++;
04537         }
04538 #ifdef DEBUG_REGEXP_EXEC
04539         printf("entering state %d\n", trans->to);
04540 #endif
04541         exec->state = exec->comp->states[trans->to];
04542         exec->transno = 0;
04543         if (trans->atom != NULL) {
04544             exec->index += len;
04545         }
04546         goto progress;
04547         } else if (ret < 0) {
04548         exec->status = -4;
04549         break;
04550         }
04551     }
04552     if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
04553 rollback:
04554         /*
04555          * Failed to find a way out
04556          */
04557         exec->determinist = 0;
04558         xmlFARegExecRollBack(exec);
04559     }
04560 progress:
04561     continue;
04562     }
04563 }
04564 #endif
04565 /************************************************************************
04566  *                                  *
04567  *  Parser for the Schemas Datatype Regular Expressions     *
04568  *  http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs  *
04569  *                                  *
04570  ************************************************************************/
04571 
04578 static int
04579 xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
04580     int cur;
04581     int len;
04582 
04583     cur = CUR_SCHAR(ctxt->cur, len);
04584     if ((cur == '.') || (cur == '\\') || (cur == '?') ||
04585     (cur == '*') || (cur == '+') || (cur == '(') ||
04586     (cur == ')') || (cur == '|') || (cur == 0x5B) ||
04587     (cur == 0x5D) || (cur == 0))
04588     return(-1);
04589     return(cur);
04590 }
04591 
04608 static void
04609 xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
04610     int cur;
04611     xmlRegAtomType type = (xmlRegAtomType) 0;
04612     xmlChar *blockName = NULL;
04613     
04614     cur = CUR;
04615     if (cur == 'L') {
04616     NEXT;
04617     cur = CUR;
04618     if (cur == 'u') {
04619         NEXT;
04620         type = XML_REGEXP_LETTER_UPPERCASE;
04621     } else if (cur == 'l') {
04622         NEXT;
04623         type = XML_REGEXP_LETTER_LOWERCASE;
04624     } else if (cur == 't') {
04625         NEXT;
04626         type = XML_REGEXP_LETTER_TITLECASE;
04627     } else if (cur == 'm') {
04628         NEXT;
04629         type = XML_REGEXP_LETTER_MODIFIER;
04630     } else if (cur == 'o') {
04631         NEXT;
04632         type = XML_REGEXP_LETTER_OTHERS;
04633     } else {
04634         type = XML_REGEXP_LETTER;
04635     }
04636     } else if (cur == 'M') {
04637     NEXT;
04638     cur = CUR;
04639     if (cur == 'n') {
04640         NEXT;
04641         /* nonspacing */
04642         type = XML_REGEXP_MARK_NONSPACING;
04643     } else if (cur == 'c') {
04644         NEXT;
04645         /* spacing combining */
04646         type = XML_REGEXP_MARK_SPACECOMBINING;
04647     } else if (cur == 'e') {
04648         NEXT;
04649         /* enclosing */
04650         type = XML_REGEXP_MARK_ENCLOSING;
04651     } else {
04652         /* all marks */
04653         type = XML_REGEXP_MARK;
04654     }
04655     } else if (cur == 'N') {
04656     NEXT;
04657     cur = CUR;
04658     if (cur == 'd') {
04659         NEXT;
04660         /* digital */
04661         type = XML_REGEXP_NUMBER_DECIMAL;
04662     } else if (cur == 'l') {
04663         NEXT;
04664         /* letter */
04665         type = XML_REGEXP_NUMBER_LETTER;
04666     } else if (cur == 'o') {
04667         NEXT;
04668         /* other */
04669         type = XML_REGEXP_NUMBER_OTHERS;
04670     } else {
04671         /* all numbers */
04672         type = XML_REGEXP_NUMBER;
04673     }
04674     } else if (cur == 'P') {
04675     NEXT;
04676     cur = CUR;
04677     if (cur == 'c') {
04678         NEXT;
04679         /* connector */
04680         type = XML_REGEXP_PUNCT_CONNECTOR;
04681     } else if (cur == 'd') {
04682         NEXT;
04683         /* dash */
04684         type = XML_REGEXP_PUNCT_DASH;
04685     } else if (cur == 's') {
04686         NEXT;
04687         /* open */
04688         type = XML_REGEXP_PUNCT_OPEN;
04689     } else if (cur == 'e') {
04690         NEXT;
04691         /* close */
04692         type = XML_REGEXP_PUNCT_CLOSE;
04693     } else if (cur == 'i') {
04694         NEXT;
04695         /* initial quote */
04696         type = XML_REGEXP_PUNCT_INITQUOTE;
04697     } else if (cur == 'f') {
04698         NEXT;
04699         /* final quote */
04700         type = XML_REGEXP_PUNCT_FINQUOTE;
04701     } else if (cur == 'o') {
04702         NEXT;
04703         /* other */
04704         type = XML_REGEXP_PUNCT_OTHERS;
04705     } else {
04706         /* all punctuation */
04707         type = XML_REGEXP_PUNCT;
04708     }
04709     } else if (cur == 'Z') {
04710     NEXT;
04711     cur = CUR;
04712     if (cur == 's') {
04713         NEXT;
04714         /* space */
04715         type = XML_REGEXP_SEPAR_SPACE;
04716     } else if (cur == 'l') {
04717         NEXT;
04718         /* line */
04719         type = XML_REGEXP_SEPAR_LINE;
04720     } else if (cur == 'p') {
04721         NEXT;
04722         /* paragraph */
04723         type = XML_REGEXP_SEPAR_PARA;
04724     } else {
04725         /* all separators */
04726         type = XML_REGEXP_SEPAR;
04727     }
04728     } else if (cur == 'S') {
04729     NEXT;
04730     cur = CUR;
04731     if (cur == 'm') {
04732         NEXT;
04733         type = XML_REGEXP_SYMBOL_MATH;
04734         /* math */
04735     } else if (cur == 'c') {
04736         NEXT;
04737         type = XML_REGEXP_SYMBOL_CURRENCY;
04738         /* currency */
04739     } else if (cur == 'k') {
04740         NEXT;
04741         type = XML_REGEXP_SYMBOL_MODIFIER;
04742         /* modifiers */
04743     } else if (cur == 'o') {
04744         NEXT;
04745         type = XML_REGEXP_SYMBOL_OTHERS;
04746         /* other */
04747     } else {
04748         /* all symbols */
04749         type = XML_REGEXP_SYMBOL;
04750     }
04751     } else if (cur == 'C') {
04752     NEXT;
04753     cur = CUR;
04754     if (cur == 'c') {
04755         NEXT;
04756         /* control */
04757         type = XML_REGEXP_OTHER_CONTROL;
04758     } else if (cur == 'f') {
04759         NEXT;
04760         /* format */
04761         type = XML_REGEXP_OTHER_FORMAT;
04762     } else if (cur == 'o') {
04763         NEXT;
04764         /* private use */
04765         type = XML_REGEXP_OTHER_PRIVATE;
04766     } else if (cur == 'n') {
04767         NEXT;
04768         /* not assigned */
04769         type = XML_REGEXP_OTHER_NA;
04770     } else {
04771         /* all others */
04772         type = XML_REGEXP_OTHER;
04773     }
04774     } else if (cur == 'I') {
04775     const xmlChar *start;
04776     NEXT;
04777     cur = CUR;
04778     if (cur != 's') {
04779         ERROR("IsXXXX expected");
04780         return;
04781     }
04782     NEXT;
04783     start = ctxt->cur;
04784     cur = CUR;
04785     if (((cur >= 'a') && (cur <= 'z')) || 
04786         ((cur >= 'A') && (cur <= 'Z')) || 
04787         ((cur >= '0') && (cur <= '9')) || 
04788         (cur == 0x2D)) {
04789         NEXT;
04790         cur = CUR;
04791         while (((cur >= 'a') && (cur <= 'z')) || 
04792         ((cur >= 'A') && (cur <= 'Z')) || 
04793         ((cur >= '0') && (cur <= '9')) || 
04794         (cur == 0x2D)) {
04795         NEXT;
04796         cur = CUR;
04797         }
04798     }
04799     type = XML_REGEXP_BLOCK_NAME;
04800     blockName = xmlStrndup(start, ctxt->cur - start);
04801     } else {
04802     ERROR("Unknown char property");
04803     return;
04804     }
04805     if (ctxt->atom == NULL) {
04806     ctxt->atom = xmlRegNewAtom(ctxt, type);
04807     if (ctxt->atom != NULL)
04808         ctxt->atom->valuep = blockName;
04809     } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
04810         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
04811                    type, 0, 0, blockName);
04812     }
04813 }
04814 
04825 static void
04826 xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
04827     int cur;
04828 
04829     if (CUR == '.') {
04830     if (ctxt->atom == NULL) {
04831         ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
04832     } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
04833         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
04834                    XML_REGEXP_ANYCHAR, 0, 0, NULL);
04835     }
04836     NEXT;
04837     return;
04838     }
04839     if (CUR != '\\') {
04840     ERROR("Escaped sequence: expecting \\");
04841     return;
04842     }
04843     NEXT;
04844     cur = CUR;
04845     if (cur == 'p') {
04846     NEXT;
04847     if (CUR != '{') {
04848         ERROR("Expecting '{'");
04849         return;
04850     }
04851     NEXT;
04852     xmlFAParseCharProp(ctxt);
04853     if (CUR != '}') {
04854         ERROR("Expecting '}'");
04855         return;
04856     }
04857     NEXT;
04858     } else if (cur == 'P') {
04859     NEXT;
04860     if (CUR != '{') {
04861         ERROR("Expecting '{'");
04862         return;
04863     }
04864     NEXT;
04865     xmlFAParseCharProp(ctxt);
04866     ctxt->atom->neg = 1;
04867     if (CUR != '}') {
04868         ERROR("Expecting '}'");
04869         return;
04870     }
04871     NEXT;
04872     } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
04873     (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
04874     (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
04875     (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
04876     (cur == 0x5E)) {
04877     if (ctxt->atom == NULL) {
04878         ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
04879         if (ctxt->atom != NULL) {
04880             switch (cur) {
04881             case 'n':
04882                 ctxt->atom->codepoint = '\n';
04883             break;
04884             case 'r':
04885                 ctxt->atom->codepoint = '\r';
04886             break;
04887             case 't':
04888                 ctxt->atom->codepoint = '\t';
04889             break;
04890             default:
04891             ctxt->atom->codepoint = cur;
04892         }
04893         }
04894     } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
04895             switch (cur) {
04896                 case 'n':
04897                     cur = '\n';
04898                     break;
04899                 case 'r':
04900                     cur = '\r';
04901                     break;
04902                 case 't':
04903                     cur = '\t';
04904                     break;
04905             }
04906         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
04907                    XML_REGEXP_CHARVAL, cur, cur, NULL);
04908     }
04909     NEXT;
04910     } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
04911     (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
04912     (cur == 'w') || (cur == 'W')) {
04913     xmlRegAtomType type = XML_REGEXP_ANYSPACE;
04914 
04915     switch (cur) {
04916         case 's': 
04917         type = XML_REGEXP_ANYSPACE;
04918         break;
04919         case 'S': 
04920         type = XML_REGEXP_NOTSPACE;
04921         break;
04922         case 'i': 
04923         type = XML_REGEXP_INITNAME;
04924         break;
04925         case 'I': 
04926         type = XML_REGEXP_NOTINITNAME;
04927         break;
04928         case 'c': 
04929         type = XML_REGEXP_NAMECHAR;
04930         break;
04931         case 'C': 
04932         type = XML_REGEXP_NOTNAMECHAR;
04933         break;
04934         case 'd': 
04935         type = XML_REGEXP_DECIMAL;
04936         break;
04937         case 'D': 
04938         type = XML_REGEXP_NOTDECIMAL;
04939         break;
04940         case 'w': 
04941         type = XML_REGEXP_REALCHAR;
04942         break;
04943         case 'W': 
04944         type = XML_REGEXP_NOTREALCHAR;
04945         break;
04946     }
04947     NEXT;
04948     if (ctxt->atom == NULL) {
04949         ctxt->atom = xmlRegNewAtom(ctxt, type);
04950     } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
04951         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
04952                    type, 0, 0, NULL);
04953     }
04954     } else {
04955     ERROR("Wrong escape sequence, misuse of character '\\'");
04956     }
04957 }
04958 
04969 static void
04970 xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
04971     int cur, len;
04972     int start = -1;
04973     int end = -1;
04974 
04975     if (CUR == '\0') {
04976         ERROR("Expecting ']'");
04977     return;
04978     }
04979 
04980     cur = CUR;
04981     if (cur == '\\') {
04982     NEXT;
04983     cur = CUR;
04984     switch (cur) {
04985         case 'n': start = 0xA; break;
04986         case 'r': start = 0xD; break;
04987         case 't': start = 0x9; break;
04988         case '\\': case '|': case '.': case '-': case '^': case '?':
04989         case '*': case '+': case '{': case '}': case '(': case ')':
04990         case '[': case ']':
04991         start = cur; break;
04992         default:
04993         ERROR("Invalid escape value");
04994         return;
04995     }
04996     end = start;
04997         len = 1;
04998     } else if ((cur != 0x5B) && (cur != 0x5D)) {
04999         end = start = CUR_SCHAR(ctxt->cur, len);
05000     } else {
05001     ERROR("Expecting a char range");
05002     return;
05003     }
05004     /*
05005      * Since we are "inside" a range, we can assume ctxt->cur is past
05006      * the start of ctxt->string, and PREV should be safe
05007      */
05008     if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
05009     NEXTL(len);
05010     return;
05011     }
05012     NEXTL(len);
05013     cur = CUR;
05014     if ((cur != '-') || (NXT(1) == ']')) {
05015         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
05016                       XML_REGEXP_CHARVAL, start, end, NULL);
05017     return;
05018     }
05019     NEXT;
05020     cur = CUR;
05021     if (cur == '\\') {
05022     NEXT;
05023     cur = CUR;
05024     switch (cur) {
05025         case 'n': end = 0xA; break;
05026         case 'r': end = 0xD; break;
05027         case 't': end = 0x9; break;
05028         case '\\': case '|': case '.': case '-': case '^': case '?':
05029         case '*': case '+': case '{': case '}': case '(': case ')':
05030         case '[': case ']':
05031         end = cur; break;
05032         default:
05033         ERROR("Invalid escape value");
05034         return;
05035     }
05036         len = 1;
05037     } else if ((cur != 0x5B) && (cur != 0x5D)) {
05038         end = CUR_SCHAR(ctxt->cur, len);
05039     } else {
05040     ERROR("Expecting the end of a char range");
05041     return;
05042     }
05043     NEXTL(len);
05044     /* TODO check that the values are acceptable character ranges for XML */
05045     if (end < start) {
05046     ERROR("End of range is before start of range");
05047     } else {
05048         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
05049                    XML_REGEXP_CHARVAL, start, end, NULL);
05050     }
05051     return;
05052 }
05053 
05060 static void
05061 xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
05062     do {
05063     if (CUR == '\\') {
05064         xmlFAParseCharClassEsc(ctxt);
05065     } else {
05066         xmlFAParseCharRange(ctxt);
05067     }
05068     } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
05069              (CUR != 0) && (ctxt->error == 0));
05070 }
05071 
05081 static void
05082 xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
05083     int n = ctxt->neg;
05084     while ((CUR != ']') && (ctxt->error == 0)) {
05085     if (CUR == '^') {
05086         int neg = ctxt->neg;
05087 
05088         NEXT;
05089         ctxt->neg = !ctxt->neg;
05090         xmlFAParsePosCharGroup(ctxt);
05091         ctxt->neg = neg;
05092     } else if ((CUR == '-') && (NXT(1) == '[')) {
05093         int neg = ctxt->neg;
05094         ctxt->neg = 2;
05095         NEXT;   /* eat the '-' */
05096         NEXT;   /* eat the '[' */
05097         xmlFAParseCharGroup(ctxt);
05098         if (CUR == ']') {
05099         NEXT;
05100         } else {
05101         ERROR("charClassExpr: ']' expected");
05102         break;
05103         }
05104         ctxt->neg = neg;
05105         break;
05106     } else if (CUR != ']') {
05107         xmlFAParsePosCharGroup(ctxt);
05108     }
05109     }
05110     ctxt->neg = n;
05111 }
05112 
05120 static void
05121 xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
05122     if (CUR == '[') {
05123     NEXT;
05124     ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
05125     if (ctxt->atom == NULL)
05126         return;
05127     xmlFAParseCharGroup(ctxt);
05128     if (CUR == ']') {
05129         NEXT;
05130     } else {
05131         ERROR("xmlFAParseCharClass: ']' expected");
05132     }
05133     } else {
05134     xmlFAParseCharClassEsc(ctxt);
05135     }
05136 }
05137 
05146 static int
05147 xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
05148     int ret = 0;
05149     int ok = 0;
05150 
05151     while ((CUR >= '0') && (CUR <= '9')) {
05152     ret = ret * 10 + (CUR - '0');
05153     ok = 1;
05154     NEXT;
05155     }
05156     if (ok != 1) {
05157     return(-1);
05158     }
05159     return(ret);
05160 }
05161 
05172 static int
05173 xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
05174     int cur;
05175 
05176     cur = CUR;
05177     if ((cur == '?') || (cur == '*') || (cur == '+')) {
05178     if (ctxt->atom != NULL) {
05179         if (cur == '?')
05180         ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
05181         else if (cur == '*')
05182         ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
05183         else if (cur == '+')
05184         ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
05185     }
05186     NEXT;
05187     return(1);
05188     }
05189     if (cur == '{') {
05190     int min = 0, max = 0;
05191 
05192     NEXT;
05193     cur = xmlFAParseQuantExact(ctxt);
05194     if (cur >= 0)
05195         min = cur;
05196     if (CUR == ',') {
05197         NEXT;
05198         if (CUR == '}')
05199             max = INT_MAX;
05200         else {
05201             cur = xmlFAParseQuantExact(ctxt);
05202             if (cur >= 0)
05203             max = cur;
05204         else {
05205             ERROR("Improper quantifier");
05206         }
05207         }
05208     }
05209     if (CUR == '}') {
05210         NEXT;
05211     } else {
05212         ERROR("Unterminated quantifier");
05213     }
05214     if (max == 0)
05215         max = min;
05216     if (ctxt->atom != NULL) {
05217         ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
05218         ctxt->atom->min = min;
05219         ctxt->atom->max = max;
05220     }
05221     return(1);
05222     }
05223     return(0);
05224 }
05225 
05232 static int
05233 xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
05234     int codepoint, len;
05235 
05236     codepoint = xmlFAIsChar(ctxt);
05237     if (codepoint > 0) {
05238     ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
05239     if (ctxt->atom == NULL)
05240         return(-1);
05241     codepoint = CUR_SCHAR(ctxt->cur, len);
05242     ctxt->atom->codepoint = codepoint;
05243     NEXTL(len);
05244     return(1);
05245     } else if (CUR == '|') {
05246     return(0);
05247     } else if (CUR == 0) {
05248     return(0);
05249     } else if (CUR == ')') {
05250     return(0);
05251     } else if (CUR == '(') {
05252     xmlRegStatePtr start, oldend, start0;
05253 
05254     NEXT;
05255     /*
05256      * this extra Epsilon transition is needed if we count with 0 allowed
05257      * unfortunately this can't be known at that point
05258      */
05259     xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
05260     start0 = ctxt->state;
05261     xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
05262     start = ctxt->state;
05263     oldend = ctxt->end;
05264     ctxt->end = NULL;
05265     ctxt->atom = NULL;
05266     xmlFAParseRegExp(ctxt, 0);
05267     if (CUR == ')') {
05268         NEXT;
05269     } else {
05270         ERROR("xmlFAParseAtom: expecting ')'");
05271     }
05272     ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
05273     if (ctxt->atom == NULL)
05274         return(-1);
05275     ctxt->atom->start = start;
05276     ctxt->atom->start0 = start0;
05277     ctxt->atom->stop = ctxt->state;
05278     ctxt->end = oldend;
05279     return(1);
05280     } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
05281     xmlFAParseCharClass(ctxt);
05282     return(1);
05283     }
05284     return(0);
05285 }
05286 
05293 static int
05294 xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
05295     int ret;
05296 
05297     ctxt->atom = NULL;
05298     ret = xmlFAParseAtom(ctxt);
05299     if (ret == 0)
05300     return(0);
05301     if (ctxt->atom == NULL) {
05302     ERROR("internal: no atom generated");
05303     }
05304     xmlFAParseQuantifier(ctxt);
05305     return(1);
05306 }
05307 
05318 static int
05319 xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
05320     xmlRegStatePtr previous;
05321     int ret;
05322 
05323     previous = ctxt->state;
05324     ret = xmlFAParsePiece(ctxt);
05325     if (ret != 0) {
05326     if (xmlFAGenerateTransitions(ctxt, previous, 
05327             (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
05328         return(-1);
05329     previous = ctxt->state;
05330     ctxt->atom = NULL;
05331     }
05332     while ((ret != 0) && (ctxt->error == 0)) {
05333     ret = xmlFAParsePiece(ctxt);
05334     if (ret != 0) {
05335         if (xmlFAGenerateTransitions(ctxt, previous, 
05336                 (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
05337             return(-1);
05338         previous = ctxt->state;
05339         ctxt->atom = NULL;
05340     }
05341     }
05342     return(0);
05343 }
05344 
05352 static void
05353 xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
05354     xmlRegStatePtr start, end;
05355 
05356     /* if not top start should have been generated by an epsilon trans */
05357     start = ctxt->state;
05358     ctxt->end = NULL;
05359     xmlFAParseBranch(ctxt, NULL);
05360     if (top) {
05361 #ifdef DEBUG_REGEXP_GRAPH
05362     printf("State %d is final\n", ctxt->state->no);
05363 #endif
05364     ctxt->state->type = XML_REGEXP_FINAL_STATE;
05365     }
05366     if (CUR != '|') {
05367     ctxt->end = ctxt->state;
05368     return;
05369     }
05370     end = ctxt->state;
05371     while ((CUR == '|') && (ctxt->error == 0)) {
05372     NEXT;
05373     ctxt->state = start;
05374     ctxt->end = NULL;
05375     xmlFAParseBranch(ctxt, end);
05376     }
05377     if (!top) {
05378     ctxt->state = end;
05379     ctxt->end = end;
05380     }
05381 }
05382 
05383 /************************************************************************
05384  *                                  *
05385  *          The basic API                   *
05386  *                                  *
05387  ************************************************************************/
05388 
05396 void
05397 xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
05398     int i;
05399 
05400     if (output == NULL)
05401         return;
05402     fprintf(output, " regexp: ");
05403     if (regexp == NULL) {
05404     fprintf(output, "NULL\n");
05405     return;
05406     }
05407     fprintf(output, "'%s' ", regexp->string);
05408     fprintf(output, "\n");
05409     fprintf(output, "%d atoms:\n", regexp->nbAtoms);
05410     for (i = 0;i < regexp->nbAtoms; i++) {
05411     fprintf(output, " %02d ", i);
05412     xmlRegPrintAtom(output, regexp->atoms[i]);
05413     }
05414     fprintf(output, "%d states:", regexp->nbStates);
05415     fprintf(output, "\n");
05416     for (i = 0;i < regexp->nbStates; i++) {
05417     xmlRegPrintState(output, regexp->states[i]);
05418     }
05419     fprintf(output, "%d counters:\n", regexp->nbCounters);
05420     for (i = 0;i < regexp->nbCounters; i++) {
05421     fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
05422                                         regexp->counters[i].max);
05423     }
05424 }
05425 
05436 xmlRegexpPtr
05437 xmlRegexpCompile(const xmlChar *regexp) {
05438     xmlRegexpPtr ret;
05439     xmlRegParserCtxtPtr ctxt;
05440 
05441     ctxt = xmlRegNewParserCtxt(regexp);
05442     if (ctxt == NULL)
05443     return(NULL);
05444 
05445     /* initialize the parser */
05446     ctxt->end = NULL;
05447     ctxt->start = ctxt->state = xmlRegNewState(ctxt);
05448     xmlRegStatePush(ctxt, ctxt->start);
05449 
05450     /* parse the expression building an automata */
05451     xmlFAParseRegExp(ctxt, 1);
05452     if (CUR != 0) {
05453     ERROR("xmlFAParseRegExp: extra characters");
05454     }
05455     if (ctxt->error != 0) {
05456     xmlRegFreeParserCtxt(ctxt);
05457     return(NULL);
05458     }
05459     ctxt->end = ctxt->state;
05460     ctxt->start->type = XML_REGEXP_START_STATE;
05461     ctxt->end->type = XML_REGEXP_FINAL_STATE;
05462 
05463     /* remove the Epsilon except for counted transitions */
05464     xmlFAEliminateEpsilonTransitions(ctxt);
05465 
05466 
05467     if (ctxt->error != 0) {
05468     xmlRegFreeParserCtxt(ctxt);
05469     return(NULL);
05470     }
05471     ret = xmlRegEpxFromParse(ctxt);
05472     xmlRegFreeParserCtxt(ctxt);
05473     return(ret);
05474 }
05475 
05485 int
05486 xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
05487     if ((comp == NULL) || (content == NULL))
05488     return(-1);
05489     return(xmlFARegExec(comp, content));
05490 }
05491 
05500 int
05501 xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
05502     xmlAutomataPtr am;
05503     int ret;
05504 
05505     if (comp == NULL)
05506     return(-1);
05507     if (comp->determinist != -1)
05508     return(comp->determinist);
05509 
05510     am = xmlNewAutomata();
05511     if (am->states != NULL) {
05512     int i;
05513 
05514     for (i = 0;i < am->nbStates;i++)
05515         xmlRegFreeState(am->states[i]);
05516     xmlFree(am->states);
05517     }
05518     am->nbAtoms = comp->nbAtoms;
05519     am->atoms = comp->atoms;
05520     am->nbStates = comp->nbStates;
05521     am->states = comp->states;
05522     am->determinist = -1;
05523     am->flags = comp->flags;
05524     ret = xmlFAComputesDeterminism(am);
05525     am->atoms = NULL;
05526     am->states = NULL;
05527     xmlFreeAutomata(am);
05528     comp->determinist = ret;
05529     return(ret);
05530 }
05531 
05538 void
05539 xmlRegFreeRegexp(xmlRegexpPtr regexp) {
05540     int i;
05541     if (regexp == NULL)
05542     return;
05543 
05544     if (regexp->string != NULL)
05545     xmlFree(regexp->string);
05546     if (regexp->states != NULL) {
05547     for (i = 0;i < regexp->nbStates;i++)
05548         xmlRegFreeState(regexp->states[i]);
05549     xmlFree(regexp->states);
05550     }
05551     if (regexp->atoms != NULL) {
05552     for (i = 0;i < regexp->nbAtoms;i++)
05553         xmlRegFreeAtom(regexp->atoms[i]);
05554     xmlFree(regexp->atoms);
05555     }
05556     if (regexp->counters != NULL)
05557     xmlFree(regexp->counters);
05558     if (regexp->compact != NULL)
05559     xmlFree(regexp->compact);
05560     if (regexp->transdata != NULL)
05561     xmlFree(regexp->transdata);
05562     if (regexp->stringMap != NULL) {
05563     for (i = 0; i < regexp->nbstrings;i++)
05564         xmlFree(regexp->stringMap[i]);
05565     xmlFree(regexp->stringMap);
05566     }
05567 
05568     xmlFree(regexp);
05569 }
05570 
05571 #ifdef LIBXML_AUTOMATA_ENABLED
05572 /************************************************************************
05573  *                                  *
05574  *          The Automata interface              *
05575  *                                  *
05576  ************************************************************************/
05577 
05585 xmlAutomataPtr
05586 xmlNewAutomata(void) {
05587     xmlAutomataPtr ctxt;
05588 
05589     ctxt = xmlRegNewParserCtxt(NULL);
05590     if (ctxt == NULL)
05591     return(NULL);
05592 
05593     /* initialize the parser */
05594     ctxt->end = NULL;
05595     ctxt->start = ctxt->state = xmlRegNewState(ctxt);
05596     if (ctxt->start == NULL) {
05597     xmlFreeAutomata(ctxt);
05598     return(NULL);
05599     }
05600     ctxt->start->type = XML_REGEXP_START_STATE;
05601     if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
05602         xmlRegFreeState(ctxt->start);
05603     xmlFreeAutomata(ctxt);
05604     return(NULL);
05605     }
05606     ctxt->flags = 0;
05607 
05608     return(ctxt);
05609 }
05610 
05617 void
05618 xmlFreeAutomata(xmlAutomataPtr am) {
05619     if (am == NULL)
05620     return;
05621     xmlRegFreeParserCtxt(am);
05622 }
05623 
05631 void
05632 xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
05633     if (am == NULL)
05634     return;
05635     am->flags |= flags;
05636 }
05637 
05646 xmlAutomataStatePtr
05647 xmlAutomataGetInitState(xmlAutomataPtr am) {
05648     if (am == NULL)
05649     return(NULL);
05650     return(am->start);
05651 }
05652 
05662 int
05663 xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
05664     if ((am == NULL) || (state == NULL))
05665     return(-1);
05666     state->type = XML_REGEXP_FINAL_STATE;
05667     return(0);
05668 }
05669 
05684 xmlAutomataStatePtr
05685 xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
05686              xmlAutomataStatePtr to, const xmlChar *token,
05687              void *data) {
05688     xmlRegAtomPtr atom;
05689 
05690     if ((am == NULL) || (from == NULL) || (token == NULL))
05691     return(NULL);
05692     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
05693     if (atom == NULL)
05694         return(NULL);
05695     atom->data = data;
05696     if (atom == NULL)
05697     return(NULL);
05698     atom->valuep = xmlStrdup(token);
05699 
05700     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
05701         xmlRegFreeAtom(atom);
05702     return(NULL);
05703     }
05704     if (to == NULL)
05705     return(am->state);
05706     return(to);
05707 }
05708 
05724 xmlAutomataStatePtr
05725 xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
05726               xmlAutomataStatePtr to, const xmlChar *token,
05727               const xmlChar *token2, void *data) {
05728     xmlRegAtomPtr atom;
05729 
05730     if ((am == NULL) || (from == NULL) || (token == NULL))
05731     return(NULL);
05732     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
05733     if (atom == NULL)
05734     return(NULL);
05735     atom->data = data;
05736     if ((token2 == NULL) || (*token2 == 0)) {
05737     atom->valuep = xmlStrdup(token);
05738     } else {
05739     int lenn, lenp;
05740     xmlChar *str;
05741 
05742     lenn = strlen((char *) token2);
05743     lenp = strlen((char *) token);
05744 
05745     str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
05746     if (str == NULL) {
05747         xmlRegFreeAtom(atom);
05748         return(NULL);
05749     }
05750     memcpy(&str[0], token, lenp);
05751     str[lenp] = '|';
05752     memcpy(&str[lenp + 1], token2, lenn);
05753     str[lenn + lenp + 1] = 0;
05754 
05755     atom->valuep = str;
05756     }
05757 
05758     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
05759         xmlRegFreeAtom(atom);
05760     return(NULL);
05761     }
05762     if (to == NULL)
05763     return(am->state);
05764     return(to);
05765 }
05766 
05784 xmlAutomataStatePtr
05785 xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
05786                xmlAutomataStatePtr to, const xmlChar *token,
05787                const xmlChar *token2, void *data) {
05788     xmlRegAtomPtr atom;
05789     xmlChar err_msg[200];
05790 
05791     if ((am == NULL) || (from == NULL) || (token == NULL))
05792     return(NULL);
05793     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
05794     if (atom == NULL)
05795     return(NULL);
05796     atom->data = data;
05797     atom->neg = 1;
05798     if ((token2 == NULL) || (*token2 == 0)) {
05799     atom->valuep = xmlStrdup(token);
05800     } else {
05801     int lenn, lenp;
05802     xmlChar *str;
05803 
05804     lenn = strlen((char *) token2);
05805     lenp = strlen((char *) token);
05806 
05807     str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
05808     if (str == NULL) {
05809         xmlRegFreeAtom(atom);
05810         return(NULL);
05811     }
05812     memcpy(&str[0], token, lenp);
05813     str[lenp] = '|';
05814     memcpy(&str[lenp + 1], token2, lenn);
05815     str[lenn + lenp + 1] = 0;
05816 
05817     atom->valuep = str;
05818     }
05819     snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
05820     err_msg[199] = 0;
05821     atom->valuep2 = xmlStrdup(err_msg);
05822 
05823     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
05824         xmlRegFreeAtom(atom);
05825     return(NULL);
05826     }
05827     am->negs++;
05828     if (to == NULL)
05829     return(am->state);
05830     return(to);
05831 }
05832 
05851 xmlAutomataStatePtr
05852 xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
05853              xmlAutomataStatePtr to, const xmlChar *token,
05854              const xmlChar *token2,
05855              int min, int max, void *data) {
05856     xmlRegAtomPtr atom;
05857     int counter;
05858 
05859     if ((am == NULL) || (from == NULL) || (token == NULL))
05860     return(NULL);
05861     if (min < 0)
05862     return(NULL);
05863     if ((max < min) || (max < 1))
05864     return(NULL);
05865     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
05866     if (atom == NULL)
05867     return(NULL);
05868     if ((token2 == NULL) || (*token2 == 0)) {
05869     atom->valuep = xmlStrdup(token);
05870     } else {
05871     int lenn, lenp;
05872     xmlChar *str;
05873 
05874     lenn = strlen((char *) token2);
05875     lenp = strlen((char *) token);
05876 
05877     str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
05878     if (str == NULL) {
05879         xmlRegFreeAtom(atom);
05880         return(NULL);
05881     }
05882     memcpy(&str[0], token, lenp);
05883     str[lenp] = '|';
05884     memcpy(&str[lenp + 1], token2, lenn);
05885     str[lenn + lenp + 1] = 0;
05886 
05887     atom->valuep = str;
05888     }
05889     atom->data = data;
05890     if (min == 0)
05891     atom->min = 1;
05892     else
05893     atom->min = min;
05894     atom->max = max;
05895 
05896     /*
05897      * associate a counter to the transition.
05898      */
05899     counter = xmlRegGetCounter(am);
05900     am->counters[counter].min = min;
05901     am->counters[counter].max = max;
05902 
05903     /* xmlFAGenerateTransitions(am, from, to, atom); */
05904     if (to == NULL) {
05905         to = xmlRegNewState(am);
05906     xmlRegStatePush(am, to);
05907     }
05908     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
05909     xmlRegAtomPush(am, atom);
05910     am->state = to;
05911 
05912     if (to == NULL)
05913     to = am->state;
05914     if (to == NULL)
05915     return(NULL);
05916     if (min == 0)
05917     xmlFAGenerateEpsilonTransition(am, from, to);
05918     return(to);
05919 }
05920 
05938 xmlAutomataStatePtr
05939 xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
05940              xmlAutomataStatePtr to, const xmlChar *token,
05941              int min, int max, void *data) {
05942     xmlRegAtomPtr atom;
05943     int counter;
05944 
05945     if ((am == NULL) || (from == NULL) || (token == NULL))
05946     return(NULL);
05947     if (min < 0)
05948     return(NULL);
05949     if ((max < min) || (max < 1))
05950     return(NULL);
05951     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
05952     if (atom == NULL)
05953     return(NULL);
05954     atom->valuep = xmlStrdup(token);
05955     atom->data = data;
05956     if (min == 0)
05957     atom->min = 1;
05958     else
05959     atom->min = min;
05960     atom->max = max;
05961 
05962     /*
05963      * associate a counter to the transition.
05964      */
05965     counter = xmlRegGetCounter(am);
05966     am->counters[counter].min = min;
05967     am->counters[counter].max = max;
05968 
05969     /* xmlFAGenerateTransitions(am, from, to, atom); */
05970     if (to == NULL) {
05971         to = xmlRegNewState(am);
05972     xmlRegStatePush(am, to);
05973     }
05974     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
05975     xmlRegAtomPush(am, atom);
05976     am->state = to;
05977 
05978     if (to == NULL)
05979     to = am->state;
05980     if (to == NULL)
05981     return(NULL);
05982     if (min == 0)
05983     xmlFAGenerateEpsilonTransition(am, from, to);
05984     return(to);
05985 }
05986 
06006 xmlAutomataStatePtr
06007 xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
06008              xmlAutomataStatePtr to, const xmlChar *token,
06009              const xmlChar *token2,
06010              int min, int max, void *data) {
06011     xmlRegAtomPtr atom;
06012     int counter;
06013 
06014     if ((am == NULL) || (from == NULL) || (token == NULL))
06015     return(NULL);
06016     if (min < 1)
06017     return(NULL);
06018     if ((max < min) || (max < 1))
06019     return(NULL);
06020     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
06021     if (atom == NULL)
06022     return(NULL);
06023     if ((token2 == NULL) || (*token2 == 0)) {
06024     atom->valuep = xmlStrdup(token);
06025     } else {
06026     int lenn, lenp;
06027     xmlChar *str;
06028 
06029     lenn = strlen((char *) token2);
06030     lenp = strlen((char *) token);
06031 
06032     str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
06033     if (str == NULL) {
06034         xmlRegFreeAtom(atom);
06035         return(NULL);
06036     }
06037     memcpy(&str[0], token, lenp);
06038     str[lenp] = '|';
06039     memcpy(&str[lenp + 1], token2, lenn);
06040     str[lenn + lenp + 1] = 0;
06041 
06042     atom->valuep = str;
06043     }    
06044     atom->data = data;
06045     atom->quant = XML_REGEXP_QUANT_ONCEONLY;
06046     atom->min = min;
06047     atom->max = max;
06048     /*
06049      * associate a counter to the transition.
06050      */
06051     counter = xmlRegGetCounter(am);
06052     am->counters[counter].min = 1;
06053     am->counters[counter].max = 1;
06054 
06055     /* xmlFAGenerateTransitions(am, from, to, atom); */
06056     if (to == NULL) {
06057     to = xmlRegNewState(am);
06058     xmlRegStatePush(am, to);
06059     }
06060     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
06061     xmlRegAtomPush(am, atom);
06062     am->state = to;
06063     return(to);
06064 }
06065 
06066     
06067 
06086 xmlAutomataStatePtr
06087 xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
06088              xmlAutomataStatePtr to, const xmlChar *token,
06089              int min, int max, void *data) {
06090     xmlRegAtomPtr atom;
06091     int counter;
06092 
06093     if ((am == NULL) || (from == NULL) || (token == NULL))
06094     return(NULL);
06095     if (min < 1)
06096     return(NULL);
06097     if ((max < min) || (max < 1))
06098     return(NULL);
06099     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
06100     if (atom == NULL)
06101     return(NULL);
06102     atom->valuep = xmlStrdup(token);
06103     atom->data = data;
06104     atom->quant = XML_REGEXP_QUANT_ONCEONLY;
06105     atom->min = min;
06106     atom->max = max;
06107     /*
06108      * associate a counter to the transition.
06109      */
06110     counter = xmlRegGetCounter(am);
06111     am->counters[counter].min = 1;
06112     am->counters[counter].max = 1;
06113 
06114     /* xmlFAGenerateTransitions(am, from, to, atom); */
06115     if (to == NULL) {
06116     to = xmlRegNewState(am);
06117     xmlRegStatePush(am, to);
06118     }
06119     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
06120     xmlRegAtomPush(am, atom);
06121     am->state = to;
06122     return(to);
06123 }
06124 
06133 xmlAutomataStatePtr
06134 xmlAutomataNewState(xmlAutomataPtr am) {
06135     xmlAutomataStatePtr to; 
06136 
06137     if (am == NULL)
06138     return(NULL);
06139     to = xmlRegNewState(am);
06140     xmlRegStatePush(am, to);
06141     return(to);
06142 }
06143 
06156 xmlAutomataStatePtr
06157 xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
06158               xmlAutomataStatePtr to) {
06159     if ((am == NULL) || (from == NULL))
06160     return(NULL);
06161     xmlFAGenerateEpsilonTransition(am, from, to);
06162     if (to == NULL)
06163     return(am->state);
06164     return(to);
06165 }
06166 
06181 xmlAutomataStatePtr
06182 xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
06183                xmlAutomataStatePtr to, int lax) {
06184     if ((am == NULL) || (from == NULL))
06185     return(NULL);
06186     xmlFAGenerateAllTransition(am, from, to, lax);
06187     if (to == NULL)
06188     return(am->state);
06189     return(to);
06190 }
06191 
06202 int     
06203 xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
06204     int ret;
06205 
06206     if (am == NULL)
06207     return(-1);
06208 
06209     ret = xmlRegGetCounter(am);
06210     if (ret < 0)
06211     return(-1);
06212     am->counters[ret].min = min;
06213     am->counters[ret].max = max;
06214     return(ret);
06215 }
06216 
06230 xmlAutomataStatePtr
06231 xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
06232         xmlAutomataStatePtr to, int counter) {
06233     if ((am == NULL) || (from == NULL) || (counter < 0))
06234     return(NULL);
06235     xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
06236     if (to == NULL)
06237     return(am->state);
06238     return(to);
06239 }
06240 
06254 xmlAutomataStatePtr
06255 xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
06256         xmlAutomataStatePtr to, int counter) {
06257     if ((am == NULL) || (from == NULL) || (counter < 0))
06258     return(NULL);
06259     xmlFAGenerateCountedTransition(am, from, to, counter);
06260     if (to == NULL)
06261     return(am->state);
06262     return(to);
06263 }
06264 
06274 xmlRegexpPtr          
06275 xmlAutomataCompile(xmlAutomataPtr am) {
06276     xmlRegexpPtr ret;
06277 
06278     if ((am == NULL) || (am->error != 0)) return(NULL);
06279     xmlFAEliminateEpsilonTransitions(am);
06280     /* xmlFAComputesDeterminism(am); */
06281     ret = xmlRegEpxFromParse(am);
06282 
06283     return(ret);
06284 }
06285 
06294 int          
06295 xmlAutomataIsDeterminist(xmlAutomataPtr am) {
06296     int ret;
06297 
06298     if (am == NULL)
06299     return(-1);
06300 
06301     ret = xmlFAComputesDeterminism(am);
06302     return(ret);
06303 }
06304 #endif /* LIBXML_AUTOMATA_ENABLED */
06305 
06306 #ifdef LIBXML_EXPR_ENABLED
06307 /************************************************************************
06308  *                                  *
06309  *      Formal Expression handling code             *
06310  *                                  *
06311  ************************************************************************/
06312 /************************************************************************
06313  *                                  *
06314  *      Expression handling context             *
06315  *                                  *
06316  ************************************************************************/
06317 
06318 struct _xmlExpCtxt {
06319     xmlDictPtr dict;
06320     xmlExpNodePtr *table;
06321     int size;
06322     int nbElems;
06323     int nb_nodes;
06324     int maxNodes;
06325     const char *expr;
06326     const char *cur;
06327     int nb_cons;
06328     int tabSize;
06329 };
06330 
06340 xmlExpCtxtPtr
06341 xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
06342     xmlExpCtxtPtr ret;
06343     int size = 256;
06344 
06345     if (maxNodes <= 4096)
06346         maxNodes = 4096;
06347     
06348     ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
06349     if (ret == NULL)
06350         return(NULL);
06351     memset(ret, 0, sizeof(xmlExpCtxt));
06352     ret->size = size;
06353     ret->nbElems = 0;
06354     ret->maxNodes = maxNodes;
06355     ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
06356     if (ret->table == NULL) {
06357         xmlFree(ret);
06358     return(NULL);
06359     }
06360     memset(ret->table, 0, size * sizeof(xmlExpNodePtr));
06361     if (dict == NULL) {
06362         ret->dict = xmlDictCreate();
06363     if (ret->dict == NULL) {
06364         xmlFree(ret->table);
06365         xmlFree(ret);
06366         return(NULL);
06367     }
06368     } else {
06369         ret->dict = dict;
06370     xmlDictReference(ret->dict);
06371     }
06372     return(ret);
06373 }
06374 
06381 void
06382 xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
06383     if (ctxt == NULL)
06384         return;
06385     xmlDictFree(ctxt->dict);
06386     if (ctxt->table != NULL)
06387     xmlFree(ctxt->table);
06388     xmlFree(ctxt);
06389 }
06390 
06391 /************************************************************************
06392  *                                  *
06393  *      Structure associated to an expression node      *
06394  *                                  *
06395  ************************************************************************/
06396 #define MAX_NODES 10000
06397 
06398 /* #define DEBUG_DERIV */
06399 
06400 /*
06401  * TODO: 
06402  * - Wildcards
06403  * - public API for creation
06404  *
06405  * Started
06406  * - regression testing
06407  *
06408  * Done
06409  * - split into module and test tool
06410  * - memleaks
06411  */
06412 
06413 typedef enum {
06414     XML_EXP_NILABLE = (1 << 0)
06415 } xmlExpNodeInfo;
06416 
06417 #define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE)
06418 
06419 struct _xmlExpNode {
06420     unsigned char type;/* xmlExpNodeType */
06421     unsigned char info;/* OR of xmlExpNodeInfo */
06422     unsigned short key; /* the hash key */
06423     unsigned int ref;   /* The number of references */
06424     int c_max;      /* the maximum length it can consume */
06425     xmlExpNodePtr exp_left;
06426     xmlExpNodePtr next;/* the next node in the hash table or free list */
06427     union {
06428     struct {
06429         int f_min;
06430         int f_max;
06431     } count;
06432     struct {
06433         xmlExpNodePtr f_right;
06434     } children;
06435         const xmlChar *f_str;
06436     } field;
06437 };
06438 
06439 #define exp_min field.count.f_min
06440 #define exp_max field.count.f_max
06441 /* #define exp_left field.children.f_left */
06442 #define exp_right field.children.f_right
06443 #define exp_str field.f_str
06444 
06445 static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type);
06446 static xmlExpNode forbiddenExpNode = {
06447     XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}}
06448 };
06449 xmlExpNodePtr forbiddenExp = &forbiddenExpNode;
06450 static xmlExpNode emptyExpNode = {
06451     XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}}
06452 };
06453 xmlExpNodePtr emptyExp = &emptyExpNode;
06454 
06455 /************************************************************************
06456  *                                  *
06457  *  The custom hash table for unicity and canonicalization      *
06458  *  of sub-expressions pointers                     *
06459  *                                  *
06460  ************************************************************************/
06461 /*
06462  * xmlExpHashNameComputeKey:
06463  * Calculate the hash key for a token
06464  */
06465 static unsigned short
06466 xmlExpHashNameComputeKey(const xmlChar *name) {
06467     unsigned short value = 0L;
06468     char ch;
06469     
06470     if (name != NULL) {
06471     value += 30 * (*name);
06472     while ((ch = *name++) != 0) {
06473         value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
06474     }
06475     }
06476     return (value);
06477 }
06478 
06479 /*
06480  * xmlExpHashComputeKey:
06481  * Calculate the hash key for a compound expression
06482  */
06483 static unsigned short
06484 xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
06485                      xmlExpNodePtr right) {
06486     unsigned long value;
06487     unsigned short ret;
06488     
06489     switch (type) {
06490         case XML_EXP_SEQ:
06491         value = left->key;
06492         value += right->key;
06493         value *= 3;
06494         ret = (unsigned short) value;
06495         break;
06496         case XML_EXP_OR:
06497         value = left->key;
06498         value += right->key;
06499         value *= 7;
06500         ret = (unsigned short) value;
06501         break;
06502         case XML_EXP_COUNT:
06503         value = left->key;
06504         value += right->key;
06505         ret = (unsigned short) value;
06506         break;
06507     default:
06508         ret = 0;
06509     }
06510     return(ret);
06511 }
06512 
06513 
06514 static xmlExpNodePtr
06515 xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
06516     xmlExpNodePtr ret;
06517 
06518     if (ctxt->nb_nodes >= MAX_NODES)
06519         return(NULL);
06520     ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
06521     if (ret == NULL)
06522         return(NULL);
06523     memset(ret, 0, sizeof(xmlExpNode));
06524     ret->type = type;
06525     ret->next = NULL;
06526     ctxt->nb_nodes++;
06527     ctxt->nb_cons++;
06528     return(ret);
06529 }
06530 
06541 static xmlExpNodePtr
06542 xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
06543                    xmlExpNodePtr left, xmlExpNodePtr right,
06544            const xmlChar *name, int min, int max) {
06545     unsigned short kbase, key;
06546     xmlExpNodePtr entry;
06547     xmlExpNodePtr insert;
06548 
06549     if (ctxt == NULL)
06550     return(NULL);
06551 
06552     /*
06553      * Check for duplicate and insertion location.
06554      */
06555     if (type == XML_EXP_ATOM) {
06556     kbase = xmlExpHashNameComputeKey(name);
06557     } else if (type == XML_EXP_COUNT) {
06558         /* COUNT reduction rule 1 */
06559     /* a{1} -> a */
06560     if (min == max) {
06561         if (min == 1) {
06562         return(left);
06563         }
06564         if (min == 0) {
06565         xmlExpFree(ctxt, left);
06566             return(emptyExp);
06567         }
06568     }
06569     if (min < 0) {
06570         xmlExpFree(ctxt, left);
06571         return(forbiddenExp);
06572     }
06573         if (max == -1)
06574         kbase = min + 79;
06575     else
06576         kbase = max - min;
06577     kbase += left->key;
06578     } else if (type == XML_EXP_OR) {
06579         /* Forbid reduction rules */
06580         if (left->type == XML_EXP_FORBID) {
06581         xmlExpFree(ctxt, left);
06582         return(right);
06583     }
06584         if (right->type == XML_EXP_FORBID) {
06585         xmlExpFree(ctxt, right);
06586         return(left);
06587     }
06588 
06589         /* OR reduction rule 1 */
06590     /* a | a reduced to a */
06591         if (left == right) {
06592         left->ref--;
06593         return(left);
06594     }
06595         /* OR canonicalization rule 1 */
06596     /* linearize (a | b) | c into a | (b | c) */
06597         if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
06598         xmlExpNodePtr tmp = left;
06599             left = right;
06600         right = tmp;
06601     }
06602         /* OR reduction rule 2 */
06603     /* a | (a | b) and b | (a | b) are reduced to a | b */
06604         if (right->type == XML_EXP_OR) {
06605         if ((left == right->exp_left) ||
06606             (left == right->exp_right)) {
06607         xmlExpFree(ctxt, left);
06608         return(right);
06609         }
06610     }
06611         /* OR canonicalization rule 2 */
06612     /* linearize (a | b) | c into a | (b | c) */
06613         if (left->type == XML_EXP_OR) {
06614         xmlExpNodePtr tmp;
06615 
06616         /* OR canonicalization rule 2 */
06617         if ((left->exp_right->type != XML_EXP_OR) &&
06618             (left->exp_right->key < left->exp_left->key)) {
06619             tmp = left->exp_right;
06620         left->exp_right = left->exp_left;
06621         left->exp_left = tmp;
06622         }
06623         left->exp_right->ref++;
06624         tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
06625                                  NULL, 0, 0);
06626         left->exp_left->ref++;
06627         tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
06628                                  NULL, 0, 0);
06629     
06630         xmlExpFree(ctxt, left);
06631         return(tmp);
06632     }
06633     if (right->type == XML_EXP_OR) {
06634         /* Ordering in the tree */
06635         /* C | (A | B) -> A | (B | C) */
06636         if (left->key > right->exp_right->key) {
06637         xmlExpNodePtr tmp;
06638         right->exp_right->ref++;
06639         tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
06640                                  left, NULL, 0, 0);
06641         right->exp_left->ref++;
06642         tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
06643                                  tmp, NULL, 0, 0);
06644         xmlExpFree(ctxt, right);
06645         return(tmp);
06646         }
06647         /* Ordering in the tree */
06648         /* B | (A | C) -> A | (B | C) */
06649         if (left->key > right->exp_left->key) {
06650         xmlExpNodePtr tmp;
06651         right->exp_right->ref++;
06652         tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
06653                                  right->exp_right, NULL, 0, 0);
06654         right->exp_left->ref++;
06655         tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
06656                                  tmp, NULL, 0, 0);
06657         xmlExpFree(ctxt, right);
06658         return(tmp);
06659         }
06660     }
06661     /* we know both types are != XML_EXP_OR here */
06662         else if (left->key > right->key) {
06663         xmlExpNodePtr tmp = left;
06664             left = right;
06665         right = tmp;
06666     }
06667     kbase = xmlExpHashComputeKey(type, left, right);
06668     } else if (type == XML_EXP_SEQ) {
06669         /* Forbid reduction rules */
06670         if (left->type == XML_EXP_FORBID) {
06671         xmlExpFree(ctxt, right);
06672         return(left);
06673     }
06674         if (right->type == XML_EXP_FORBID) {
06675         xmlExpFree(ctxt, left);
06676         return(right);
06677     }
06678         /* Empty reduction rules */
06679         if (right->type == XML_EXP_EMPTY) {
06680         return(left);
06681     }
06682         if (left->type == XML_EXP_EMPTY) {
06683         return(right);
06684     }
06685     kbase = xmlExpHashComputeKey(type, left, right);
06686     } else 
06687         return(NULL);
06688 
06689     key = kbase % ctxt->size;
06690     if (ctxt->table[key] != NULL) {
06691     for (insert = ctxt->table[key]; insert != NULL;
06692          insert = insert->next) {
06693         if ((insert->key == kbase) &&
06694             (insert->type == type)) {
06695         if (type == XML_EXP_ATOM) {
06696             if (name == insert->exp_str) {
06697             insert->ref++;
06698             return(insert);
06699             }
06700         } else if (type == XML_EXP_COUNT) {
06701             if ((insert->exp_min == min) && (insert->exp_max == max) &&
06702                 (insert->exp_left == left)) {
06703             insert->ref++;
06704             left->ref--;
06705             return(insert);
06706             }
06707         } else if ((insert->exp_left == left) &&
06708                (insert->exp_right == right)) {
06709             insert->ref++;
06710             left->ref--;
06711             right->ref--;
06712             return(insert);
06713         }
06714         }
06715     }
06716     }
06717 
06718     entry = xmlExpNewNode(ctxt, type);
06719     if (entry == NULL)
06720         return(NULL);
06721     entry->key = kbase;
06722     if (type == XML_EXP_ATOM) {
06723     entry->exp_str = name;
06724     entry->c_max = 1;
06725     } else if (type == XML_EXP_COUNT) {
06726         entry->exp_min = min;
06727         entry->exp_max = max;
06728     entry->exp_left = left;
06729     if ((min == 0) || (IS_NILLABLE(left)))
06730         entry->info |= XML_EXP_NILABLE;
06731     if (max < 0)
06732         entry->c_max = -1;
06733     else
06734         entry->c_max = max * entry->exp_left->c_max;
06735     } else {
06736     entry->exp_left = left;
06737     entry->exp_right = right;
06738     if (type == XML_EXP_OR) {
06739         if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
06740         entry->info |= XML_EXP_NILABLE;
06741         if ((entry->exp_left->c_max == -1) ||
06742             (entry->exp_right->c_max == -1))
06743         entry->c_max = -1;
06744         else if (entry->exp_left->c_max > entry->exp_right->c_max)
06745             entry->c_max = entry->exp_left->c_max;
06746         else
06747             entry->c_max = entry->exp_right->c_max;
06748     } else {
06749         if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
06750         entry->info |= XML_EXP_NILABLE;
06751         if ((entry->exp_left->c_max == -1) ||
06752             (entry->exp_right->c_max == -1))
06753         entry->c_max = -1;
06754         else
06755             entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
06756     }
06757     }
06758     entry->ref = 1;
06759     if (ctxt->table[key] != NULL)
06760         entry->next = ctxt->table[key];
06761 
06762     ctxt->table[key] = entry;
06763     ctxt->nbElems++;
06764 
06765     return(entry);
06766 }
06767 
06775 void
06776 xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
06777     if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
06778         return;
06779     exp->ref--;
06780     if (exp->ref == 0) {
06781         unsigned short key;
06782 
06783         /* Unlink it first from the hash table */
06784     key = exp->key % ctxt->size;
06785     if (ctxt->table[key] == exp) {
06786         ctxt->table[key] = exp->next;
06787     } else {
06788         xmlExpNodePtr tmp;
06789 
06790         tmp = ctxt->table[key];
06791         while (tmp != NULL) {
06792             if (tmp->next == exp) {
06793             tmp->next = exp->next;
06794             break;
06795         }
06796             tmp = tmp->next;
06797         }
06798     }
06799 
06800         if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) {
06801         xmlExpFree(ctxt, exp->exp_left);
06802         xmlExpFree(ctxt, exp->exp_right);
06803     } else if (exp->type == XML_EXP_COUNT) {
06804         xmlExpFree(ctxt, exp->exp_left);
06805     }
06806         xmlFree(exp);
06807     ctxt->nb_nodes--;
06808     }
06809 }
06810 
06817 void
06818 xmlExpRef(xmlExpNodePtr exp) {
06819     if (exp != NULL)
06820         exp->ref++;
06821 }
06822 
06833 xmlExpNodePtr
06834 xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
06835     if ((ctxt == NULL) || (name == NULL))
06836         return(NULL);
06837     name = xmlDictLookup(ctxt->dict, name, len);
06838     if (name == NULL)
06839         return(NULL);
06840     return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0));
06841 }
06842 
06856 xmlExpNodePtr
06857 xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
06858     if (ctxt == NULL)
06859         return(NULL);
06860     if ((left == NULL) || (right == NULL)) {
06861         xmlExpFree(ctxt, left);
06862         xmlExpFree(ctxt, right);
06863         return(NULL);
06864     }
06865     return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));
06866 }
06867 
06881 xmlExpNodePtr
06882 xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
06883     if (ctxt == NULL)
06884         return(NULL);
06885     if ((left == NULL) || (right == NULL)) {
06886         xmlExpFree(ctxt, left);
06887         xmlExpFree(ctxt, right);
06888         return(NULL);
06889     }
06890     return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));
06891 }
06892 
06907 xmlExpNodePtr
06908 xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
06909     if (ctxt == NULL)
06910         return(NULL);
06911     if ((subset == NULL) || (min < 0) || (max < -1) ||
06912         ((max >= 0) && (min > max))) {
06913     xmlExpFree(ctxt, subset);
06914         return(NULL);
06915     }
06916     return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
06917                               NULL, NULL, min, max));
06918 }
06919 
06920 /************************************************************************
06921  *                                  *
06922  *      Public API for operations on expressions        *
06923  *                                  *
06924  ************************************************************************/
06925 
06926 static int
06927 xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
06928                      const xmlChar**list, int len, int nb) {
06929     int tmp, tmp2;
06930 tail:
06931     switch (exp->type) {
06932         case XML_EXP_EMPTY:
06933         return(0);
06934         case XML_EXP_ATOM:
06935         for (tmp = 0;tmp < nb;tmp++)
06936             if (list[tmp] == exp->exp_str)
06937             return(0);
06938             if (nb >= len)
06939             return(-2);
06940         list[nb] = exp->exp_str;
06941         return(1);
06942         case XML_EXP_COUNT:
06943         exp = exp->exp_left;
06944         goto tail;
06945         case XML_EXP_SEQ:
06946         case XML_EXP_OR:
06947         tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb);
06948         if (tmp < 0)
06949             return(tmp);
06950         tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len,
06951                                     nb + tmp);
06952         if (tmp2 < 0)
06953             return(tmp2);
06954             return(tmp + tmp2);
06955     }
06956     return(-1);
06957 }
06958 
06971 int
06972 xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
06973                   const xmlChar**langList, int len) {
06974     if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
06975         return(-1);
06976     return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0));
06977 }
06978 
06979 static int
06980 xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
06981                   const xmlChar**list, int len, int nb) {
06982     int tmp, tmp2;
06983 tail:
06984     switch (exp->type) {
06985         case XML_EXP_FORBID:
06986         return(0);
06987         case XML_EXP_EMPTY:
06988         return(0);
06989         case XML_EXP_ATOM:
06990         for (tmp = 0;tmp < nb;tmp++)
06991             if (list[tmp] == exp->exp_str)
06992             return(0);
06993             if (nb >= len)
06994             return(-2);
06995         list[nb] = exp->exp_str;
06996         return(1);
06997         case XML_EXP_COUNT:
06998         exp = exp->exp_left;
06999         goto tail;
07000         case XML_EXP_SEQ:
07001         tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
07002         if (tmp < 0)
07003             return(tmp);
07004         if (IS_NILLABLE(exp->exp_left)) {
07005         tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
07006                         nb + tmp);
07007         if (tmp2 < 0)
07008             return(tmp2);
07009         tmp += tmp2;
07010         }
07011             return(tmp);
07012         case XML_EXP_OR:
07013         tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
07014         if (tmp < 0)
07015             return(tmp);
07016         tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
07017                                     nb + tmp);
07018         if (tmp2 < 0)
07019             return(tmp2);
07020             return(tmp + tmp2);
07021     }
07022     return(-1);
07023 }
07024 
07039 int
07040 xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
07041                const xmlChar**tokList, int len) {
07042     if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
07043         return(-1);
07044     return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0));
07045 }
07046 
07055 int
07056 xmlExpIsNillable(xmlExpNodePtr exp) {
07057     if (exp == NULL)
07058         return(-1);
07059     return(IS_NILLABLE(exp) != 0);
07060 }
07061 
07062 static xmlExpNodePtr
07063 xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
07064 {
07065     xmlExpNodePtr ret;
07066 
07067     switch (exp->type) {
07068     case XML_EXP_EMPTY:
07069         return(forbiddenExp);
07070     case XML_EXP_FORBID:
07071         return(forbiddenExp);
07072     case XML_EXP_ATOM:
07073         if (exp->exp_str == str) {
07074 #ifdef DEBUG_DERIV
07075         printf("deriv atom: equal => Empty\n");
07076 #endif
07077             ret = emptyExp;
07078         } else {
07079 #ifdef DEBUG_DERIV
07080         printf("deriv atom: mismatch => forbid\n");
07081 #endif
07082             /* TODO wildcards here */
07083         ret = forbiddenExp;
07084         }
07085         return(ret);
07086     case XML_EXP_OR: {
07087         xmlExpNodePtr tmp;
07088 
07089 #ifdef DEBUG_DERIV
07090         printf("deriv or: => or(derivs)\n");
07091 #endif
07092         tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
07093         if (tmp == NULL) {
07094         return(NULL);
07095         }
07096         ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
07097         if (ret == NULL) {
07098             xmlExpFree(ctxt, tmp);
07099         return(NULL);
07100         }
07101             ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
07102                  NULL, 0, 0);
07103         return(ret);
07104     }
07105     case XML_EXP_SEQ:
07106 #ifdef DEBUG_DERIV
07107         printf("deriv seq: starting with left\n");
07108 #endif
07109         ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
07110         if (ret == NULL) {
07111             return(NULL);
07112         } else if (ret == forbiddenExp) {
07113             if (IS_NILLABLE(exp->exp_left)) {
07114 #ifdef DEBUG_DERIV
07115             printf("deriv seq: left failed but nillable\n");
07116 #endif
07117             ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
07118         }
07119         } else {
07120 #ifdef DEBUG_DERIV
07121         printf("deriv seq: left match => sequence\n");
07122 #endif
07123             exp->exp_right->ref++;
07124             ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
07125                                  NULL, 0, 0);
07126         }
07127         return(ret);
07128     case XML_EXP_COUNT: {
07129         int min, max;
07130         xmlExpNodePtr tmp;
07131 
07132         if (exp->exp_max == 0)
07133         return(forbiddenExp);
07134         ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
07135         if (ret == NULL)
07136             return(NULL);
07137         if (ret == forbiddenExp) {
07138 #ifdef DEBUG_DERIV
07139         printf("deriv count: pattern mismatch => forbid\n");
07140 #endif
07141             return(ret);
07142         }
07143         if (exp->exp_max == 1)
07144         return(ret);
07145         if (exp->exp_max < 0) /* unbounded */
07146         max = -1;
07147         else
07148         max = exp->exp_max - 1;
07149         if (exp->exp_min > 0)
07150         min = exp->exp_min - 1;
07151         else
07152         min = 0;
07153         exp->exp_left->ref++;
07154         tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
07155                      NULL, min, max);
07156         if (ret == emptyExp) {
07157 #ifdef DEBUG_DERIV
07158         printf("deriv count: match to empty => new count\n");
07159 #endif
07160             return(tmp);
07161         }
07162 #ifdef DEBUG_DERIV
07163         printf("deriv count: match => sequence with new count\n");
07164 #endif
07165         return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
07166                                   NULL, 0, 0));
07167     }
07168     }
07169     return(NULL);
07170 }
07171 
07184 xmlExpNodePtr
07185 xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
07186                    const xmlChar *str, int len) {
07187     const xmlChar *input;
07188 
07189     if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) {
07190         return(NULL);
07191     }
07192     /*
07193      * check the string is in the dictionnary, if yes use an interned
07194      * copy, otherwise we know it's not an acceptable input
07195      */
07196     input = xmlDictExists(ctxt->dict, str, len);
07197     if (input == NULL) {
07198         return(forbiddenExp);
07199     }
07200     return(xmlExpStringDeriveInt(ctxt, exp, input));
07201 }
07202 
07203 static int
07204 xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
07205     int ret = 1;
07206 
07207     if (sub->c_max == -1) {
07208         if (exp->c_max != -1)
07209         ret = 0;
07210     } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) {
07211         ret = 0;
07212     }
07213 #if 0
07214     if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp)))
07215         ret = 0;
07216 #endif
07217     return(ret);
07218 }
07219 
07220 static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
07221                                         xmlExpNodePtr sub);
07237 static int
07238 xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
07239              xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
07240     int i;
07241     xmlExpNodePtr tmp, tmp2;
07242 
07243     if (mult != NULL) *mult = NULL;
07244     if (remain != NULL) *remain = NULL;
07245     if (exp->c_max == -1) return(0);
07246     if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub))) return(0);
07247 
07248     for (i = 1;i <= exp->c_max;i++) {
07249         sub->ref++;
07250         tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
07251                  sub, NULL, NULL, i, i);
07252     if (tmp == NULL) {
07253         return(-1);
07254     }
07255     if (!xmlExpCheckCard(tmp, exp)) {
07256         xmlExpFree(ctxt, tmp);
07257         continue;
07258     }
07259     tmp2 = xmlExpExpDeriveInt(ctxt, tmp, exp);
07260     if (tmp2 == NULL) {
07261         xmlExpFree(ctxt, tmp);
07262         return(-1);
07263     }
07264     if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) {
07265         if (remain != NULL)
07266             *remain = tmp2;
07267         else
07268             xmlExpFree(ctxt, tmp2);
07269         if (mult != NULL)
07270             *mult = tmp;
07271         else
07272             xmlExpFree(ctxt, tmp);
07273 #ifdef DEBUG_DERIV
07274         printf("Divide succeeded %d\n", i);
07275 #endif
07276         return(i);
07277     }
07278     xmlExpFree(ctxt, tmp);
07279     xmlExpFree(ctxt, tmp2);
07280     }
07281 #ifdef DEBUG_DERIV
07282     printf("Divide failed\n");
07283 #endif
07284     return(0);
07285 }
07286 
07298 static xmlExpNodePtr
07299 xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
07300     xmlExpNodePtr ret, tmp, tmp2, tmp3;
07301     const xmlChar **tab;
07302     int len, i;
07303 
07304     /*
07305      * In case of equality and if the expression can only consume a finite
07306      * amount, then the derivation is empty
07307      */
07308     if ((exp == sub) && (exp->c_max >= 0)) {
07309 #ifdef DEBUG_DERIV
07310         printf("Equal(exp, sub) and finite -> Empty\n");
07311 #endif
07312         return(emptyExp);
07313     }
07314     /*
07315      * decompose sub sequence first
07316      */
07317     if (sub->type == XML_EXP_EMPTY) {
07318 #ifdef DEBUG_DERIV
07319         printf("Empty(sub) -> Empty\n");
07320 #endif
07321     exp->ref++;
07322         return(exp);
07323     }
07324     if (sub->type == XML_EXP_SEQ) {
07325 #ifdef DEBUG_DERIV
07326         printf("Seq(sub) -> decompose\n");
07327 #endif
07328         tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
07329     if (tmp == NULL)
07330         return(NULL);
07331     if (tmp == forbiddenExp)
07332         return(tmp);
07333     ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
07334     xmlExpFree(ctxt, tmp);
07335     return(ret);
07336     }
07337     if (sub->type == XML_EXP_OR) {
07338 #ifdef DEBUG_DERIV
07339         printf("Or(sub) -> decompose\n");
07340 #endif
07341         tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
07342     if (tmp == forbiddenExp)
07343         return(tmp);
07344     if (tmp == NULL)
07345         return(NULL);
07346     ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
07347     if ((ret == NULL) || (ret == forbiddenExp)) {
07348         xmlExpFree(ctxt, tmp);
07349         return(ret);
07350     }
07351     return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
07352     }
07353     if (!xmlExpCheckCard(exp, sub)) {
07354 #ifdef DEBUG_DERIV
07355         printf("CheckCard(exp, sub) failed -> Forbid\n");
07356 #endif
07357         return(forbiddenExp);
07358     }
07359     switch (exp->type) {
07360         case XML_EXP_EMPTY:
07361         if (sub == emptyExp)
07362             return(emptyExp);
07363 #ifdef DEBUG_DERIV
07364         printf("Empty(exp) -> Forbid\n");
07365 #endif
07366         return(forbiddenExp);
07367         case XML_EXP_FORBID:
07368 #ifdef DEBUG_DERIV
07369         printf("Forbid(exp) -> Forbid\n");
07370 #endif
07371         return(forbiddenExp);
07372         case XML_EXP_ATOM:
07373         if (sub->type == XML_EXP_ATOM) {
07374             /* TODO: handle wildcards */
07375             if (exp->exp_str == sub->exp_str) {
07376 #ifdef DEBUG_DERIV
07377             printf("Atom match -> Empty\n");
07378 #endif
07379             return(emptyExp);
07380                 }
07381 #ifdef DEBUG_DERIV
07382         printf("Atom mismatch -> Forbid\n");
07383 #endif
07384             return(forbiddenExp);
07385         }
07386         if ((sub->type == XML_EXP_COUNT) &&
07387             (sub->exp_max == 1) &&
07388             (sub->exp_left->type == XML_EXP_ATOM)) {
07389             /* TODO: handle wildcards */
07390             if (exp->exp_str == sub->exp_left->exp_str) {
07391 #ifdef DEBUG_DERIV
07392             printf("Atom match -> Empty\n");
07393 #endif
07394             return(emptyExp);
07395         }
07396 #ifdef DEBUG_DERIV
07397         printf("Atom mismatch -> Forbid\n");
07398 #endif
07399             return(forbiddenExp);
07400         }
07401 #ifdef DEBUG_DERIV
07402         printf("Compex exp vs Atom -> Forbid\n");
07403 #endif
07404         return(forbiddenExp);
07405         case XML_EXP_SEQ:
07406         /* try to get the sequence consumed only if possible */
07407         if (xmlExpCheckCard(exp->exp_left, sub)) {
07408         /* See if the sequence can be consumed directly */
07409 #ifdef DEBUG_DERIV
07410         printf("Seq trying left only\n");
07411 #endif
07412         ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
07413         if ((ret != forbiddenExp) && (ret != NULL)) {
07414 #ifdef DEBUG_DERIV
07415             printf("Seq trying left only worked\n");
07416 #endif
07417             /*
07418              * TODO: assumption here that we are determinist
07419              *       i.e. we won't get to a nillable exp left
07420              *       subset which could be matched by the right
07421              *       part too.
07422              * e.g.: (a | b)+,(a | c) and 'a+,a'
07423              */
07424             exp->exp_right->ref++;
07425             return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
07426                           exp->exp_right, NULL, 0, 0));
07427         }
07428 #ifdef DEBUG_DERIV
07429         } else {
07430         printf("Seq: left too short\n");
07431 #endif
07432         }
07433         /* Try instead to decompose */
07434         if (sub->type == XML_EXP_COUNT) {
07435         int min, max;
07436 
07437 #ifdef DEBUG_DERIV
07438         printf("Seq: sub is a count\n");
07439 #endif
07440             ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
07441         if (ret == NULL)
07442             return(NULL);
07443         if (ret != forbiddenExp) {
07444 #ifdef DEBUG_DERIV
07445             printf("Seq , Count match on left\n");
07446 #endif
07447             if (sub->exp_max < 0)
07448                 max = -1;
07449                 else
07450                 max = sub->exp_max -1;
07451             if (sub->exp_min > 0)
07452                 min = sub->exp_min -1;
07453             else
07454                 min = 0;
07455             exp->exp_right->ref++;
07456             tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
07457                                      exp->exp_right, NULL, 0, 0);
07458             if (tmp == NULL)
07459                 return(NULL);
07460 
07461             sub->exp_left->ref++;
07462             tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
07463                       sub->exp_left, NULL, NULL, min, max);
07464             if (tmp2 == NULL) {
07465                 xmlExpFree(ctxt, tmp);
07466             return(NULL);
07467             }
07468             ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
07469             xmlExpFree(ctxt, tmp);
07470             xmlExpFree(ctxt, tmp2);
07471             return(ret);
07472         }
07473         }
07474         /* we made no progress on structured operations */
07475         break;
07476         case XML_EXP_OR:
07477 #ifdef DEBUG_DERIV
07478         printf("Or , trying both side\n");
07479 #endif
07480         ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
07481         if (ret == NULL)
07482             return(NULL);
07483         tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
07484         if (tmp == NULL) {
07485         xmlExpFree(ctxt, ret);
07486             return(NULL);
07487         }
07488         return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
07489         case XML_EXP_COUNT: {
07490         int min, max;
07491 
07492         if (sub->type == XML_EXP_COUNT) {
07493             /*
07494          * Try to see if the loop is completely subsumed
07495          */
07496             tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
07497         if (tmp == NULL)
07498             return(NULL);
07499         if (tmp == forbiddenExp) {
07500             int mult;
07501 
07502 #ifdef DEBUG_DERIV
07503             printf("Count, Count inner don't subsume\n");
07504 #endif
07505             mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
07506                                 NULL, &tmp);
07507             if (mult <= 0) {
07508 #ifdef DEBUG_DERIV
07509             printf("Count, Count not multiple => forbidden\n");
07510 #endif
07511                         return(forbiddenExp);
07512             }
07513             if (sub->exp_max == -1) {
07514                 max = -1;
07515             if (exp->exp_max == -1) {
07516                 if (exp->exp_min <= sub->exp_min * mult)
07517                     min = 0;
07518                 else
07519                     min = exp->exp_min - sub->exp_min * mult;
07520             } else {
07521 #ifdef DEBUG_DERIV
07522                 printf("Count, Count finite can't subsume infinite\n");
07523 #endif
07524                             xmlExpFree(ctxt, tmp);
07525                 return(forbiddenExp);
07526             }
07527             } else {
07528             if (exp->exp_max == -1) {
07529 #ifdef DEBUG_DERIV
07530                 printf("Infinite loop consume mult finite loop\n");
07531 #endif
07532                 if (exp->exp_min > sub->exp_min * mult) {
07533                 max = -1;
07534                 min = exp->exp_min - sub->exp_min * mult;
07535                 } else {
07536                 max = -1;
07537                 min = 0;
07538                 }
07539             } else {
07540                 if (exp->exp_max < sub->exp_max * mult) {
07541 #ifdef DEBUG_DERIV
07542                 printf("loops max mult mismatch => forbidden\n");
07543 #endif
07544                 xmlExpFree(ctxt, tmp);
07545                 return(forbiddenExp);
07546                 }
07547                 if (sub->exp_max * mult > exp->exp_min)
07548                 min = 0;
07549                 else
07550                 min = exp->exp_min - sub->exp_max * mult;
07551                 max = exp->exp_max - sub->exp_max * mult;
07552             }
07553             }
07554         } else if (!IS_NILLABLE(tmp)) {
07555             /*
07556              * TODO: loop here to try to grow if working on finite
07557              *       blocks.
07558              */
07559 #ifdef DEBUG_DERIV
07560             printf("Count, Count remain not nillable => forbidden\n");
07561 #endif
07562             xmlExpFree(ctxt, tmp);
07563             return(forbiddenExp);
07564         } else if (sub->exp_max == -1) {
07565             if (exp->exp_max == -1) {
07566                 if (exp->exp_min <= sub->exp_min) {
07567 #ifdef DEBUG_DERIV
07568                 printf("Infinite loops Okay => COUNT(0,Inf)\n");
07569 #endif
07570                             max = -1;
07571                 min = 0;
07572             } else {
07573 #ifdef DEBUG_DERIV
07574                 printf("Infinite loops min => Count(X,Inf)\n");
07575 #endif
07576                             max = -1;
07577                 min = exp->exp_min - sub->exp_min;
07578             }
07579             } else if (exp->exp_min > sub->exp_min) {
07580 #ifdef DEBUG_DERIV
07581             printf("loops min mismatch 1 => forbidden ???\n");
07582 #endif
07583                 xmlExpFree(ctxt, tmp);
07584                 return(forbiddenExp);
07585             } else {
07586             max = -1;
07587             min = 0;
07588             }
07589         } else {
07590             if (exp->exp_max == -1) {
07591 #ifdef DEBUG_DERIV
07592             printf("Infinite loop consume finite loop\n");
07593 #endif
07594                 if (exp->exp_min > sub->exp_min) {
07595                 max = -1;
07596                 min = exp->exp_min - sub->exp_min;
07597             } else {
07598                 max = -1;
07599                 min = 0;
07600             }
07601             } else {
07602                 if (exp->exp_max < sub->exp_max) {
07603 #ifdef DEBUG_DERIV
07604                 printf("loops max mismatch => forbidden\n");
07605 #endif
07606                 xmlExpFree(ctxt, tmp);
07607                 return(forbiddenExp);
07608             }
07609             if (sub->exp_max > exp->exp_min)
07610                 min = 0;
07611             else
07612                 min = exp->exp_min - sub->exp_max;
07613             max = exp->exp_max - sub->exp_max;
07614             }
07615         }
07616 #ifdef DEBUG_DERIV
07617         printf("loops match => SEQ(COUNT())\n");
07618 #endif
07619         exp->exp_left->ref++;
07620         tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
07621                                   NULL, NULL, min, max);
07622         if (tmp2 == NULL) {
07623             return(NULL);
07624         }
07625                 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
07626                                  NULL, 0, 0);
07627         return(ret);
07628         }
07629         tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
07630         if (tmp == NULL)
07631         return(NULL);
07632         if (tmp == forbiddenExp) {
07633 #ifdef DEBUG_DERIV
07634         printf("loop mismatch => forbidden\n");
07635 #endif
07636         return(forbiddenExp);
07637         }
07638         if (exp->exp_min > 0)
07639         min = exp->exp_min - 1;
07640         else
07641         min = 0;
07642         if (exp->exp_max < 0)
07643         max = -1;
07644         else
07645         max = exp->exp_max - 1;
07646 
07647 #ifdef DEBUG_DERIV
07648         printf("loop match => SEQ(COUNT())\n");
07649 #endif
07650         exp->exp_left->ref++;
07651         tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
07652                       NULL, NULL, min, max);
07653         if (tmp2 == NULL)
07654         return(NULL);
07655         ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
07656                      NULL, 0, 0);
07657         return(ret);
07658     }
07659     }
07660 
07661 #ifdef DEBUG_DERIV
07662     printf("Fallback to derivative\n");
07663 #endif
07664     if (IS_NILLABLE(sub)) {
07665         if (!(IS_NILLABLE(exp)))
07666         return(forbiddenExp);
07667     else
07668         ret = emptyExp;
07669     } else
07670     ret = NULL;
07671     /*
07672      * here the structured derivation made no progress so
07673      * we use the default token based derivation to force one more step
07674      */
07675     if (ctxt->tabSize == 0)
07676         ctxt->tabSize = 40;
07677 
07678     tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
07679                                    sizeof(const xmlChar *));
07680     if (tab == NULL) {
07681     return(NULL);
07682     }
07683 
07684     /*
07685      * collect all the strings accepted by the subexpression on input
07686      */
07687     len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
07688     while (len < 0) {
07689         const xmlChar **temp;
07690     temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 *
07691                                          sizeof(const xmlChar *));
07692     if (temp == NULL) {
07693         xmlFree((xmlChar **) tab);
07694         return(NULL);
07695     }
07696     tab = temp;
07697     ctxt->tabSize *= 2;
07698     len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
07699     }
07700     for (i = 0;i < len;i++) {
07701         tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
07702     if ((tmp == NULL) || (tmp == forbiddenExp)) {
07703         xmlExpFree(ctxt, ret);
07704         xmlFree((xmlChar **) tab);
07705         return(tmp);
07706     }
07707     tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
07708     if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
07709         xmlExpFree(ctxt, tmp);
07710         xmlExpFree(ctxt, ret);
07711         xmlFree((xmlChar **) tab);
07712         return(tmp);
07713     }
07714     tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
07715     xmlExpFree(ctxt, tmp);
07716     xmlExpFree(ctxt, tmp2);
07717 
07718     if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
07719         xmlExpFree(ctxt, ret);
07720         xmlFree((xmlChar **) tab);
07721         return(tmp3);
07722     }
07723 
07724     if (ret == NULL)
07725         ret = tmp3;
07726     else {
07727         ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
07728         if (ret == NULL) {
07729         xmlFree((xmlChar **) tab);
07730             return(NULL);
07731         }
07732     }
07733     }
07734     xmlFree((xmlChar **) tab);
07735     return(ret);
07736 }
07737     
07752 xmlExpNodePtr
07753 xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
07754     if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
07755         return(NULL);
07756 
07757     /*
07758      * O(1) speedups
07759      */
07760     if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
07761 #ifdef DEBUG_DERIV
07762     printf("Sub nillable and not exp : can't subsume\n");
07763 #endif
07764         return(forbiddenExp);
07765     }
07766     if (xmlExpCheckCard(exp, sub) == 0) {
07767 #ifdef DEBUG_DERIV
07768     printf("sub generate longuer sequances than exp : can't subsume\n");
07769 #endif
07770         return(forbiddenExp);
07771     }
07772     return(xmlExpExpDeriveInt(ctxt, exp, sub));
07773 }
07774 
07786 int
07787 xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
07788     xmlExpNodePtr tmp;
07789     
07790     if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
07791         return(-1);
07792 
07793     /*
07794      * TODO: speedup by checking the language of sub is a subset of the
07795      *       language of exp
07796      */
07797     /*
07798      * O(1) speedups
07799      */
07800     if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
07801 #ifdef DEBUG_DERIV
07802     printf("Sub nillable and not exp : can't subsume\n");
07803 #endif
07804         return(0);
07805     }
07806     if (xmlExpCheckCard(exp, sub) == 0) {
07807 #ifdef DEBUG_DERIV
07808     printf("sub generate longuer sequances than exp : can't subsume\n");
07809 #endif
07810         return(0);
07811     }
07812     tmp = xmlExpExpDeriveInt(ctxt, exp, sub);
07813 #ifdef DEBUG_DERIV
07814     printf("Result derivation :\n");
07815     PRINT_EXP(tmp);
07816 #endif
07817     if (tmp == NULL)
07818         return(-1);
07819     if (tmp == forbiddenExp)
07820     return(0);
07821     if (tmp == emptyExp)
07822     return(1);
07823     if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
07824         xmlExpFree(ctxt, tmp);
07825         return(1);
07826     }
07827     xmlExpFree(ctxt, tmp);
07828     return(0);
07829 }
07830 
07831 /************************************************************************
07832  *                                  *
07833  *          Parsing expression              *
07834  *                                  *
07835  ************************************************************************/
07836 
07837 static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
07838 
07839 #undef CUR
07840 #define CUR (*ctxt->cur)
07841 #undef NEXT
07842 #define NEXT ctxt->cur++;
07843 #undef IS_BLANK
07844 #define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
07845 #define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
07846 
07847 static int
07848 xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
07849     int ret = 0;
07850 
07851     SKIP_BLANKS
07852     if (CUR == '*') {
07853     NEXT
07854     return(-1);
07855     }
07856     if ((CUR < '0') || (CUR > '9'))
07857         return(-1);
07858     while ((CUR >= '0') && (CUR <= '9')) {
07859         ret = ret * 10 + (CUR - '0');
07860     NEXT
07861     }
07862     return(ret);
07863 }
07864 
07865 static xmlExpNodePtr
07866 xmlExpParseOr(xmlExpCtxtPtr ctxt) {
07867     const char *base;
07868     xmlExpNodePtr ret;
07869     const xmlChar *val;
07870 
07871     SKIP_BLANKS
07872     base = ctxt->cur;
07873     if (*ctxt->cur == '(') {
07874         NEXT
07875     ret = xmlExpParseExpr(ctxt);
07876     SKIP_BLANKS
07877     if (*ctxt->cur != ')') {
07878         fprintf(stderr, "unbalanced '(' : %s\n", base);
07879         xmlExpFree(ctxt, ret);
07880         return(NULL);
07881     }
07882     NEXT;
07883     SKIP_BLANKS
07884     goto parse_quantifier;
07885     }
07886     while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
07887            (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
07888        (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
07889     NEXT;
07890     val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
07891     if (val == NULL)
07892         return(NULL);
07893     ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
07894     if (ret == NULL)
07895         return(NULL);
07896     SKIP_BLANKS
07897 parse_quantifier:
07898     if (CUR == '{') {
07899         int min, max;
07900 
07901         NEXT
07902     min = xmlExpParseNumber(ctxt);
07903     if (min < 0) {
07904         xmlExpFree(ctxt, ret);
07905         return(NULL);
07906     }
07907     SKIP_BLANKS
07908     if (CUR == ',') {
07909         NEXT
07910         max = xmlExpParseNumber(ctxt);
07911         SKIP_BLANKS
07912     } else
07913         max = min;
07914     if (CUR != '}') {
07915         xmlExpFree(ctxt, ret);
07916         return(NULL);
07917     }
07918         NEXT
07919     ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
07920                              min, max);
07921     SKIP_BLANKS
07922     } else if (CUR == '?') {
07923         NEXT
07924     ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
07925                              0, 1);
07926     SKIP_BLANKS
07927     } else if (CUR == '+') {
07928         NEXT
07929     ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
07930                              1, -1);
07931     SKIP_BLANKS
07932     } else if (CUR == '*') {
07933         NEXT
07934     ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
07935                              0, -1);
07936     SKIP_BLANKS
07937     } 
07938     return(ret);
07939 }
07940 
07941 
07942 static xmlExpNodePtr
07943 xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
07944     xmlExpNodePtr ret, right;
07945 
07946     ret = xmlExpParseOr(ctxt);
07947     SKIP_BLANKS
07948     while (CUR == '|') {
07949         NEXT
07950     right = xmlExpParseOr(ctxt);
07951     if (right == NULL) {
07952         xmlExpFree(ctxt, ret);
07953         return(NULL);
07954     }
07955     ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
07956     if (ret == NULL)
07957         return(NULL);
07958     }
07959     return(ret);
07960 }
07961 
07962 static xmlExpNodePtr
07963 xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
07964     xmlExpNodePtr ret, right;
07965 
07966     ret = xmlExpParseSeq(ctxt);
07967     SKIP_BLANKS
07968     while (CUR == ',') {
07969         NEXT
07970     right = xmlExpParseSeq(ctxt);
07971     if (right == NULL) {
07972         xmlExpFree(ctxt, ret);
07973         return(NULL);
07974     }
07975     ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
07976     if (ret == NULL)
07977         return(NULL);
07978     }
07979     return(ret);
07980 }
07981 
07999 xmlExpNodePtr
08000 xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
08001     xmlExpNodePtr ret;
08002 
08003     ctxt->expr = expr;
08004     ctxt->cur = expr;
08005 
08006     ret = xmlExpParseExpr(ctxt);
08007     SKIP_BLANKS
08008     if (*ctxt->cur != 0) {
08009         xmlExpFree(ctxt, ret);
08010         return(NULL);
08011     }
08012     return(ret);
08013 }
08014 
08015 static void
08016 xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
08017     xmlExpNodePtr c;
08018 
08019     if (expr == NULL) return;
08020     if (glob) xmlBufferWriteChar(buf, "(");
08021     switch (expr->type) {
08022         case XML_EXP_EMPTY:
08023         xmlBufferWriteChar(buf, "empty");
08024         break;
08025         case XML_EXP_FORBID:
08026         xmlBufferWriteChar(buf, "forbidden");
08027         break;
08028         case XML_EXP_ATOM:
08029         xmlBufferWriteCHAR(buf, expr->exp_str);
08030         break;
08031         case XML_EXP_SEQ:
08032         c = expr->exp_left;
08033         if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
08034             xmlExpDumpInt(buf, c, 1);
08035         else
08036             xmlExpDumpInt(buf, c, 0);
08037         xmlBufferWriteChar(buf, " , ");
08038         c = expr->exp_right;
08039         if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
08040             xmlExpDumpInt(buf, c, 1);
08041         else
08042             xmlExpDumpInt(buf, c, 0);
08043             break;
08044         case XML_EXP_OR:
08045         c = expr->exp_left;
08046         if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
08047             xmlExpDumpInt(buf, c, 1);
08048         else
08049             xmlExpDumpInt(buf, c, 0);
08050         xmlBufferWriteChar(buf, " | ");
08051         c = expr->exp_right;
08052         if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
08053             xmlExpDumpInt(buf, c, 1);
08054         else
08055             xmlExpDumpInt(buf, c, 0);
08056             break;
08057         case XML_EXP_COUNT: {
08058         char rep[40];
08059         
08060         c = expr->exp_left;
08061         if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
08062             xmlExpDumpInt(buf, c, 1);
08063         else
08064             xmlExpDumpInt(buf, c, 0);
08065         if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
08066         rep[0] = '?';
08067         rep[1] = 0;
08068         } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
08069         rep[0] = '*';
08070         rep[1] = 0;
08071         } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
08072         rep[0] = '+';
08073         rep[1] = 0;
08074         } else if (expr->exp_max == expr->exp_min) {
08075             snprintf(rep, 39, "{%d}", expr->exp_min);
08076         } else if (expr->exp_max < 0) {
08077             snprintf(rep, 39, "{%d,inf}", expr->exp_min);
08078         } else {
08079             snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
08080         }
08081         rep[39] = 0;
08082         xmlBufferWriteChar(buf, rep);
08083         break;
08084     }
08085     default:
08086         fprintf(stderr, "Error in tree\n");
08087     }
08088     if (glob)
08089         xmlBufferWriteChar(buf, ")");
08090 }
08098 void
08099 xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
08100     if ((buf == NULL) || (expr == NULL))
08101         return;
08102     xmlExpDumpInt(buf, expr, 0);
08103 }
08104 
08113 int
08114 xmlExpMaxToken(xmlExpNodePtr expr) {
08115     if (expr == NULL)
08116         return(-1);
08117     return(expr->c_max);
08118 }
08119 
08128 int
08129 xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
08130     if (ctxt == NULL)
08131         return(-1);
08132     return(ctxt->nb_nodes);
08133 }
08134 
08143 int
08144 xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
08145     if (ctxt == NULL)
08146         return(-1);
08147     return(ctxt->nb_cons);
08148 }
08149 
08150 #endif /* LIBXML_EXPR_ENABLED */
08151 #define bottom_xmlregexp
08152 #include "elfgcchack.h"
08153 #endif /* LIBXML_REGEXP_ENABLED */

Generated on Fri May 25 2012 04:33:17 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.