Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygenxmlregexp.c
Go to the documentation of this file.
00001 /* 00002 * regexp.c: generic and extensible Regular Expression engine 00003 * 00004 * Basically designed with the purpose of compiling regexps for 00005 * the variety of validation/shemas mechanisms now available in 00006 * XML related specifications these include: 00007 * - XML-1.0 DTD validation 00008 * - XML Schemas structure part 1 00009 * - XML Schemas Datatypes part 2 especially Appendix F 00010 * - RELAX-NG/TREX i.e. the counter proposal 00011 * 00012 * See Copyright for the status of this software. 00013 * 00014 * Daniel Veillard <veillard@redhat.com> 00015 */ 00016 00017 #define IN_LIBXML 00018 #include "libxml.h" 00019 00020 #ifdef LIBXML_REGEXP_ENABLED 00021 00022 /* #define DEBUG_ERR */ 00023 00024 #include <stdio.h> 00025 #include <string.h> 00026 #ifdef HAVE_LIMITS_H 00027 #include <limits.h> 00028 #endif 00029 00030 #include <libxml/tree.h> 00031 #include <libxml/parserInternals.h> 00032 #include <libxml/xmlregexp.h> 00033 #include <libxml/xmlautomata.h> 00034 #include <libxml/xmlunicode.h> 00035 00036 #ifndef INT_MAX 00037 #define INT_MAX 123456789 /* easy to flag and big enough for our needs */ 00038 #endif 00039 00040 /* #define DEBUG_REGEXP_GRAPH */ 00041 /* #define DEBUG_REGEXP_EXEC */ 00042 /* #define DEBUG_PUSH */ 00043 /* #define DEBUG_COMPACTION */ 00044 00045 #define MAX_PUSH 10000000 00046 00047 #define ERROR(str) \ 00048 ctxt->error = XML_REGEXP_COMPILE_ERROR; \ 00049 xmlRegexpErrCompile(ctxt, str); 00050 #define NEXT ctxt->cur++ 00051 #define CUR (*(ctxt->cur)) 00052 #define NXT(index) (ctxt->cur[index]) 00053 00054 #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l) 00055 #define NEXTL(l) ctxt->cur += l; 00056 #define XML_REG_STRING_SEPARATOR '|' 00057 /* 00058 * Need PREV to check on a '-' within a Character Group. May only be used 00059 * when it's guaranteed that cur is not at the beginning of ctxt->string! 00060 */ 00061 #define PREV (ctxt->cur[-1]) 00062 00068 #define TODO \ 00069 xmlGenericError(xmlGenericErrorContext, \ 00070 "Unimplemented block at %s:%d\n", \ 00071 __FILE__, __LINE__); 00072 00073 /************************************************************************ 00074 * * 00075 * Datatypes and structures * 00076 * * 00077 ************************************************************************/ 00078 00079 /* 00080 * Note: the order of the enums below is significant, do not shuffle 00081 */ 00082 typedef enum { 00083 XML_REGEXP_EPSILON = 1, 00084 XML_REGEXP_CHARVAL, 00085 XML_REGEXP_RANGES, 00086 XML_REGEXP_SUBREG, /* used for () sub regexps */ 00087 XML_REGEXP_STRING, 00088 XML_REGEXP_ANYCHAR, /* . */ 00089 XML_REGEXP_ANYSPACE, /* \s */ 00090 XML_REGEXP_NOTSPACE, /* \S */ 00091 XML_REGEXP_INITNAME, /* \l */ 00092 XML_REGEXP_NOTINITNAME, /* \L */ 00093 XML_REGEXP_NAMECHAR, /* \c */ 00094 XML_REGEXP_NOTNAMECHAR, /* \C */ 00095 XML_REGEXP_DECIMAL, /* \d */ 00096 XML_REGEXP_NOTDECIMAL, /* \D */ 00097 XML_REGEXP_REALCHAR, /* \w */ 00098 XML_REGEXP_NOTREALCHAR, /* \W */ 00099 XML_REGEXP_LETTER = 100, 00100 XML_REGEXP_LETTER_UPPERCASE, 00101 XML_REGEXP_LETTER_LOWERCASE, 00102 XML_REGEXP_LETTER_TITLECASE, 00103 XML_REGEXP_LETTER_MODIFIER, 00104 XML_REGEXP_LETTER_OTHERS, 00105 XML_REGEXP_MARK, 00106 XML_REGEXP_MARK_NONSPACING, 00107 XML_REGEXP_MARK_SPACECOMBINING, 00108 XML_REGEXP_MARK_ENCLOSING, 00109 XML_REGEXP_NUMBER, 00110 XML_REGEXP_NUMBER_DECIMAL, 00111 XML_REGEXP_NUMBER_LETTER, 00112 XML_REGEXP_NUMBER_OTHERS, 00113 XML_REGEXP_PUNCT, 00114 XML_REGEXP_PUNCT_CONNECTOR, 00115 XML_REGEXP_PUNCT_DASH, 00116 XML_REGEXP_PUNCT_OPEN, 00117 XML_REGEXP_PUNCT_CLOSE, 00118 XML_REGEXP_PUNCT_INITQUOTE, 00119 XML_REGEXP_PUNCT_FINQUOTE, 00120 XML_REGEXP_PUNCT_OTHERS, 00121 XML_REGEXP_SEPAR, 00122 XML_REGEXP_SEPAR_SPACE, 00123 XML_REGEXP_SEPAR_LINE, 00124 XML_REGEXP_SEPAR_PARA, 00125 XML_REGEXP_SYMBOL, 00126 XML_REGEXP_SYMBOL_MATH, 00127 XML_REGEXP_SYMBOL_CURRENCY, 00128 XML_REGEXP_SYMBOL_MODIFIER, 00129 XML_REGEXP_SYMBOL_OTHERS, 00130 XML_REGEXP_OTHER, 00131 XML_REGEXP_OTHER_CONTROL, 00132 XML_REGEXP_OTHER_FORMAT, 00133 XML_REGEXP_OTHER_PRIVATE, 00134 XML_REGEXP_OTHER_NA, 00135 XML_REGEXP_BLOCK_NAME 00136 } xmlRegAtomType; 00137 00138 typedef enum { 00139 XML_REGEXP_QUANT_EPSILON = 1, 00140 XML_REGEXP_QUANT_ONCE, 00141 XML_REGEXP_QUANT_OPT, 00142 XML_REGEXP_QUANT_MULT, 00143 XML_REGEXP_QUANT_PLUS, 00144 XML_REGEXP_QUANT_ONCEONLY, 00145 XML_REGEXP_QUANT_ALL, 00146 XML_REGEXP_QUANT_RANGE 00147 } xmlRegQuantType; 00148 00149 typedef enum { 00150 XML_REGEXP_START_STATE = 1, 00151 XML_REGEXP_FINAL_STATE, 00152 XML_REGEXP_TRANS_STATE, 00153 XML_REGEXP_SINK_STATE, 00154 XML_REGEXP_UNREACH_STATE 00155 } xmlRegStateType; 00156 00157 typedef enum { 00158 XML_REGEXP_MARK_NORMAL = 0, 00159 XML_REGEXP_MARK_START, 00160 XML_REGEXP_MARK_VISITED 00161 } xmlRegMarkedType; 00162 00163 typedef struct _xmlRegRange xmlRegRange; 00164 typedef xmlRegRange *xmlRegRangePtr; 00165 00166 struct _xmlRegRange { 00167 int neg; /* 0 normal, 1 not, 2 exclude */ 00168 xmlRegAtomType type; 00169 int start; 00170 int end; 00171 xmlChar *blockName; 00172 }; 00173 00174 typedef struct _xmlRegAtom xmlRegAtom; 00175 typedef xmlRegAtom *xmlRegAtomPtr; 00176 00177 typedef struct _xmlAutomataState xmlRegState; 00178 typedef xmlRegState *xmlRegStatePtr; 00179 00180 struct _xmlRegAtom { 00181 int no; 00182 xmlRegAtomType type; 00183 xmlRegQuantType quant; 00184 int min; 00185 int max; 00186 00187 void *valuep; 00188 void *valuep2; 00189 int neg; 00190 int codepoint; 00191 xmlRegStatePtr start; 00192 xmlRegStatePtr start0; 00193 xmlRegStatePtr stop; 00194 int maxRanges; 00195 int nbRanges; 00196 xmlRegRangePtr *ranges; 00197 void *data; 00198 }; 00199 00200 typedef struct _xmlRegCounter xmlRegCounter; 00201 typedef xmlRegCounter *xmlRegCounterPtr; 00202 00203 struct _xmlRegCounter { 00204 int min; 00205 int max; 00206 }; 00207 00208 typedef struct _xmlRegTrans xmlRegTrans; 00209 typedef xmlRegTrans *xmlRegTransPtr; 00210 00211 struct _xmlRegTrans { 00212 xmlRegAtomPtr atom; 00213 int to; 00214 int counter; 00215 int count; 00216 int nd; 00217 }; 00218 00219 struct _xmlAutomataState { 00220 xmlRegStateType type; 00221 xmlRegMarkedType mark; 00222 xmlRegMarkedType reached; 00223 int no; 00224 int maxTrans; 00225 int nbTrans; 00226 xmlRegTrans *trans; 00227 /* knowing states ponting to us can speed things up */ 00228 int maxTransTo; 00229 int nbTransTo; 00230 int *transTo; 00231 }; 00232 00233 typedef struct _xmlAutomata xmlRegParserCtxt; 00234 typedef xmlRegParserCtxt *xmlRegParserCtxtPtr; 00235 00236 #define AM_AUTOMATA_RNG 1 00237 00238 struct _xmlAutomata { 00239 xmlChar *string; 00240 xmlChar *cur; 00241 00242 int error; 00243 int neg; 00244 00245 xmlRegStatePtr start; 00246 xmlRegStatePtr end; 00247 xmlRegStatePtr state; 00248 00249 xmlRegAtomPtr atom; 00250 00251 int maxAtoms; 00252 int nbAtoms; 00253 xmlRegAtomPtr *atoms; 00254 00255 int maxStates; 00256 int nbStates; 00257 xmlRegStatePtr *states; 00258 00259 int maxCounters; 00260 int nbCounters; 00261 xmlRegCounter *counters; 00262 00263 int determinist; 00264 int negs; 00265 int flags; 00266 }; 00267 00268 struct _xmlRegexp { 00269 xmlChar *string; 00270 int nbStates; 00271 xmlRegStatePtr *states; 00272 int nbAtoms; 00273 xmlRegAtomPtr *atoms; 00274 int nbCounters; 00275 xmlRegCounter *counters; 00276 int determinist; 00277 int flags; 00278 /* 00279 * That's the compact form for determinists automatas 00280 */ 00281 int nbstates; 00282 int *compact; 00283 void **transdata; 00284 int nbstrings; 00285 xmlChar **stringMap; 00286 }; 00287 00288 typedef struct _xmlRegExecRollback xmlRegExecRollback; 00289 typedef xmlRegExecRollback *xmlRegExecRollbackPtr; 00290 00291 struct _xmlRegExecRollback { 00292 xmlRegStatePtr state;/* the current state */ 00293 int index; /* the index in the input stack */ 00294 int nextbranch; /* the next transition to explore in that state */ 00295 int *counts; /* save the automata state if it has some */ 00296 }; 00297 00298 typedef struct _xmlRegInputToken xmlRegInputToken; 00299 typedef xmlRegInputToken *xmlRegInputTokenPtr; 00300 00301 struct _xmlRegInputToken { 00302 xmlChar *value; 00303 void *data; 00304 }; 00305 00306 struct _xmlRegExecCtxt { 00307 int status; /* execution status != 0 indicate an error */ 00308 int determinist; /* did we find an indeterministic behaviour */ 00309 xmlRegexpPtr comp; /* the compiled regexp */ 00310 xmlRegExecCallbacks callback; 00311 void *data; 00312 00313 xmlRegStatePtr state;/* the current state */ 00314 int transno; /* the current transition on that state */ 00315 int transcount; /* the number of chars in char counted transitions */ 00316 00317 /* 00318 * A stack of rollback states 00319 */ 00320 int maxRollbacks; 00321 int nbRollbacks; 00322 xmlRegExecRollback *rollbacks; 00323 00324 /* 00325 * The state of the automata if any 00326 */ 00327 int *counts; 00328 00329 /* 00330 * The input stack 00331 */ 00332 int inputStackMax; 00333 int inputStackNr; 00334 int index; 00335 int *charStack; 00336 const xmlChar *inputString; /* when operating on characters */ 00337 xmlRegInputTokenPtr inputStack;/* when operating on strings */ 00338 00339 /* 00340 * error handling 00341 */ 00342 int errStateNo; /* the error state number */ 00343 xmlRegStatePtr errState; /* the error state */ 00344 xmlChar *errString; /* the string raising the error */ 00345 int *errCounts; /* counters at the error state */ 00346 int nbPush; 00347 }; 00348 00349 #define REGEXP_ALL_COUNTER 0x123456 00350 #define REGEXP_ALL_LAX_COUNTER 0x123457 00351 00352 static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top); 00353 static void xmlRegFreeState(xmlRegStatePtr state); 00354 static void xmlRegFreeAtom(xmlRegAtomPtr atom); 00355 static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr); 00356 static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint); 00357 static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, 00358 int neg, int start, int end, const xmlChar *blockName); 00359 00360 void xmlAutomataSetFlags(xmlAutomataPtr am, int flags); 00361 00362 /************************************************************************ 00363 * * 00364 * Regexp memory error handler * 00365 * * 00366 ************************************************************************/ 00373 static void 00374 xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra) 00375 { 00376 const char *regexp = NULL; 00377 if (ctxt != NULL) { 00378 regexp = (const char *) ctxt->string; 00379 ctxt->error = XML_ERR_NO_MEMORY; 00380 } 00381 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP, 00382 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 00383 regexp, NULL, 0, 0, 00384 "Memory allocation failed : %s\n", extra); 00385 } 00386 00393 static void 00394 xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra) 00395 { 00396 const char *regexp = NULL; 00397 int idx = 0; 00398 00399 if (ctxt != NULL) { 00400 regexp = (const char *) ctxt->string; 00401 idx = ctxt->cur - ctxt->string; 00402 ctxt->error = XML_REGEXP_COMPILE_ERROR; 00403 } 00404 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP, 00405 XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra, 00406 regexp, NULL, idx, 0, 00407 "failed to compile: %s\n", extra); 00408 } 00409 00410 /************************************************************************ 00411 * * 00412 * Allocation/Deallocation * 00413 * * 00414 ************************************************************************/ 00415 00416 static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt); 00425 static xmlRegexpPtr 00426 xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) { 00427 xmlRegexpPtr ret; 00428 00429 ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp)); 00430 if (ret == NULL) { 00431 xmlRegexpErrMemory(ctxt, "compiling regexp"); 00432 return(NULL); 00433 } 00434 memset(ret, 0, sizeof(xmlRegexp)); 00435 ret->string = ctxt->string; 00436 ret->nbStates = ctxt->nbStates; 00437 ret->states = ctxt->states; 00438 ret->nbAtoms = ctxt->nbAtoms; 00439 ret->atoms = ctxt->atoms; 00440 ret->nbCounters = ctxt->nbCounters; 00441 ret->counters = ctxt->counters; 00442 ret->determinist = ctxt->determinist; 00443 ret->flags = ctxt->flags; 00444 if (ret->determinist == -1) { 00445 xmlRegexpIsDeterminist(ret); 00446 } 00447 00448 if ((ret->determinist != 0) && 00449 (ret->nbCounters == 0) && 00450 (ctxt->negs == 0) && 00451 (ret->atoms != NULL) && 00452 (ret->atoms[0] != NULL) && 00453 (ret->atoms[0]->type == XML_REGEXP_STRING)) { 00454 int i, j, nbstates = 0, nbatoms = 0; 00455 int *stateRemap; 00456 int *stringRemap; 00457 int *transitions; 00458 void **transdata; 00459 xmlChar **stringMap; 00460 xmlChar *value; 00461 00462 /* 00463 * Switch to a compact representation 00464 * 1/ counting the effective number of states left 00465 * 2/ counting the unique number of atoms, and check that 00466 * they are all of the string type 00467 * 3/ build a table state x atom for the transitions 00468 */ 00469 00470 stateRemap = xmlMalloc(ret->nbStates * sizeof(int)); 00471 if (stateRemap == NULL) { 00472 xmlRegexpErrMemory(ctxt, "compiling regexp"); 00473 xmlFree(ret); 00474 return(NULL); 00475 } 00476 for (i = 0;i < ret->nbStates;i++) { 00477 if (ret->states[i] != NULL) { 00478 stateRemap[i] = nbstates; 00479 nbstates++; 00480 } else { 00481 stateRemap[i] = -1; 00482 } 00483 } 00484 #ifdef DEBUG_COMPACTION 00485 printf("Final: %d states\n", nbstates); 00486 #endif 00487 stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *)); 00488 if (stringMap == NULL) { 00489 xmlRegexpErrMemory(ctxt, "compiling regexp"); 00490 xmlFree(stateRemap); 00491 xmlFree(ret); 00492 return(NULL); 00493 } 00494 stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int)); 00495 if (stringRemap == NULL) { 00496 xmlRegexpErrMemory(ctxt, "compiling regexp"); 00497 xmlFree(stringMap); 00498 xmlFree(stateRemap); 00499 xmlFree(ret); 00500 return(NULL); 00501 } 00502 for (i = 0;i < ret->nbAtoms;i++) { 00503 if ((ret->atoms[i]->type == XML_REGEXP_STRING) && 00504 (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) { 00505 value = ret->atoms[i]->valuep; 00506 for (j = 0;j < nbatoms;j++) { 00507 if (xmlStrEqual(stringMap[j], value)) { 00508 stringRemap[i] = j; 00509 break; 00510 } 00511 } 00512 if (j >= nbatoms) { 00513 stringRemap[i] = nbatoms; 00514 stringMap[nbatoms] = xmlStrdup(value); 00515 if (stringMap[nbatoms] == NULL) { 00516 for (i = 0;i < nbatoms;i++) 00517 xmlFree(stringMap[i]); 00518 xmlFree(stringRemap); 00519 xmlFree(stringMap); 00520 xmlFree(stateRemap); 00521 xmlFree(ret); 00522 return(NULL); 00523 } 00524 nbatoms++; 00525 } 00526 } else { 00527 xmlFree(stateRemap); 00528 xmlFree(stringRemap); 00529 for (i = 0;i < nbatoms;i++) 00530 xmlFree(stringMap[i]); 00531 xmlFree(stringMap); 00532 xmlFree(ret); 00533 return(NULL); 00534 } 00535 } 00536 #ifdef DEBUG_COMPACTION 00537 printf("Final: %d atoms\n", nbatoms); 00538 #endif 00539 transitions = (int *) xmlMalloc((nbstates + 1) * 00540 (nbatoms + 1) * sizeof(int)); 00541 if (transitions == NULL) { 00542 xmlFree(stateRemap); 00543 xmlFree(stringRemap); 00544 xmlFree(stringMap); 00545 xmlFree(ret); 00546 return(NULL); 00547 } 00548 memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int)); 00549 00550 /* 00551 * Allocate the transition table. The first entry for each 00552 * state corresponds to the state type. 00553 */ 00554 transdata = NULL; 00555 00556 for (i = 0;i < ret->nbStates;i++) { 00557 int stateno, atomno, targetno, prev; 00558 xmlRegStatePtr state; 00559 xmlRegTransPtr trans; 00560 00561 stateno = stateRemap[i]; 00562 if (stateno == -1) 00563 continue; 00564 state = ret->states[i]; 00565 00566 transitions[stateno * (nbatoms + 1)] = state->type; 00567 00568 for (j = 0;j < state->nbTrans;j++) { 00569 trans = &(state->trans[j]); 00570 if ((trans->to == -1) || (trans->atom == NULL)) 00571 continue; 00572 atomno = stringRemap[trans->atom->no]; 00573 if ((trans->atom->data != NULL) && (transdata == NULL)) { 00574 transdata = (void **) xmlMalloc(nbstates * nbatoms * 00575 sizeof(void *)); 00576 if (transdata != NULL) 00577 memset(transdata, 0, 00578 nbstates * nbatoms * sizeof(void *)); 00579 else { 00580 xmlRegexpErrMemory(ctxt, "compiling regexp"); 00581 break; 00582 } 00583 } 00584 targetno = stateRemap[trans->to]; 00585 /* 00586 * if the same atom can generate transitions to 2 different 00587 * states then it means the automata is not determinist and 00588 * the compact form can't be used ! 00589 */ 00590 prev = transitions[stateno * (nbatoms + 1) + atomno + 1]; 00591 if (prev != 0) { 00592 if (prev != targetno + 1) { 00593 ret->determinist = 0; 00594 #ifdef DEBUG_COMPACTION 00595 printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n", 00596 i, j, trans->atom->no, trans->to, atomno, targetno); 00597 printf(" previous to is %d\n", prev); 00598 #endif 00599 if (transdata != NULL) 00600 xmlFree(transdata); 00601 xmlFree(transitions); 00602 xmlFree(stateRemap); 00603 xmlFree(stringRemap); 00604 for (i = 0;i < nbatoms;i++) 00605 xmlFree(stringMap[i]); 00606 xmlFree(stringMap); 00607 goto not_determ; 00608 } 00609 } else { 00610 #if 0 00611 printf("State %d trans %d: atom %d to %d : %d to %d\n", 00612 i, j, trans->atom->no, trans->to, atomno, targetno); 00613 #endif 00614 transitions[stateno * (nbatoms + 1) + atomno + 1] = 00615 targetno + 1; /* to avoid 0 */ 00616 if (transdata != NULL) 00617 transdata[stateno * nbatoms + atomno] = 00618 trans->atom->data; 00619 } 00620 } 00621 } 00622 ret->determinist = 1; 00623 #ifdef DEBUG_COMPACTION 00624 /* 00625 * Debug 00626 */ 00627 for (i = 0;i < nbstates;i++) { 00628 for (j = 0;j < nbatoms + 1;j++) { 00629 printf("%02d ", transitions[i * (nbatoms + 1) + j]); 00630 } 00631 printf("\n"); 00632 } 00633 printf("\n"); 00634 #endif 00635 /* 00636 * Cleanup of the old data 00637 */ 00638 if (ret->states != NULL) { 00639 for (i = 0;i < ret->nbStates;i++) 00640 xmlRegFreeState(ret->states[i]); 00641 xmlFree(ret->states); 00642 } 00643 ret->states = NULL; 00644 ret->nbStates = 0; 00645 if (ret->atoms != NULL) { 00646 for (i = 0;i < ret->nbAtoms;i++) 00647 xmlRegFreeAtom(ret->atoms[i]); 00648 xmlFree(ret->atoms); 00649 } 00650 ret->atoms = NULL; 00651 ret->nbAtoms = 0; 00652 00653 ret->compact = transitions; 00654 ret->transdata = transdata; 00655 ret->stringMap = stringMap; 00656 ret->nbstrings = nbatoms; 00657 ret->nbstates = nbstates; 00658 xmlFree(stateRemap); 00659 xmlFree(stringRemap); 00660 } 00661 not_determ: 00662 ctxt->string = NULL; 00663 ctxt->nbStates = 0; 00664 ctxt->states = NULL; 00665 ctxt->nbAtoms = 0; 00666 ctxt->atoms = NULL; 00667 ctxt->nbCounters = 0; 00668 ctxt->counters = NULL; 00669 return(ret); 00670 } 00671 00680 static xmlRegParserCtxtPtr 00681 xmlRegNewParserCtxt(const xmlChar *string) { 00682 xmlRegParserCtxtPtr ret; 00683 00684 ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt)); 00685 if (ret == NULL) 00686 return(NULL); 00687 memset(ret, 0, sizeof(xmlRegParserCtxt)); 00688 if (string != NULL) 00689 ret->string = xmlStrdup(string); 00690 ret->cur = ret->string; 00691 ret->neg = 0; 00692 ret->negs = 0; 00693 ret->error = 0; 00694 ret->determinist = -1; 00695 return(ret); 00696 } 00697 00710 static xmlRegRangePtr 00711 xmlRegNewRange(xmlRegParserCtxtPtr ctxt, 00712 int neg, xmlRegAtomType type, int start, int end) { 00713 xmlRegRangePtr ret; 00714 00715 ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange)); 00716 if (ret == NULL) { 00717 xmlRegexpErrMemory(ctxt, "allocating range"); 00718 return(NULL); 00719 } 00720 ret->neg = neg; 00721 ret->type = type; 00722 ret->start = start; 00723 ret->end = end; 00724 return(ret); 00725 } 00726 00733 static void 00734 xmlRegFreeRange(xmlRegRangePtr range) { 00735 if (range == NULL) 00736 return; 00737 00738 if (range->blockName != NULL) 00739 xmlFree(range->blockName); 00740 xmlFree(range); 00741 } 00742 00751 static xmlRegRangePtr 00752 xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) { 00753 xmlRegRangePtr ret; 00754 00755 if (range == NULL) 00756 return(NULL); 00757 00758 ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start, 00759 range->end); 00760 if (ret == NULL) 00761 return(NULL); 00762 if (range->blockName != NULL) { 00763 ret->blockName = xmlStrdup(range->blockName); 00764 if (ret->blockName == NULL) { 00765 xmlRegexpErrMemory(ctxt, "allocating range"); 00766 xmlRegFreeRange(ret); 00767 return(NULL); 00768 } 00769 } 00770 return(ret); 00771 } 00772 00782 static xmlRegAtomPtr 00783 xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) { 00784 xmlRegAtomPtr ret; 00785 00786 ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom)); 00787 if (ret == NULL) { 00788 xmlRegexpErrMemory(ctxt, "allocating atom"); 00789 return(NULL); 00790 } 00791 memset(ret, 0, sizeof(xmlRegAtom)); 00792 ret->type = type; 00793 ret->quant = XML_REGEXP_QUANT_ONCE; 00794 ret->min = 0; 00795 ret->max = 0; 00796 return(ret); 00797 } 00798 00805 static void 00806 xmlRegFreeAtom(xmlRegAtomPtr atom) { 00807 int i; 00808 00809 if (atom == NULL) 00810 return; 00811 00812 for (i = 0;i < atom->nbRanges;i++) 00813 xmlRegFreeRange(atom->ranges[i]); 00814 if (atom->ranges != NULL) 00815 xmlFree(atom->ranges); 00816 if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL)) 00817 xmlFree(atom->valuep); 00818 if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL)) 00819 xmlFree(atom->valuep2); 00820 if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL)) 00821 xmlFree(atom->valuep); 00822 xmlFree(atom); 00823 } 00824 00834 static xmlRegAtomPtr 00835 xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { 00836 xmlRegAtomPtr ret; 00837 00838 ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom)); 00839 if (ret == NULL) { 00840 xmlRegexpErrMemory(ctxt, "copying atom"); 00841 return(NULL); 00842 } 00843 memset(ret, 0, sizeof(xmlRegAtom)); 00844 ret->type = atom->type; 00845 ret->quant = atom->quant; 00846 ret->min = atom->min; 00847 ret->max = atom->max; 00848 if (atom->nbRanges > 0) { 00849 int i; 00850 00851 ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) * 00852 atom->nbRanges); 00853 if (ret->ranges == NULL) { 00854 xmlRegexpErrMemory(ctxt, "copying atom"); 00855 goto error; 00856 } 00857 for (i = 0;i < atom->nbRanges;i++) { 00858 ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]); 00859 if (ret->ranges[i] == NULL) 00860 goto error; 00861 ret->nbRanges = i + 1; 00862 } 00863 } 00864 return(ret); 00865 00866 error: 00867 xmlRegFreeAtom(ret); 00868 return(NULL); 00869 } 00870 00871 static xmlRegStatePtr 00872 xmlRegNewState(xmlRegParserCtxtPtr ctxt) { 00873 xmlRegStatePtr ret; 00874 00875 ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState)); 00876 if (ret == NULL) { 00877 xmlRegexpErrMemory(ctxt, "allocating state"); 00878 return(NULL); 00879 } 00880 memset(ret, 0, sizeof(xmlRegState)); 00881 ret->type = XML_REGEXP_TRANS_STATE; 00882 ret->mark = XML_REGEXP_MARK_NORMAL; 00883 return(ret); 00884 } 00885 00892 static void 00893 xmlRegFreeState(xmlRegStatePtr state) { 00894 if (state == NULL) 00895 return; 00896 00897 if (state->trans != NULL) 00898 xmlFree(state->trans); 00899 if (state->transTo != NULL) 00900 xmlFree(state->transTo); 00901 xmlFree(state); 00902 } 00903 00910 static void 00911 xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) { 00912 int i; 00913 if (ctxt == NULL) 00914 return; 00915 00916 if (ctxt->string != NULL) 00917 xmlFree(ctxt->string); 00918 if (ctxt->states != NULL) { 00919 for (i = 0;i < ctxt->nbStates;i++) 00920 xmlRegFreeState(ctxt->states[i]); 00921 xmlFree(ctxt->states); 00922 } 00923 if (ctxt->atoms != NULL) { 00924 for (i = 0;i < ctxt->nbAtoms;i++) 00925 xmlRegFreeAtom(ctxt->atoms[i]); 00926 xmlFree(ctxt->atoms); 00927 } 00928 if (ctxt->counters != NULL) 00929 xmlFree(ctxt->counters); 00930 xmlFree(ctxt); 00931 } 00932 00933 /************************************************************************ 00934 * * 00935 * Display of Data structures * 00936 * * 00937 ************************************************************************/ 00938 00939 static void 00940 xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) { 00941 switch (type) { 00942 case XML_REGEXP_EPSILON: 00943 fprintf(output, "epsilon "); break; 00944 case XML_REGEXP_CHARVAL: 00945 fprintf(output, "charval "); break; 00946 case XML_REGEXP_RANGES: 00947 fprintf(output, "ranges "); break; 00948 case XML_REGEXP_SUBREG: 00949 fprintf(output, "subexpr "); break; 00950 case XML_REGEXP_STRING: 00951 fprintf(output, "string "); break; 00952 case XML_REGEXP_ANYCHAR: 00953 fprintf(output, "anychar "); break; 00954 case XML_REGEXP_ANYSPACE: 00955 fprintf(output, "anyspace "); break; 00956 case XML_REGEXP_NOTSPACE: 00957 fprintf(output, "notspace "); break; 00958 case XML_REGEXP_INITNAME: 00959 fprintf(output, "initname "); break; 00960 case XML_REGEXP_NOTINITNAME: 00961 fprintf(output, "notinitname "); break; 00962 case XML_REGEXP_NAMECHAR: 00963 fprintf(output, "namechar "); break; 00964 case XML_REGEXP_NOTNAMECHAR: 00965 fprintf(output, "notnamechar "); break; 00966 case XML_REGEXP_DECIMAL: 00967 fprintf(output, "decimal "); break; 00968 case XML_REGEXP_NOTDECIMAL: 00969 fprintf(output, "notdecimal "); break; 00970 case XML_REGEXP_REALCHAR: 00971 fprintf(output, "realchar "); break; 00972 case XML_REGEXP_NOTREALCHAR: 00973 fprintf(output, "notrealchar "); break; 00974 case XML_REGEXP_LETTER: 00975 fprintf(output, "LETTER "); break; 00976 case XML_REGEXP_LETTER_UPPERCASE: 00977 fprintf(output, "LETTER_UPPERCASE "); break; 00978 case XML_REGEXP_LETTER_LOWERCASE: 00979 fprintf(output, "LETTER_LOWERCASE "); break; 00980 case XML_REGEXP_LETTER_TITLECASE: 00981 fprintf(output, "LETTER_TITLECASE "); break; 00982 case XML_REGEXP_LETTER_MODIFIER: 00983 fprintf(output, "LETTER_MODIFIER "); break; 00984 case XML_REGEXP_LETTER_OTHERS: 00985 fprintf(output, "LETTER_OTHERS "); break; 00986 case XML_REGEXP_MARK: 00987 fprintf(output, "MARK "); break; 00988 case XML_REGEXP_MARK_NONSPACING: 00989 fprintf(output, "MARK_NONSPACING "); break; 00990 case XML_REGEXP_MARK_SPACECOMBINING: 00991 fprintf(output, "MARK_SPACECOMBINING "); break; 00992 case XML_REGEXP_MARK_ENCLOSING: 00993 fprintf(output, "MARK_ENCLOSING "); break; 00994 case XML_REGEXP_NUMBER: 00995 fprintf(output, "NUMBER "); break; 00996 case XML_REGEXP_NUMBER_DECIMAL: 00997 fprintf(output, "NUMBER_DECIMAL "); break; 00998 case XML_REGEXP_NUMBER_LETTER: 00999 fprintf(output, "NUMBER_LETTER "); break; 01000 case XML_REGEXP_NUMBER_OTHERS: 01001 fprintf(output, "NUMBER_OTHERS "); break; 01002 case XML_REGEXP_PUNCT: 01003 fprintf(output, "PUNCT "); break; 01004 case XML_REGEXP_PUNCT_CONNECTOR: 01005 fprintf(output, "PUNCT_CONNECTOR "); break; 01006 case XML_REGEXP_PUNCT_DASH: 01007 fprintf(output, "PUNCT_DASH "); break; 01008 case XML_REGEXP_PUNCT_OPEN: 01009 fprintf(output, "PUNCT_OPEN "); break; 01010 case XML_REGEXP_PUNCT_CLOSE: 01011 fprintf(output, "PUNCT_CLOSE "); break; 01012 case XML_REGEXP_PUNCT_INITQUOTE: 01013 fprintf(output, "PUNCT_INITQUOTE "); break; 01014 case XML_REGEXP_PUNCT_FINQUOTE: 01015 fprintf(output, "PUNCT_FINQUOTE "); break; 01016 case XML_REGEXP_PUNCT_OTHERS: 01017 fprintf(output, "PUNCT_OTHERS "); break; 01018 case XML_REGEXP_SEPAR: 01019 fprintf(output, "SEPAR "); break; 01020 case XML_REGEXP_SEPAR_SPACE: 01021 fprintf(output, "SEPAR_SPACE "); break; 01022 case XML_REGEXP_SEPAR_LINE: 01023 fprintf(output, "SEPAR_LINE "); break; 01024 case XML_REGEXP_SEPAR_PARA: 01025 fprintf(output, "SEPAR_PARA "); break; 01026 case XML_REGEXP_SYMBOL: 01027 fprintf(output, "SYMBOL "); break; 01028 case XML_REGEXP_SYMBOL_MATH: 01029 fprintf(output, "SYMBOL_MATH "); break; 01030 case XML_REGEXP_SYMBOL_CURRENCY: 01031 fprintf(output, "SYMBOL_CURRENCY "); break; 01032 case XML_REGEXP_SYMBOL_MODIFIER: 01033 fprintf(output, "SYMBOL_MODIFIER "); break; 01034 case XML_REGEXP_SYMBOL_OTHERS: 01035 fprintf(output, "SYMBOL_OTHERS "); break; 01036 case XML_REGEXP_OTHER: 01037 fprintf(output, "OTHER "); break; 01038 case XML_REGEXP_OTHER_CONTROL: 01039 fprintf(output, "OTHER_CONTROL "); break; 01040 case XML_REGEXP_OTHER_FORMAT: 01041 fprintf(output, "OTHER_FORMAT "); break; 01042 case XML_REGEXP_OTHER_PRIVATE: 01043 fprintf(output, "OTHER_PRIVATE "); break; 01044 case XML_REGEXP_OTHER_NA: 01045 fprintf(output, "OTHER_NA "); break; 01046 case XML_REGEXP_BLOCK_NAME: 01047 fprintf(output, "BLOCK "); break; 01048 } 01049 } 01050 01051 static void 01052 xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) { 01053 switch (type) { 01054 case XML_REGEXP_QUANT_EPSILON: 01055 fprintf(output, "epsilon "); break; 01056 case XML_REGEXP_QUANT_ONCE: 01057 fprintf(output, "once "); break; 01058 case XML_REGEXP_QUANT_OPT: 01059 fprintf(output, "? "); break; 01060 case XML_REGEXP_QUANT_MULT: 01061 fprintf(output, "* "); break; 01062 case XML_REGEXP_QUANT_PLUS: 01063 fprintf(output, "+ "); break; 01064 case XML_REGEXP_QUANT_RANGE: 01065 fprintf(output, "range "); break; 01066 case XML_REGEXP_QUANT_ONCEONLY: 01067 fprintf(output, "onceonly "); break; 01068 case XML_REGEXP_QUANT_ALL: 01069 fprintf(output, "all "); break; 01070 } 01071 } 01072 static void 01073 xmlRegPrintRange(FILE *output, xmlRegRangePtr range) { 01074 fprintf(output, " range: "); 01075 if (range->neg) 01076 fprintf(output, "negative "); 01077 xmlRegPrintAtomType(output, range->type); 01078 fprintf(output, "%c - %c\n", range->start, range->end); 01079 } 01080 01081 static void 01082 xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) { 01083 fprintf(output, " atom: "); 01084 if (atom == NULL) { 01085 fprintf(output, "NULL\n"); 01086 return; 01087 } 01088 if (atom->neg) 01089 fprintf(output, "not "); 01090 xmlRegPrintAtomType(output, atom->type); 01091 xmlRegPrintQuantType(output, atom->quant); 01092 if (atom->quant == XML_REGEXP_QUANT_RANGE) 01093 fprintf(output, "%d-%d ", atom->min, atom->max); 01094 if (atom->type == XML_REGEXP_STRING) 01095 fprintf(output, "'%s' ", (char *) atom->valuep); 01096 if (atom->type == XML_REGEXP_CHARVAL) 01097 fprintf(output, "char %c\n", atom->codepoint); 01098 else if (atom->type == XML_REGEXP_RANGES) { 01099 int i; 01100 fprintf(output, "%d entries\n", atom->nbRanges); 01101 for (i = 0; i < atom->nbRanges;i++) 01102 xmlRegPrintRange(output, atom->ranges[i]); 01103 } else if (atom->type == XML_REGEXP_SUBREG) { 01104 fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no); 01105 } else { 01106 fprintf(output, "\n"); 01107 } 01108 } 01109 01110 static void 01111 xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) { 01112 fprintf(output, " trans: "); 01113 if (trans == NULL) { 01114 fprintf(output, "NULL\n"); 01115 return; 01116 } 01117 if (trans->to < 0) { 01118 fprintf(output, "removed\n"); 01119 return; 01120 } 01121 if (trans->nd != 0) { 01122 if (trans->nd == 2) 01123 fprintf(output, "last not determinist, "); 01124 else 01125 fprintf(output, "not determinist, "); 01126 } 01127 if (trans->counter >= 0) { 01128 fprintf(output, "counted %d, ", trans->counter); 01129 } 01130 if (trans->count == REGEXP_ALL_COUNTER) { 01131 fprintf(output, "all transition, "); 01132 } else if (trans->count >= 0) { 01133 fprintf(output, "count based %d, ", trans->count); 01134 } 01135 if (trans->atom == NULL) { 01136 fprintf(output, "epsilon to %d\n", trans->to); 01137 return; 01138 } 01139 if (trans->atom->type == XML_REGEXP_CHARVAL) 01140 fprintf(output, "char %c ", trans->atom->codepoint); 01141 fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to); 01142 } 01143 01144 static void 01145 xmlRegPrintState(FILE *output, xmlRegStatePtr state) { 01146 int i; 01147 01148 fprintf(output, " state: "); 01149 if (state == NULL) { 01150 fprintf(output, "NULL\n"); 01151 return; 01152 } 01153 if (state->type == XML_REGEXP_START_STATE) 01154 fprintf(output, "START "); 01155 if (state->type == XML_REGEXP_FINAL_STATE) 01156 fprintf(output, "FINAL "); 01157 01158 fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans); 01159 for (i = 0;i < state->nbTrans; i++) { 01160 xmlRegPrintTrans(output, &(state->trans[i])); 01161 } 01162 } 01163 01164 #ifdef DEBUG_REGEXP_GRAPH 01165 static void 01166 xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) { 01167 int i; 01168 01169 fprintf(output, " ctxt: "); 01170 if (ctxt == NULL) { 01171 fprintf(output, "NULL\n"); 01172 return; 01173 } 01174 fprintf(output, "'%s' ", ctxt->string); 01175 if (ctxt->error) 01176 fprintf(output, "error "); 01177 if (ctxt->neg) 01178 fprintf(output, "neg "); 01179 fprintf(output, "\n"); 01180 fprintf(output, "%d atoms:\n", ctxt->nbAtoms); 01181 for (i = 0;i < ctxt->nbAtoms; i++) { 01182 fprintf(output, " %02d ", i); 01183 xmlRegPrintAtom(output, ctxt->atoms[i]); 01184 } 01185 if (ctxt->atom != NULL) { 01186 fprintf(output, "current atom:\n"); 01187 xmlRegPrintAtom(output, ctxt->atom); 01188 } 01189 fprintf(output, "%d states:", ctxt->nbStates); 01190 if (ctxt->start != NULL) 01191 fprintf(output, " start: %d", ctxt->start->no); 01192 if (ctxt->end != NULL) 01193 fprintf(output, " end: %d", ctxt->end->no); 01194 fprintf(output, "\n"); 01195 for (i = 0;i < ctxt->nbStates; i++) { 01196 xmlRegPrintState(output, ctxt->states[i]); 01197 } 01198 fprintf(output, "%d counters:\n", ctxt->nbCounters); 01199 for (i = 0;i < ctxt->nbCounters; i++) { 01200 fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min, 01201 ctxt->counters[i].max); 01202 } 01203 } 01204 #endif 01205 01206 /************************************************************************ 01207 * * 01208 * Finite Automata structures manipulations * 01209 * * 01210 ************************************************************************/ 01211 01212 static void 01213 xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom, 01214 int neg, xmlRegAtomType type, int start, int end, 01215 xmlChar *blockName) { 01216 xmlRegRangePtr range; 01217 01218 if (atom == NULL) { 01219 ERROR("add range: atom is NULL"); 01220 return; 01221 } 01222 if (atom->type != XML_REGEXP_RANGES) { 01223 ERROR("add range: atom is not ranges"); 01224 return; 01225 } 01226 if (atom->maxRanges == 0) { 01227 atom->maxRanges = 4; 01228 atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges * 01229 sizeof(xmlRegRangePtr)); 01230 if (atom->ranges == NULL) { 01231 xmlRegexpErrMemory(ctxt, "adding ranges"); 01232 atom->maxRanges = 0; 01233 return; 01234 } 01235 } else if (atom->nbRanges >= atom->maxRanges) { 01236 xmlRegRangePtr *tmp; 01237 atom->maxRanges *= 2; 01238 tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges * 01239 sizeof(xmlRegRangePtr)); 01240 if (tmp == NULL) { 01241 xmlRegexpErrMemory(ctxt, "adding ranges"); 01242 atom->maxRanges /= 2; 01243 return; 01244 } 01245 atom->ranges = tmp; 01246 } 01247 range = xmlRegNewRange(ctxt, neg, type, start, end); 01248 if (range == NULL) 01249 return; 01250 range->blockName = blockName; 01251 atom->ranges[atom->nbRanges++] = range; 01252 01253 } 01254 01255 static int 01256 xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) { 01257 if (ctxt->maxCounters == 0) { 01258 ctxt->maxCounters = 4; 01259 ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters * 01260 sizeof(xmlRegCounter)); 01261 if (ctxt->counters == NULL) { 01262 xmlRegexpErrMemory(ctxt, "allocating counter"); 01263 ctxt->maxCounters = 0; 01264 return(-1); 01265 } 01266 } else if (ctxt->nbCounters >= ctxt->maxCounters) { 01267 xmlRegCounter *tmp; 01268 ctxt->maxCounters *= 2; 01269 tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters * 01270 sizeof(xmlRegCounter)); 01271 if (tmp == NULL) { 01272 xmlRegexpErrMemory(ctxt, "allocating counter"); 01273 ctxt->maxCounters /= 2; 01274 return(-1); 01275 } 01276 ctxt->counters = tmp; 01277 } 01278 ctxt->counters[ctxt->nbCounters].min = -1; 01279 ctxt->counters[ctxt->nbCounters].max = -1; 01280 return(ctxt->nbCounters++); 01281 } 01282 01283 static int 01284 xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { 01285 if (atom == NULL) { 01286 ERROR("atom push: atom is NULL"); 01287 return(-1); 01288 } 01289 if (ctxt->maxAtoms == 0) { 01290 ctxt->maxAtoms = 4; 01291 ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms * 01292 sizeof(xmlRegAtomPtr)); 01293 if (ctxt->atoms == NULL) { 01294 xmlRegexpErrMemory(ctxt, "pushing atom"); 01295 ctxt->maxAtoms = 0; 01296 return(-1); 01297 } 01298 } else if (ctxt->nbAtoms >= ctxt->maxAtoms) { 01299 xmlRegAtomPtr *tmp; 01300 ctxt->maxAtoms *= 2; 01301 tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms * 01302 sizeof(xmlRegAtomPtr)); 01303 if (tmp == NULL) { 01304 xmlRegexpErrMemory(ctxt, "allocating counter"); 01305 ctxt->maxAtoms /= 2; 01306 return(-1); 01307 } 01308 ctxt->atoms = tmp; 01309 } 01310 atom->no = ctxt->nbAtoms; 01311 ctxt->atoms[ctxt->nbAtoms++] = atom; 01312 return(0); 01313 } 01314 01315 static void 01316 xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target, 01317 int from) { 01318 if (target->maxTransTo == 0) { 01319 target->maxTransTo = 8; 01320 target->transTo = (int *) xmlMalloc(target->maxTransTo * 01321 sizeof(int)); 01322 if (target->transTo == NULL) { 01323 xmlRegexpErrMemory(ctxt, "adding transition"); 01324 target->maxTransTo = 0; 01325 return; 01326 } 01327 } else if (target->nbTransTo >= target->maxTransTo) { 01328 int *tmp; 01329 target->maxTransTo *= 2; 01330 tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo * 01331 sizeof(int)); 01332 if (tmp == NULL) { 01333 xmlRegexpErrMemory(ctxt, "adding transition"); 01334 target->maxTransTo /= 2; 01335 return; 01336 } 01337 target->transTo = tmp; 01338 } 01339 target->transTo[target->nbTransTo] = from; 01340 target->nbTransTo++; 01341 } 01342 01343 static void 01344 xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, 01345 xmlRegAtomPtr atom, xmlRegStatePtr target, 01346 int counter, int count) { 01347 01348 int nrtrans; 01349 01350 if (state == NULL) { 01351 ERROR("add state: state is NULL"); 01352 return; 01353 } 01354 if (target == NULL) { 01355 ERROR("add state: target is NULL"); 01356 return; 01357 } 01358 /* 01359 * Other routines follow the philosophy 'When in doubt, add a transition' 01360 * so we check here whether such a transition is already present and, if 01361 * so, silently ignore this request. 01362 */ 01363 01364 for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) { 01365 xmlRegTransPtr trans = &(state->trans[nrtrans]); 01366 if ((trans->atom == atom) && 01367 (trans->to == target->no) && 01368 (trans->counter == counter) && 01369 (trans->count == count)) { 01370 #ifdef DEBUG_REGEXP_GRAPH 01371 printf("Ignoring duplicate transition from %d to %d\n", 01372 state->no, target->no); 01373 #endif 01374 return; 01375 } 01376 } 01377 01378 if (state->maxTrans == 0) { 01379 state->maxTrans = 8; 01380 state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans * 01381 sizeof(xmlRegTrans)); 01382 if (state->trans == NULL) { 01383 xmlRegexpErrMemory(ctxt, "adding transition"); 01384 state->maxTrans = 0; 01385 return; 01386 } 01387 } else if (state->nbTrans >= state->maxTrans) { 01388 xmlRegTrans *tmp; 01389 state->maxTrans *= 2; 01390 tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans * 01391 sizeof(xmlRegTrans)); 01392 if (tmp == NULL) { 01393 xmlRegexpErrMemory(ctxt, "adding transition"); 01394 state->maxTrans /= 2; 01395 return; 01396 } 01397 state->trans = tmp; 01398 } 01399 #ifdef DEBUG_REGEXP_GRAPH 01400 printf("Add trans from %d to %d ", state->no, target->no); 01401 if (count == REGEXP_ALL_COUNTER) 01402 printf("all transition\n"); 01403 else if (count >= 0) 01404 printf("count based %d\n", count); 01405 else if (counter >= 0) 01406 printf("counted %d\n", counter); 01407 else if (atom == NULL) 01408 printf("epsilon transition\n"); 01409 else if (atom != NULL) 01410 xmlRegPrintAtom(stdout, atom); 01411 #endif 01412 01413 state->trans[state->nbTrans].atom = atom; 01414 state->trans[state->nbTrans].to = target->no; 01415 state->trans[state->nbTrans].counter = counter; 01416 state->trans[state->nbTrans].count = count; 01417 state->trans[state->nbTrans].nd = 0; 01418 state->nbTrans++; 01419 xmlRegStateAddTransTo(ctxt, target, state->no); 01420 } 01421 01422 static int 01423 xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) { 01424 if (state == NULL) return(-1); 01425 if (ctxt->maxStates == 0) { 01426 ctxt->maxStates = 4; 01427 ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates * 01428 sizeof(xmlRegStatePtr)); 01429 if (ctxt->states == NULL) { 01430 xmlRegexpErrMemory(ctxt, "adding state"); 01431 ctxt->maxStates = 0; 01432 return(-1); 01433 } 01434 } else if (ctxt->nbStates >= ctxt->maxStates) { 01435 xmlRegStatePtr *tmp; 01436 ctxt->maxStates *= 2; 01437 tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates * 01438 sizeof(xmlRegStatePtr)); 01439 if (tmp == NULL) { 01440 xmlRegexpErrMemory(ctxt, "adding state"); 01441 ctxt->maxStates /= 2; 01442 return(-1); 01443 } 01444 ctxt->states = tmp; 01445 } 01446 state->no = ctxt->nbStates; 01447 ctxt->states[ctxt->nbStates++] = state; 01448 return(0); 01449 } 01450 01459 static void 01460 xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt, 01461 xmlRegStatePtr from, xmlRegStatePtr to, 01462 int lax) { 01463 if (to == NULL) { 01464 to = xmlRegNewState(ctxt); 01465 xmlRegStatePush(ctxt, to); 01466 ctxt->state = to; 01467 } 01468 if (lax) 01469 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER); 01470 else 01471 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER); 01472 } 01473 01481 static void 01482 xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt, 01483 xmlRegStatePtr from, xmlRegStatePtr to) { 01484 if (to == NULL) { 01485 to = xmlRegNewState(ctxt); 01486 xmlRegStatePush(ctxt, to); 01487 ctxt->state = to; 01488 } 01489 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1); 01490 } 01491 01500 static void 01501 xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt, 01502 xmlRegStatePtr from, xmlRegStatePtr to, int counter) { 01503 if (to == NULL) { 01504 to = xmlRegNewState(ctxt); 01505 xmlRegStatePush(ctxt, to); 01506 ctxt->state = to; 01507 } 01508 xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1); 01509 } 01510 01519 static void 01520 xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt, 01521 xmlRegStatePtr from, xmlRegStatePtr to, int counter) { 01522 if (to == NULL) { 01523 to = xmlRegNewState(ctxt); 01524 xmlRegStatePush(ctxt, to); 01525 ctxt->state = to; 01526 } 01527 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter); 01528 } 01529 01539 static int 01540 xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, 01541 xmlRegStatePtr to, xmlRegAtomPtr atom) { 01542 xmlRegStatePtr end; 01543 01544 if (atom == NULL) { 01545 ERROR("genrate transition: atom == NULL"); 01546 return(-1); 01547 } 01548 if (atom->type == XML_REGEXP_SUBREG) { 01549 /* 01550 * this is a subexpression handling one should not need to 01551 * create a new node except for XML_REGEXP_QUANT_RANGE. 01552 */ 01553 if (xmlRegAtomPush(ctxt, atom) < 0) { 01554 return(-1); 01555 } 01556 if ((to != NULL) && (atom->stop != to) && 01557 (atom->quant != XML_REGEXP_QUANT_RANGE)) { 01558 /* 01559 * Generate an epsilon transition to link to the target 01560 */ 01561 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); 01562 #ifdef DV 01563 } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && 01564 (atom->quant != XML_REGEXP_QUANT_ONCE)) { 01565 to = xmlRegNewState(ctxt); 01566 xmlRegStatePush(ctxt, to); 01567 ctxt->state = to; 01568 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); 01569 #endif 01570 } 01571 switch (atom->quant) { 01572 case XML_REGEXP_QUANT_OPT: 01573 atom->quant = XML_REGEXP_QUANT_ONCE; 01574 /* 01575 * transition done to the state after end of atom. 01576 * 1. set transition from atom start to new state 01577 * 2. set transition from atom end to this state. 01578 */ 01579 if (to == NULL) { 01580 xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0); 01581 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, 01582 ctxt->state); 01583 } else { 01584 xmlFAGenerateEpsilonTransition(ctxt, atom->start, to); 01585 } 01586 break; 01587 case XML_REGEXP_QUANT_MULT: 01588 atom->quant = XML_REGEXP_QUANT_ONCE; 01589 xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); 01590 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); 01591 break; 01592 case XML_REGEXP_QUANT_PLUS: 01593 atom->quant = XML_REGEXP_QUANT_ONCE; 01594 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); 01595 break; 01596 case XML_REGEXP_QUANT_RANGE: { 01597 int counter; 01598 xmlRegStatePtr inter, newstate; 01599 01600 /* 01601 * create the final state now if needed 01602 */ 01603 if (to != NULL) { 01604 newstate = to; 01605 } else { 01606 newstate = xmlRegNewState(ctxt); 01607 xmlRegStatePush(ctxt, newstate); 01608 } 01609 01610 /* 01611 * The principle here is to use counted transition 01612 * to avoid explosion in the number of states in the 01613 * graph. This is clearly more complex but should not 01614 * be exploitable at runtime. 01615 */ 01616 if ((atom->min == 0) && (atom->start0 == NULL)) { 01617 xmlRegAtomPtr copy; 01618 /* 01619 * duplicate a transition based on atom to count next 01620 * occurences after 1. We cannot loop to atom->start 01621 * directly because we need an epsilon transition to 01622 * newstate. 01623 */ 01624 /* ???? For some reason it seems we never reach that 01625 case, I suppose this got optimized out before when 01626 building the automata */ 01627 copy = xmlRegCopyAtom(ctxt, atom); 01628 if (copy == NULL) 01629 return(-1); 01630 copy->quant = XML_REGEXP_QUANT_ONCE; 01631 copy->min = 0; 01632 copy->max = 0; 01633 01634 if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy) 01635 < 0) 01636 return(-1); 01637 inter = ctxt->state; 01638 counter = xmlRegGetCounter(ctxt); 01639 ctxt->counters[counter].min = atom->min - 1; 01640 ctxt->counters[counter].max = atom->max - 1; 01641 /* count the number of times we see it again */ 01642 xmlFAGenerateCountedEpsilonTransition(ctxt, inter, 01643 atom->stop, counter); 01644 /* allow a way out based on the count */ 01645 xmlFAGenerateCountedTransition(ctxt, inter, 01646 newstate, counter); 01647 /* and also allow a direct exit for 0 */ 01648 xmlFAGenerateEpsilonTransition(ctxt, atom->start, 01649 newstate); 01650 } else { 01651 /* 01652 * either we need the atom at least once or there 01653 * is an atom->start0 allowing to easilly plug the 01654 * epsilon transition. 01655 */ 01656 counter = xmlRegGetCounter(ctxt); 01657 ctxt->counters[counter].min = atom->min - 1; 01658 ctxt->counters[counter].max = atom->max - 1; 01659 /* count the number of times we see it again */ 01660 xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop, 01661 atom->start, counter); 01662 /* allow a way out based on the count */ 01663 xmlFAGenerateCountedTransition(ctxt, atom->stop, 01664 newstate, counter); 01665 /* and if needed allow a direct exit for 0 */ 01666 if (atom->min == 0) 01667 xmlFAGenerateEpsilonTransition(ctxt, atom->start0, 01668 newstate); 01669 01670 } 01671 atom->min = 0; 01672 atom->max = 0; 01673 atom->quant = XML_REGEXP_QUANT_ONCE; 01674 ctxt->state = newstate; 01675 } 01676 default: 01677 break; 01678 } 01679 return(0); 01680 } 01681 if ((atom->min == 0) && (atom->max == 0) && 01682 (atom->quant == XML_REGEXP_QUANT_RANGE)) { 01683 /* 01684 * we can discard the atom and generate an epsilon transition instead 01685 */ 01686 if (to == NULL) { 01687 to = xmlRegNewState(ctxt); 01688 if (to != NULL) 01689 xmlRegStatePush(ctxt, to); 01690 else { 01691 return(-1); 01692 } 01693 } 01694 xmlFAGenerateEpsilonTransition(ctxt, from, to); 01695 ctxt->state = to; 01696 xmlRegFreeAtom(atom); 01697 return(0); 01698 } 01699 if (to == NULL) { 01700 to = xmlRegNewState(ctxt); 01701 if (to != NULL) 01702 xmlRegStatePush(ctxt, to); 01703 else { 01704 return(-1); 01705 } 01706 } 01707 end = to; 01708 if ((atom->quant == XML_REGEXP_QUANT_MULT) || 01709 (atom->quant == XML_REGEXP_QUANT_PLUS)) { 01710 /* 01711 * Do not pollute the target state by adding transitions from 01712 * it as it is likely to be the shared target of multiple branches. 01713 * So isolate with an epsilon transition. 01714 */ 01715 xmlRegStatePtr tmp; 01716 01717 tmp = xmlRegNewState(ctxt); 01718 if (tmp != NULL) 01719 xmlRegStatePush(ctxt, tmp); 01720 else { 01721 return(-1); 01722 } 01723 xmlFAGenerateEpsilonTransition(ctxt, tmp, to); 01724 to = tmp; 01725 } 01726 if (xmlRegAtomPush(ctxt, atom) < 0) { 01727 return(-1); 01728 } 01729 xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1); 01730 ctxt->state = end; 01731 switch (atom->quant) { 01732 case XML_REGEXP_QUANT_OPT: 01733 atom->quant = XML_REGEXP_QUANT_ONCE; 01734 xmlFAGenerateEpsilonTransition(ctxt, from, to); 01735 break; 01736 case XML_REGEXP_QUANT_MULT: 01737 atom->quant = XML_REGEXP_QUANT_ONCE; 01738 xmlFAGenerateEpsilonTransition(ctxt, from, to); 01739 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); 01740 break; 01741 case XML_REGEXP_QUANT_PLUS: 01742 atom->quant = XML_REGEXP_QUANT_ONCE; 01743 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); 01744 break; 01745 case XML_REGEXP_QUANT_RANGE: 01746 #if DV_test 01747 if (atom->min == 0) { 01748 xmlFAGenerateEpsilonTransition(ctxt, from, to); 01749 } 01750 #endif 01751 break; 01752 default: 01753 break; 01754 } 01755 return(0); 01756 } 01757 01766 static void 01767 xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr, 01768 int tonr, int counter) { 01769 int transnr; 01770 xmlRegStatePtr from; 01771 xmlRegStatePtr to; 01772 01773 #ifdef DEBUG_REGEXP_GRAPH 01774 printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr); 01775 #endif 01776 from = ctxt->states[fromnr]; 01777 if (from == NULL) 01778 return; 01779 to = ctxt->states[tonr]; 01780 if (to == NULL) 01781 return; 01782 if ((to->mark == XML_REGEXP_MARK_START) || 01783 (to->mark == XML_REGEXP_MARK_VISITED)) 01784 return; 01785 01786 to->mark = XML_REGEXP_MARK_VISITED; 01787 if (to->type == XML_REGEXP_FINAL_STATE) { 01788 #ifdef DEBUG_REGEXP_GRAPH 01789 printf("State %d is final, so %d becomes final\n", tonr, fromnr); 01790 #endif 01791 from->type = XML_REGEXP_FINAL_STATE; 01792 } 01793 for (transnr = 0;transnr < to->nbTrans;transnr++) { 01794 if (to->trans[transnr].to < 0) 01795 continue; 01796 if (to->trans[transnr].atom == NULL) { 01797 /* 01798 * Don't remove counted transitions 01799 * Don't loop either 01800 */ 01801 if (to->trans[transnr].to != fromnr) { 01802 if (to->trans[transnr].count >= 0) { 01803 int newto = to->trans[transnr].to; 01804 01805 xmlRegStateAddTrans(ctxt, from, NULL, 01806 ctxt->states[newto], 01807 -1, to->trans[transnr].count); 01808 } else { 01809 #ifdef DEBUG_REGEXP_GRAPH 01810 printf("Found epsilon trans %d from %d to %d\n", 01811 transnr, tonr, to->trans[transnr].to); 01812 #endif 01813 if (to->trans[transnr].counter >= 0) { 01814 xmlFAReduceEpsilonTransitions(ctxt, fromnr, 01815 to->trans[transnr].to, 01816 to->trans[transnr].counter); 01817 } else { 01818 xmlFAReduceEpsilonTransitions(ctxt, fromnr, 01819 to->trans[transnr].to, 01820 counter); 01821 } 01822 } 01823 } 01824 } else { 01825 int newto = to->trans[transnr].to; 01826 01827 if (to->trans[transnr].counter >= 0) { 01828 xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 01829 ctxt->states[newto], 01830 to->trans[transnr].counter, -1); 01831 } else { 01832 xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 01833 ctxt->states[newto], counter, -1); 01834 } 01835 } 01836 } 01837 to->mark = XML_REGEXP_MARK_NORMAL; 01838 } 01839 01855 static void 01856 xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { 01857 int statenr, i, j, newto; 01858 xmlRegStatePtr state, tmp; 01859 01860 for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 01861 state = ctxt->states[statenr]; 01862 if (state == NULL) 01863 continue; 01864 if (state->nbTrans != 1) 01865 continue; 01866 if (state->type == XML_REGEXP_UNREACH_STATE) 01867 continue; 01868 /* is the only transition out a basic transition */ 01869 if ((state->trans[0].atom == NULL) && 01870 (state->trans[0].to >= 0) && 01871 (state->trans[0].to != statenr) && 01872 (state->trans[0].counter < 0) && 01873 (state->trans[0].count < 0)) { 01874 newto = state->trans[0].to; 01875 01876 if (state->type == XML_REGEXP_START_STATE) { 01877 #ifdef DEBUG_REGEXP_GRAPH 01878 printf("Found simple epsilon trans from start %d to %d\n", 01879 statenr, newto); 01880 #endif 01881 } else { 01882 #ifdef DEBUG_REGEXP_GRAPH 01883 printf("Found simple epsilon trans from %d to %d\n", 01884 statenr, newto); 01885 #endif 01886 for (i = 0;i < state->nbTransTo;i++) { 01887 tmp = ctxt->states[state->transTo[i]]; 01888 for (j = 0;j < tmp->nbTrans;j++) { 01889 if (tmp->trans[j].to == statenr) { 01890 #ifdef DEBUG_REGEXP_GRAPH 01891 printf("Changed transition %d on %d to go to %d\n", 01892 j, tmp->no, newto); 01893 #endif 01894 tmp->trans[j].to = -1; 01895 xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom, 01896 ctxt->states[newto], 01897 tmp->trans[j].counter, 01898 tmp->trans[j].count); 01899 } 01900 } 01901 } 01902 if (state->type == XML_REGEXP_FINAL_STATE) 01903 ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE; 01904 /* eliminate the transition completely */ 01905 state->nbTrans = 0; 01906 01907 state->type = XML_REGEXP_UNREACH_STATE; 01908 01909 } 01910 01911 } 01912 } 01913 } 01919 static void 01920 xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { 01921 int statenr, transnr; 01922 xmlRegStatePtr state; 01923 int has_epsilon; 01924 01925 if (ctxt->states == NULL) return; 01926 01927 /* 01928 * Eliminate simple epsilon transition and the associated unreachable 01929 * states. 01930 */ 01931 xmlFAEliminateSimpleEpsilonTransitions(ctxt); 01932 for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 01933 state = ctxt->states[statenr]; 01934 if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) { 01935 #ifdef DEBUG_REGEXP_GRAPH 01936 printf("Removed unreachable state %d\n", statenr); 01937 #endif 01938 xmlRegFreeState(state); 01939 ctxt->states[statenr] = NULL; 01940 } 01941 } 01942 01943 has_epsilon = 0; 01944 01945 /* 01946 * Build the completed transitions bypassing the epsilons 01947 * Use a marking algorithm to avoid loops 01948 * Mark sink states too. 01949 * Process from the latests states backward to the start when 01950 * there is long cascading epsilon chains this minimize the 01951 * recursions and transition compares when adding the new ones 01952 */ 01953 for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) { 01954 state = ctxt->states[statenr]; 01955 if (state == NULL) 01956 continue; 01957 if ((state->nbTrans == 0) && 01958 (state->type != XML_REGEXP_FINAL_STATE)) { 01959 state->type = XML_REGEXP_SINK_STATE; 01960 } 01961 for (transnr = 0;transnr < state->nbTrans;transnr++) { 01962 if ((state->trans[transnr].atom == NULL) && 01963 (state->trans[transnr].to >= 0)) { 01964 if (state->trans[transnr].to == statenr) { 01965 state->trans[transnr].to = -1; 01966 #ifdef DEBUG_REGEXP_GRAPH 01967 printf("Removed loopback epsilon trans %d on %d\n", 01968 transnr, statenr); 01969 #endif 01970 } else if (state->trans[transnr].count < 0) { 01971 int newto = state->trans[transnr].to; 01972 01973 #ifdef DEBUG_REGEXP_GRAPH 01974 printf("Found epsilon trans %d from %d to %d\n", 01975 transnr, statenr, newto); 01976 #endif 01977 has_epsilon = 1; 01978 state->trans[transnr].to = -2; 01979 state->mark = XML_REGEXP_MARK_START; 01980 xmlFAReduceEpsilonTransitions(ctxt, statenr, 01981 newto, state->trans[transnr].counter); 01982 state->mark = XML_REGEXP_MARK_NORMAL; 01983 #ifdef DEBUG_REGEXP_GRAPH 01984 } else { 01985 printf("Found counted transition %d on %d\n", 01986 transnr, statenr); 01987 #endif 01988 } 01989 } 01990 } 01991 } 01992 /* 01993 * Eliminate the epsilon transitions 01994 */ 01995 if (has_epsilon) { 01996 for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 01997 state = ctxt->states[statenr]; 01998 if (state == NULL) 01999 continue; 02000 for (transnr = 0;transnr < state->nbTrans;transnr++) { 02001 xmlRegTransPtr trans = &(state->trans[transnr]); 02002 if ((trans->atom == NULL) && 02003 (trans->count < 0) && 02004 (trans->to >= 0)) { 02005 trans->to = -1; 02006 } 02007 } 02008 } 02009 } 02010 02011 /* 02012 * Use this pass to detect unreachable states too 02013 */ 02014 for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 02015 state = ctxt->states[statenr]; 02016 if (state != NULL) 02017 state->reached = XML_REGEXP_MARK_NORMAL; 02018 } 02019 state = ctxt->states[0]; 02020 if (state != NULL) 02021 state->reached = XML_REGEXP_MARK_START; 02022 while (state != NULL) { 02023 xmlRegStatePtr target = NULL; 02024 state->reached = XML_REGEXP_MARK_VISITED; 02025 /* 02026 * Mark all states reachable from the current reachable state 02027 */ 02028 for (transnr = 0;transnr < state->nbTrans;transnr++) { 02029 if ((state->trans[transnr].to >= 0) && 02030 ((state->trans[transnr].atom != NULL) || 02031 (state->trans[transnr].count >= 0))) { 02032 int newto = state->trans[transnr].to; 02033 02034 if (ctxt->states[newto] == NULL) 02035 continue; 02036 if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) { 02037 ctxt->states[newto]->reached = XML_REGEXP_MARK_START; 02038 target = ctxt->states[newto]; 02039 } 02040 } 02041 } 02042 02043 /* 02044 * find the next accessible state not explored 02045 */ 02046 if (target == NULL) { 02047 for (statenr = 1;statenr < ctxt->nbStates;statenr++) { 02048 state = ctxt->states[statenr]; 02049 if ((state != NULL) && (state->reached == 02050 XML_REGEXP_MARK_START)) { 02051 target = state; 02052 break; 02053 } 02054 } 02055 } 02056 state = target; 02057 } 02058 for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 02059 state = ctxt->states[statenr]; 02060 if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) { 02061 #ifdef DEBUG_REGEXP_GRAPH 02062 printf("Removed unreachable state %d\n", statenr); 02063 #endif 02064 xmlRegFreeState(state); 02065 ctxt->states[statenr] = NULL; 02066 } 02067 } 02068 02069 } 02070 02071 static int 02072 xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) { 02073 int ret = 0; 02074 02075 if ((range1->type == XML_REGEXP_RANGES) || 02076 (range2->type == XML_REGEXP_RANGES) || 02077 (range2->type == XML_REGEXP_SUBREG) || 02078 (range1->type == XML_REGEXP_SUBREG) || 02079 (range1->type == XML_REGEXP_STRING) || 02080 (range2->type == XML_REGEXP_STRING)) 02081 return(-1); 02082 02083 /* put them in order */ 02084 if (range1->type > range2->type) { 02085 xmlRegRangePtr tmp; 02086 02087 tmp = range1; 02088 range1 = range2; 02089 range2 = tmp; 02090 } 02091 if ((range1->type == XML_REGEXP_ANYCHAR) || 02092 (range2->type == XML_REGEXP_ANYCHAR)) { 02093 ret = 1; 02094 } else if ((range1->type == XML_REGEXP_EPSILON) || 02095 (range2->type == XML_REGEXP_EPSILON)) { 02096 return(0); 02097 } else if (range1->type == range2->type) { 02098 if (range1->type != XML_REGEXP_CHARVAL) 02099 ret = 1; 02100 else if ((range1->end < range2->start) || 02101 (range2->end < range1->start)) 02102 ret = 0; 02103 else 02104 ret = 1; 02105 } else if (range1->type == XML_REGEXP_CHARVAL) { 02106 int codepoint; 02107 int neg = 0; 02108 02109 /* 02110 * just check all codepoints in the range for acceptance, 02111 * this is usually way cheaper since done only once at 02112 * compilation than testing over and over at runtime or 02113 * pushing too many states when evaluating. 02114 */ 02115 if (((range1->neg == 0) && (range2->neg != 0)) || 02116 ((range1->neg != 0) && (range2->neg == 0))) 02117 neg = 1; 02118 02119 for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) { 02120 ret = xmlRegCheckCharacterRange(range2->type, codepoint, 02121 0, range2->start, range2->end, 02122 range2->blockName); 02123 if (ret < 0) 02124 return(-1); 02125 if (((neg == 1) && (ret == 0)) || 02126 ((neg == 0) && (ret == 1))) 02127 return(1); 02128 } 02129 return(0); 02130 } else if ((range1->type == XML_REGEXP_BLOCK_NAME) || 02131 (range2->type == XML_REGEXP_BLOCK_NAME)) { 02132 if (range1->type == range2->type) { 02133 ret = xmlStrEqual(range1->blockName, range2->blockName); 02134 } else { 02135 /* 02136 * comparing a block range with anything else is way 02137 * too costly, and maintining the table is like too much 02138 * memory too, so let's force the automata to save state 02139 * here. 02140 */ 02141 return(1); 02142 } 02143 } else if ((range1->type < XML_REGEXP_LETTER) || 02144 (range2->type < XML_REGEXP_LETTER)) { 02145 if ((range1->type == XML_REGEXP_ANYSPACE) && 02146 (range2->type == XML_REGEXP_NOTSPACE)) 02147 ret = 0; 02148 else if ((range1->type == XML_REGEXP_INITNAME) && 02149 (range2->type == XML_REGEXP_NOTINITNAME)) 02150 ret = 0; 02151 else if ((range1->type == XML_REGEXP_NAMECHAR) && 02152 (range2->type == XML_REGEXP_NOTNAMECHAR)) 02153 ret = 0; 02154 else if ((range1->type == XML_REGEXP_DECIMAL) && 02155 (range2->type == XML_REGEXP_NOTDECIMAL)) 02156 ret = 0; 02157 else if ((range1->type == XML_REGEXP_REALCHAR) && 02158 (range2->type == XML_REGEXP_NOTREALCHAR)) 02159 ret = 0; 02160 else { 02161 /* same thing to limit complexity */ 02162 return(1); 02163 } 02164 } else { 02165 ret = 0; 02166 /* range1->type < range2->type here */ 02167 switch (range1->type) { 02168 case XML_REGEXP_LETTER: 02169 /* all disjoint except in the subgroups */ 02170 if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) || 02171 (range2->type == XML_REGEXP_LETTER_LOWERCASE) || 02172 (range2->type == XML_REGEXP_LETTER_TITLECASE) || 02173 (range2->type == XML_REGEXP_LETTER_MODIFIER) || 02174 (range2->type == XML_REGEXP_LETTER_OTHERS)) 02175 ret = 1; 02176 break; 02177 case XML_REGEXP_MARK: 02178 if ((range2->type == XML_REGEXP_MARK_NONSPACING) || 02179 (range2->type == XML_REGEXP_MARK_SPACECOMBINING) || 02180 (range2->type == XML_REGEXP_MARK_ENCLOSING)) 02181 ret = 1; 02182 break; 02183 case XML_REGEXP_NUMBER: 02184 if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) || 02185 (range2->type == XML_REGEXP_NUMBER_LETTER) || 02186 (range2->type == XML_REGEXP_NUMBER_OTHERS)) 02187 ret = 1; 02188 break; 02189 case XML_REGEXP_PUNCT: 02190 if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) || 02191 (range2->type == XML_REGEXP_PUNCT_DASH) || 02192 (range2->type == XML_REGEXP_PUNCT_OPEN) || 02193 (range2->type == XML_REGEXP_PUNCT_CLOSE) || 02194 (range2->type == XML_REGEXP_PUNCT_INITQUOTE) || 02195 (range2->type == XML_REGEXP_PUNCT_FINQUOTE) || 02196 (range2->type == XML_REGEXP_PUNCT_OTHERS)) 02197 ret = 1; 02198 break; 02199 case XML_REGEXP_SEPAR: 02200 if ((range2->type == XML_REGEXP_SEPAR_SPACE) || 02201 (range2->type == XML_REGEXP_SEPAR_LINE) || 02202 (range2->type == XML_REGEXP_SEPAR_PARA)) 02203 ret = 1; 02204 break; 02205 case XML_REGEXP_SYMBOL: 02206 if ((range2->type == XML_REGEXP_SYMBOL_MATH) || 02207 (range2->type == XML_REGEXP_SYMBOL_CURRENCY) || 02208 (range2->type == XML_REGEXP_SYMBOL_MODIFIER) || 02209 (range2->type == XML_REGEXP_SYMBOL_OTHERS)) 02210 ret = 1; 02211 break; 02212 case XML_REGEXP_OTHER: 02213 if ((range2->type == XML_REGEXP_OTHER_CONTROL) || 02214 (range2->type == XML_REGEXP_OTHER_FORMAT) || 02215 (range2->type == XML_REGEXP_OTHER_PRIVATE)) 02216 ret = 1; 02217 break; 02218 default: 02219 if ((range2->type >= XML_REGEXP_LETTER) && 02220 (range2->type < XML_REGEXP_BLOCK_NAME)) 02221 ret = 0; 02222 else { 02223 /* safety net ! */ 02224 return(1); 02225 } 02226 } 02227 } 02228 if (((range1->neg == 0) && (range2->neg != 0)) || 02229 ((range1->neg != 0) && (range2->neg == 0))) 02230 ret = !ret; 02231 return(ret); 02232 } 02233 02244 static int 02245 xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) { 02246 if ((type1 == XML_REGEXP_EPSILON) || 02247 (type1 == XML_REGEXP_CHARVAL) || 02248 (type1 == XML_REGEXP_RANGES) || 02249 (type1 == XML_REGEXP_SUBREG) || 02250 (type1 == XML_REGEXP_STRING) || 02251 (type1 == XML_REGEXP_ANYCHAR)) 02252 return(1); 02253 if ((type2 == XML_REGEXP_EPSILON) || 02254 (type2 == XML_REGEXP_CHARVAL) || 02255 (type2 == XML_REGEXP_RANGES) || 02256 (type2 == XML_REGEXP_SUBREG) || 02257 (type2 == XML_REGEXP_STRING) || 02258 (type2 == XML_REGEXP_ANYCHAR)) 02259 return(1); 02260 02261 if (type1 == type2) return(1); 02262 02263 /* simplify subsequent compares by making sure type1 < type2 */ 02264 if (type1 > type2) { 02265 xmlRegAtomType tmp = type1; 02266 type1 = type2; 02267 type2 = tmp; 02268 } 02269 switch (type1) { 02270 case XML_REGEXP_ANYSPACE: /* \s */ 02271 /* can't be a letter, number, mark, pontuation, symbol */ 02272 if ((type2 == XML_REGEXP_NOTSPACE) || 02273 ((type2 >= XML_REGEXP_LETTER) && 02274 (type2 <= XML_REGEXP_LETTER_OTHERS)) || 02275 ((type2 >= XML_REGEXP_NUMBER) && 02276 (type2 <= XML_REGEXP_NUMBER_OTHERS)) || 02277 ((type2 >= XML_REGEXP_MARK) && 02278 (type2 <= XML_REGEXP_MARK_ENCLOSING)) || 02279 ((type2 >= XML_REGEXP_PUNCT) && 02280 (type2 <= XML_REGEXP_PUNCT_OTHERS)) || 02281 ((type2 >= XML_REGEXP_SYMBOL) && 02282 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) 02283 ) return(0); 02284 break; 02285 case XML_REGEXP_NOTSPACE: /* \S */ 02286 break; 02287 case XML_REGEXP_INITNAME: /* \l */ 02288 /* can't be a number, mark, separator, pontuation, symbol or other */ 02289 if ((type2 == XML_REGEXP_NOTINITNAME) || 02290 ((type2 >= XML_REGEXP_NUMBER) && 02291 (type2 <= XML_REGEXP_NUMBER_OTHERS)) || 02292 ((type2 >= XML_REGEXP_MARK) && 02293 (type2 <= XML_REGEXP_MARK_ENCLOSING)) || 02294 ((type2 >= XML_REGEXP_SEPAR) && 02295 (type2 <= XML_REGEXP_SEPAR_PARA)) || 02296 ((type2 >= XML_REGEXP_PUNCT) && 02297 (type2 <= XML_REGEXP_PUNCT_OTHERS)) || 02298 ((type2 >= XML_REGEXP_SYMBOL) && 02299 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) || 02300 ((type2 >= XML_REGEXP_OTHER) && 02301 (type2 <= XML_REGEXP_OTHER_NA)) 02302 ) return(0); 02303 break; 02304 case XML_REGEXP_NOTINITNAME: /* \L */ 02305 break; 02306 case XML_REGEXP_NAMECHAR: /* \c */ 02307 /* can't be a mark, separator, pontuation, symbol or other */ 02308 if ((type2 == XML_REGEXP_NOTNAMECHAR) || 02309 ((type2 >= XML_REGEXP_MARK) && 02310 (type2 <= XML_REGEXP_MARK_ENCLOSING)) || 02311 ((type2 >= XML_REGEXP_PUNCT) && 02312 (type2 <= XML_REGEXP_PUNCT_OTHERS)) || 02313 ((type2 >= XML_REGEXP_SEPAR) && 02314 (type2 <= XML_REGEXP_SEPAR_PARA)) || 02315 ((type2 >= XML_REGEXP_SYMBOL) && 02316 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) || 02317 ((type2 >= XML_REGEXP_OTHER) && 02318 (type2 <= XML_REGEXP_OTHER_NA)) 02319 ) return(0); 02320 break; 02321 case XML_REGEXP_NOTNAMECHAR: /* \C */ 02322 break; 02323 case XML_REGEXP_DECIMAL: /* \d */ 02324 /* can't be a letter, mark, separator, pontuation, symbol or other */ 02325 if ((type2 == XML_REGEXP_NOTDECIMAL) || 02326 (type2 == XML_REGEXP_REALCHAR) || 02327 ((type2 >= XML_REGEXP_LETTER) && 02328 (type2 <= XML_REGEXP_LETTER_OTHERS)) || 02329 ((type2 >= XML_REGEXP_MARK) && 02330 (type2 <= XML_REGEXP_MARK_ENCLOSING)) || 02331 ((type2 >= XML_REGEXP_PUNCT) && 02332 (type2 <= XML_REGEXP_PUNCT_OTHERS)) || 02333 ((type2 >= XML_REGEXP_SEPAR) && 02334 (type2 <= XML_REGEXP_SEPAR_PARA)) || 02335 ((type2 >= XML_REGEXP_SYMBOL) && 02336 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) || 02337 ((type2 >= XML_REGEXP_OTHER) && 02338 (type2 <= XML_REGEXP_OTHER_NA)) 02339 )return(0); 02340 break; 02341 case XML_REGEXP_NOTDECIMAL: /* \D */ 02342 break; 02343 case XML_REGEXP_REALCHAR: /* \w */ 02344 /* can't be a mark, separator, pontuation, symbol or other */ 02345 if ((type2 == XML_REGEXP_NOTDECIMAL) || 02346 ((type2 >= XML_REGEXP_MARK) && 02347 (type2 <= XML_REGEXP_MARK_ENCLOSING)) || 02348 ((type2 >= XML_REGEXP_PUNCT) && 02349 (type2 <= XML_REGEXP_PUNCT_OTHERS)) || 02350 ((type2 >= XML_REGEXP_SEPAR) && 02351 (type2 <= XML_REGEXP_SEPAR_PARA)) || 02352 ((type2 >= XML_REGEXP_SYMBOL) && 02353 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) || 02354 ((type2 >= XML_REGEXP_OTHER) && 02355 (type2 <= XML_REGEXP_OTHER_NA)) 02356 )return(0); 02357 break; 02358 case XML_REGEXP_NOTREALCHAR: /* \W */ 02359 break; 02360 /* 02361 * at that point we know both type 1 and type2 are from 02362 * character categories are ordered and are different, 02363 * it becomes simple because this is a partition 02364 */ 02365 case XML_REGEXP_LETTER: 02366 if (type2 <= XML_REGEXP_LETTER_OTHERS) 02367 return(1); 02368 return(0); 02369 case XML_REGEXP_LETTER_UPPERCASE: 02370 case XML_REGEXP_LETTER_LOWERCASE: 02371 case XML_REGEXP_LETTER_TITLECASE: 02372 case XML_REGEXP_LETTER_MODIFIER: 02373 case XML_REGEXP_LETTER_OTHERS: 02374 return(0); 02375 case XML_REGEXP_MARK: 02376 if (type2 <= XML_REGEXP_MARK_ENCLOSING) 02377 return(1); 02378 return(0); 02379 case XML_REGEXP_MARK_NONSPACING: 02380 case XML_REGEXP_MARK_SPACECOMBINING: 02381 case XML_REGEXP_MARK_ENCLOSING: 02382 return(0); 02383 case XML_REGEXP_NUMBER: 02384 if (type2 <= XML_REGEXP_NUMBER_OTHERS) 02385 return(1); 02386 return(0); 02387 case XML_REGEXP_NUMBER_DECIMAL: 02388 case XML_REGEXP_NUMBER_LETTER: 02389 case XML_REGEXP_NUMBER_OTHERS: 02390 return(0); 02391 case XML_REGEXP_PUNCT: 02392 if (type2 <= XML_REGEXP_PUNCT_OTHERS) 02393 return(1); 02394 return(0); 02395 case XML_REGEXP_PUNCT_CONNECTOR: 02396 case XML_REGEXP_PUNCT_DASH: 02397 case XML_REGEXP_PUNCT_OPEN: 02398 case XML_REGEXP_PUNCT_CLOSE: 02399 case XML_REGEXP_PUNCT_INITQUOTE: 02400 case XML_REGEXP_PUNCT_FINQUOTE: 02401 case XML_REGEXP_PUNCT_OTHERS: 02402 return(0); 02403 case XML_REGEXP_SEPAR: 02404 if (type2 <= XML_REGEXP_SEPAR_PARA) 02405 return(1); 02406 return(0); 02407 case XML_REGEXP_SEPAR_SPACE: 02408 case XML_REGEXP_SEPAR_LINE: 02409 case XML_REGEXP_SEPAR_PARA: 02410 return(0); 02411 case XML_REGEXP_SYMBOL: 02412 if (type2 <= XML_REGEXP_SYMBOL_OTHERS) 02413 return(1); 02414 return(0); 02415 case XML_REGEXP_SYMBOL_MATH: 02416 case XML_REGEXP_SYMBOL_CURRENCY: 02417 case XML_REGEXP_SYMBOL_MODIFIER: 02418 case XML_REGEXP_SYMBOL_OTHERS: 02419 return(0); 02420 case XML_REGEXP_OTHER: 02421 if (type2 <= XML_REGEXP_OTHER_NA) 02422 return(1); 02423 return(0); 02424 case XML_REGEXP_OTHER_CONTROL: 02425 case XML_REGEXP_OTHER_FORMAT: 02426 case XML_REGEXP_OTHER_PRIVATE: 02427 case XML_REGEXP_OTHER_NA: 02428 return(0); 02429 default: 02430 break; 02431 } 02432 return(1); 02433 } 02434 02446 static int 02447 xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) { 02448 int ret = 0; 02449 02450 if (atom1 == atom2) 02451 return(1); 02452 if ((atom1 == NULL) || (atom2 == NULL)) 02453 return(0); 02454 02455 if (atom1->type != atom2->type) 02456 return(0); 02457 switch (atom1->type) { 02458 case XML_REGEXP_EPSILON: 02459 ret = 0; 02460 break; 02461 case XML_REGEXP_STRING: 02462 if (!deep) 02463 ret = (atom1->valuep == atom2->valuep); 02464 else 02465 ret = xmlStrEqual((xmlChar *)atom1->valuep, 02466 (xmlChar *)atom2->valuep); 02467 break; 02468 case XML_REGEXP_CHARVAL: 02469 ret = (atom1->codepoint == atom2->codepoint); 02470 break; 02471 case XML_REGEXP_RANGES: 02472 /* too hard to do in the general case */ 02473 ret = 0; 02474 default: 02475 break; 02476 } 02477 return(ret); 02478 } 02479 02491 static int 02492 xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) { 02493 int ret = 1; 02494 02495 if (atom1 == atom2) 02496 return(1); 02497 if ((atom1 == NULL) || (atom2 == NULL)) 02498 return(0); 02499 02500 if ((atom1->type == XML_REGEXP_ANYCHAR) || 02501 (atom2->type == XML_REGEXP_ANYCHAR)) 02502 return(1); 02503 02504 if (atom1->type > atom2->type) { 02505 xmlRegAtomPtr tmp; 02506 tmp = atom1; 02507 atom1 = atom2; 02508 atom2 = tmp; 02509 } 02510 if (atom1->type != atom2->type) { 02511 ret = xmlFACompareAtomTypes(atom1->type, atom2->type); 02512 /* if they can't intersect at the type level break now */ 02513 if (ret == 0) 02514 return(0); 02515 } 02516 switch (atom1->type) { 02517 case XML_REGEXP_STRING: 02518 if (!deep) 02519 ret = (atom1->valuep != atom2->valuep); 02520 else 02521 ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep, 02522 (xmlChar *)atom2->valuep); 02523 break; 02524 case XML_REGEXP_EPSILON: 02525 goto not_determinist; 02526 case XML_REGEXP_CHARVAL: 02527 if (atom2->type == XML_REGEXP_CHARVAL) { 02528 ret = (atom1->codepoint == atom2->codepoint); 02529 } else { 02530 ret = xmlRegCheckCharacter(atom2, atom1->codepoint); 02531 if (ret < 0) 02532 ret = 1; 02533 } 02534 break; 02535 case XML_REGEXP_RANGES: 02536 if (atom2->type == XML_REGEXP_RANGES) { 02537 int i, j, res; 02538 xmlRegRangePtr r1, r2; 02539 02540 /* 02541 * need to check that none of the ranges eventually matches 02542 */ 02543 for (i = 0;i < atom1->nbRanges;i++) { 02544 for (j = 0;j < atom2->nbRanges;j++) { 02545 r1 = atom1->ranges[i]; 02546 r2 = atom2->ranges[j]; 02547 res = xmlFACompareRanges(r1, r2); 02548 if (res == 1) { 02549 ret = 1; 02550 goto done; 02551 } 02552 } 02553 } 02554 ret = 0; 02555 } 02556 break; 02557 default: 02558 goto not_determinist; 02559 } 02560 done: 02561 if (atom1->neg != atom2->neg) { 02562 ret = !ret; 02563 } 02564 if (ret == 0) 02565 return(0); 02566 not_determinist: 02567 return(1); 02568 } 02569 02578 static int 02579 xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, 02580 int to, xmlRegAtomPtr atom) { 02581 int ret = 1; 02582 int res; 02583 int transnr, nbTrans; 02584 xmlRegTransPtr t1; 02585 int deep = 1; 02586 02587 if (state == NULL) 02588 return(ret); 02589 02590 if (ctxt->flags & AM_AUTOMATA_RNG) 02591 deep = 0; 02592 02593 /* 02594 * don't recurse on transitions potentially added in the course of 02595 * the elimination. 02596 */ 02597 nbTrans = state->nbTrans; 02598 for (transnr = 0;transnr < nbTrans;transnr++) { 02599 t1 = &(state->trans[transnr]); 02600 /* 02601 * check transitions conflicting with the one looked at 02602 */ 02603 if (t1->atom == NULL) { 02604 if (t1->to < 0) 02605 continue; 02606 res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], 02607 to, atom); 02608 if (res == 0) { 02609 ret = 0; 02610 /* t1->nd = 1; */ 02611 } 02612 continue; 02613 } 02614 if (t1->to != to) 02615 continue; 02616 if (xmlFACompareAtoms(t1->atom, atom, deep)) { 02617 ret = 0; 02618 /* mark the transition as non-deterministic */ 02619 t1->nd = 1; 02620 } 02621 } 02622 return(ret); 02623 } 02624 02633 static int 02634 xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) { 02635 int statenr, transnr; 02636 xmlRegStatePtr state; 02637 xmlRegTransPtr t1, t2, last; 02638 int i; 02639 int ret = 1; 02640 int deep = 1; 02641 02642 #ifdef DEBUG_REGEXP_GRAPH 02643 printf("xmlFAComputesDeterminism\n"); 02644 xmlRegPrintCtxt(stdout, ctxt); 02645 #endif 02646 if (ctxt->determinist != -1) 02647 return(ctxt->determinist); 02648 02649 if (ctxt->flags & AM_AUTOMATA_RNG) 02650 deep = 0; 02651 02652 /* 02653 * First cleanup the automata removing cancelled transitions 02654 */ 02655 for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 02656 state = ctxt->states[statenr]; 02657 if (state == NULL) 02658 continue; 02659 if (state->nbTrans < 2) 02660 continue; 02661 for (transnr = 0;transnr < state->nbTrans;transnr++) { 02662 t1 = &(state->trans[transnr]); 02663 /* 02664 * Determinism checks in case of counted or all transitions 02665 * will have to be handled separately 02666 */ 02667 if (t1->atom == NULL) { 02668 /* t1->nd = 1; */ 02669 continue; 02670 } 02671 if (t1->to == -1) /* eliminated */ 02672 continue; 02673 for (i = 0;i < transnr;i++) { 02674 t2 = &(state->trans[i]); 02675 if (t2->to == -1) /* eliminated */ 02676 continue; 02677 if (t2->atom != NULL) { 02678 if (t1->to == t2->to) { 02679 /* 02680 * Here we use deep because we want to keep the 02681 * transitions which indicate a conflict 02682 */ 02683 if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) && 02684 (t1->counter == t2->counter) && 02685 (t1->count == t2->count)) 02686 t2->to = -1; /* eliminated */ 02687 } 02688 } 02689 } 02690 } 02691 } 02692 02693 /* 02694 * Check for all states that there aren't 2 transitions 02695 * with the same atom and a different target. 02696 */ 02697 for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 02698 state = ctxt->states[statenr]; 02699 if (state == NULL) 02700 continue; 02701 if (state->nbTrans < 2) 02702 continue; 02703 last = NULL; 02704 for (transnr = 0;transnr < state->nbTrans;transnr++) { 02705 t1 = &(state->trans[transnr]); 02706 /* 02707 * Determinism checks in case of counted or all transitions 02708 * will have to be handled separately 02709 */ 02710 if (t1->atom == NULL) { 02711 continue; 02712 } 02713 if (t1->to == -1) /* eliminated */ 02714 continue; 02715 for (i = 0;i < transnr;i++) { 02716 t2 = &(state->trans[i]); 02717 if (t2->to == -1) /* eliminated */ 02718 continue; 02719 if (t2->atom != NULL) { 02720 /* 02721 * But here we don't use deep because we want to 02722 * find transitions which indicate a conflict 02723 */ 02724 if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) { 02725 ret = 0; 02726 /* mark the transitions as non-deterministic ones */ 02727 t1->nd = 1; 02728 t2->nd = 1; 02729 last = t1; 02730 } 02731 } else if (t1->to != -1) { 02732 /* 02733 * do the closure in case of remaining specific 02734 * epsilon transitions like choices or all 02735 */ 02736 ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], 02737 t2->to, t2->atom); 02738 /* don't shortcut the computation so all non deterministic 02739 transition get marked down 02740 if (ret == 0) 02741 return(0); 02742 */ 02743 if (ret == 0) { 02744 t1->nd = 1; 02745 /* t2->nd = 1; */ 02746 last = t1; 02747 } 02748 } 02749 } 02750 /* don't shortcut the computation so all non deterministic 02751 transition get marked down 02752 if (ret == 0) 02753 break; */ 02754 } 02755 02756 /* 02757 * mark specifically the last non-deterministic transition 02758 * from a state since there is no need to set-up rollback 02759 * from it 02760 */ 02761 if (last != NULL) { 02762 last->nd = 2; 02763 } 02764 02765 /* don't shortcut the computation so all non deterministic 02766 transition get marked down 02767 if (ret == 0) 02768 break; */ 02769 } 02770 02771 ctxt->determinist = ret; 02772 return(ret); 02773 } 02774 02775 /************************************************************************ 02776 * * 02777 * Routines to check input against transition atoms * 02778 * * 02779 ************************************************************************/ 02780 02781 static int 02782 xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg, 02783 int start, int end, const xmlChar *blockName) { 02784 int ret = 0; 02785 02786 switch (type) { 02787 case XML_REGEXP_STRING: 02788 case XML_REGEXP_SUBREG: 02789 case XML_REGEXP_RANGES: 02790 case XML_REGEXP_EPSILON: 02791 return(-1); 02792 case XML_REGEXP_ANYCHAR: 02793 ret = ((codepoint != '\n') && (codepoint != '\r')); 02794 break; 02795 case XML_REGEXP_CHARVAL: 02796 ret = ((codepoint >= start) && (codepoint <= end)); 02797 break; 02798 case XML_REGEXP_NOTSPACE: 02799 neg = !neg; 02800 case XML_REGEXP_ANYSPACE: 02801 ret = ((codepoint == '\n') || (codepoint == '\r') || 02802 (codepoint == '\t') || (codepoint == ' ')); 02803 break; 02804 case XML_REGEXP_NOTINITNAME: 02805 neg = !neg; 02806 case XML_REGEXP_INITNAME: 02807 ret = (IS_LETTER(codepoint) || 02808 (codepoint == '_') || (codepoint == ':')); 02809 break; 02810 case XML_REGEXP_NOTNAMECHAR: 02811 neg = !neg; 02812 case XML_REGEXP_NAMECHAR: 02813 ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) || 02814 (codepoint == '.') || (codepoint == '-') || 02815 (codepoint == '_') || (codepoint == ':') || 02816 IS_COMBINING(codepoint) || IS_EXTENDER(codepoint)); 02817 break; 02818 case XML_REGEXP_NOTDECIMAL: 02819 neg = !neg; 02820 case XML_REGEXP_DECIMAL: 02821 ret = xmlUCSIsCatNd(codepoint); 02822 break; 02823 case XML_REGEXP_REALCHAR: 02824 neg = !neg; 02825 case XML_REGEXP_NOTREALCHAR: 02826 ret = xmlUCSIsCatP(codepoint); 02827 if (ret == 0) 02828 ret = xmlUCSIsCatZ(codepoint); 02829 if (ret == 0) 02830 ret = xmlUCSIsCatC(codepoint); 02831 break; 02832 case XML_REGEXP_LETTER: 02833 ret = xmlUCSIsCatL(codepoint); 02834 break; 02835 case XML_REGEXP_LETTER_UPPERCASE: 02836 ret = xmlUCSIsCatLu(codepoint); 02837 break; 02838 case XML_REGEXP_LETTER_LOWERCASE: 02839 ret = xmlUCSIsCatLl(codepoint); 02840 break; 02841 case XML_REGEXP_LETTER_TITLECASE: 02842 ret = xmlUCSIsCatLt(codepoint); 02843 break; 02844 case XML_REGEXP_LETTER_MODIFIER: 02845 ret = xmlUCSIsCatLm(codepoint); 02846 break; 02847 case XML_REGEXP_LETTER_OTHERS: 02848 ret = xmlUCSIsCatLo(codepoint); 02849 break; 02850 case XML_REGEXP_MARK: 02851 ret = xmlUCSIsCatM(codepoint); 02852 break; 02853 case XML_REGEXP_MARK_NONSPACING: 02854 ret = xmlUCSIsCatMn(codepoint); 02855 break; 02856 case XML_REGEXP_MARK_SPACECOMBINING: 02857 ret = xmlUCSIsCatMc(codepoint); 02858 break; 02859 case XML_REGEXP_MARK_ENCLOSING: 02860 ret = xmlUCSIsCatMe(codepoint); 02861 break; 02862 case XML_REGEXP_NUMBER: 02863 ret = xmlUCSIsCatN(codepoint); 02864 break; 02865 case XML_REGEXP_NUMBER_DECIMAL: 02866 ret = xmlUCSIsCatNd(codepoint); 02867 break; 02868 case XML_REGEXP_NUMBER_LETTER: 02869 ret = xmlUCSIsCatNl(codepoint); 02870 break; 02871 case XML_REGEXP_NUMBER_OTHERS: 02872 ret = xmlUCSIsCatNo(codepoint); 02873 break; 02874 case XML_REGEXP_PUNCT: 02875 ret = xmlUCSIsCatP(codepoint); 02876 break; 02877 case XML_REGEXP_PUNCT_CONNECTOR: 02878 ret = xmlUCSIsCatPc(codepoint); 02879 break; 02880 case XML_REGEXP_PUNCT_DASH: 02881 ret = xmlUCSIsCatPd(codepoint); 02882 break; 02883 case XML_REGEXP_PUNCT_OPEN: 02884 ret = xmlUCSIsCatPs(codepoint); 02885 break; 02886 case XML_REGEXP_PUNCT_CLOSE: 02887 ret = xmlUCSIsCatPe(codepoint); 02888 break; 02889 case XML_REGEXP_PUNCT_INITQUOTE: 02890 ret = xmlUCSIsCatPi(codepoint); 02891 break; 02892 case XML_REGEXP_PUNCT_FINQUOTE: 02893 ret = xmlUCSIsCatPf(codepoint); 02894 break; 02895 case XML_REGEXP_PUNCT_OTHERS: 02896 ret = xmlUCSIsCatPo(codepoint); 02897 break; 02898 case XML_REGEXP_SEPAR: 02899 ret = xmlUCSIsCatZ(codepoint); 02900 break; 02901 case XML_REGEXP_SEPAR_SPACE: 02902 ret = xmlUCSIsCatZs(codepoint); 02903 break; 02904 case XML_REGEXP_SEPAR_LINE: 02905 ret = xmlUCSIsCatZl(codepoint); 02906 break; 02907 case XML_REGEXP_SEPAR_PARA: 02908 ret = xmlUCSIsCatZp(codepoint); 02909 break; 02910 case XML_REGEXP_SYMBOL: 02911 ret = xmlUCSIsCatS(codepoint); 02912 break; 02913 case XML_REGEXP_SYMBOL_MATH: 02914 ret = xmlUCSIsCatSm(codepoint); 02915 break; 02916 case XML_REGEXP_SYMBOL_CURRENCY: 02917 ret = xmlUCSIsCatSc(codepoint); 02918 break; 02919 case XML_REGEXP_SYMBOL_MODIFIER: 02920 ret = xmlUCSIsCatSk(codepoint); 02921 break; 02922 case XML_REGEXP_SYMBOL_OTHERS: 02923 ret = xmlUCSIsCatSo(codepoint); 02924 break; 02925 case XML_REGEXP_OTHER: 02926 ret = xmlUCSIsCatC(codepoint); 02927 break; 02928 case XML_REGEXP_OTHER_CONTROL: 02929 ret = xmlUCSIsCatCc(codepoint); 02930 break; 02931 case XML_REGEXP_OTHER_FORMAT: 02932 ret = xmlUCSIsCatCf(codepoint); 02933 break; 02934 case XML_REGEXP_OTHER_PRIVATE: 02935 ret = xmlUCSIsCatCo(codepoint); 02936 break; 02937 case XML_REGEXP_OTHER_NA: 02938 /* ret = xmlUCSIsCatCn(codepoint); */ 02939 /* Seems it doesn't exist anymore in recent Unicode releases */ 02940 ret = 0; 02941 break; 02942 case XML_REGEXP_BLOCK_NAME: 02943 ret = xmlUCSIsBlock(codepoint, (const char *) blockName); 02944 break; 02945 } 02946 if (neg) 02947 return(!ret); 02948 return(ret); 02949 } 02950 02951 static int 02952 xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) { 02953 int i, ret = 0; 02954 xmlRegRangePtr range; 02955 02956 if ((atom == NULL) || (!IS_CHAR(codepoint))) 02957 return(-1); 02958 02959 switch (atom->type) { 02960 case XML_REGEXP_SUBREG: 02961 case XML_REGEXP_EPSILON: 02962 return(-1); 02963 case XML_REGEXP_CHARVAL: 02964 return(codepoint == atom->codepoint); 02965 case XML_REGEXP_RANGES: { 02966 int accept = 0; 02967 02968 for (i = 0;i < atom->nbRanges;i++) { 02969 range = atom->ranges[i]; 02970 if (range->neg == 2) { 02971 ret = xmlRegCheckCharacterRange(range->type, codepoint, 02972 0, range->start, range->end, 02973 range->blockName); 02974 if (ret != 0) 02975 return(0); /* excluded char */ 02976 } else if (range->neg) { 02977 ret = xmlRegCheckCharacterRange(range->type, codepoint, 02978 0, range->start, range->end, 02979 range->blockName); 02980 if (ret == 0) 02981 accept = 1; 02982 else 02983 return(0); 02984 } else { 02985 ret = xmlRegCheckCharacterRange(range->type, codepoint, 02986 0, range->start, range->end, 02987 range->blockName); 02988 if (ret != 0) 02989 accept = 1; /* might still be excluded */ 02990 } 02991 } 02992 return(accept); 02993 } 02994 case XML_REGEXP_STRING: 02995 printf("TODO: XML_REGEXP_STRING\n"); 02996 return(-1); 02997 case XML_REGEXP_ANYCHAR: 02998 case XML_REGEXP_ANYSPACE: 02999 case XML_REGEXP_NOTSPACE: 03000 case XML_REGEXP_INITNAME: 03001 case XML_REGEXP_NOTINITNAME: 03002 case XML_REGEXP_NAMECHAR: 03003 case XML_REGEXP_NOTNAMECHAR: 03004 case XML_REGEXP_DECIMAL: 03005 case XML_REGEXP_NOTDECIMAL: 03006 case XML_REGEXP_REALCHAR: 03007 case XML_REGEXP_NOTREALCHAR: 03008 case XML_REGEXP_LETTER: 03009 case XML_REGEXP_LETTER_UPPERCASE: 03010 case XML_REGEXP_LETTER_LOWERCASE: 03011 case XML_REGEXP_LETTER_TITLECASE: 03012 case XML_REGEXP_LETTER_MODIFIER: 03013 case XML_REGEXP_LETTER_OTHERS: 03014 case XML_REGEXP_MARK: 03015 case XML_REGEXP_MARK_NONSPACING: 03016 case XML_REGEXP_MARK_SPACECOMBINING: 03017 case XML_REGEXP_MARK_ENCLOSING: 03018 case XML_REGEXP_NUMBER: 03019 case XML_REGEXP_NUMBER_DECIMAL: 03020 case XML_REGEXP_NUMBER_LETTER: 03021 case XML_REGEXP_NUMBER_OTHERS: 03022 case XML_REGEXP_PUNCT: 03023 case XML_REGEXP_PUNCT_CONNECTOR: 03024 case XML_REGEXP_PUNCT_DASH: 03025 case XML_REGEXP_PUNCT_OPEN: 03026 case XML_REGEXP_PUNCT_CLOSE: 03027 case XML_REGEXP_PUNCT_INITQUOTE: 03028 case XML_REGEXP_PUNCT_FINQUOTE: 03029 case XML_REGEXP_PUNCT_OTHERS: 03030 case XML_REGEXP_SEPAR: 03031 case XML_REGEXP_SEPAR_SPACE: 03032 case XML_REGEXP_SEPAR_LINE: 03033 case XML_REGEXP_SEPAR_PARA: 03034 case XML_REGEXP_SYMBOL: 03035 case XML_REGEXP_SYMBOL_MATH: 03036 case XML_REGEXP_SYMBOL_CURRENCY: 03037 case XML_REGEXP_SYMBOL_MODIFIER: 03038 case XML_REGEXP_SYMBOL_OTHERS: 03039 case XML_REGEXP_OTHER: 03040 case XML_REGEXP_OTHER_CONTROL: 03041 case XML_REGEXP_OTHER_FORMAT: 03042 case XML_REGEXP_OTHER_PRIVATE: 03043 case XML_REGEXP_OTHER_NA: 03044 case XML_REGEXP_BLOCK_NAME: 03045 ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0, 03046 (const xmlChar *)atom->valuep); 03047 if (atom->neg) 03048 ret = !ret; 03049 break; 03050 } 03051 return(ret); 03052 } 03053 03054 /************************************************************************ 03055 * * 03056 * Saving and restoring state of an execution context * 03057 * * 03058 ************************************************************************/ 03059 03060 #ifdef DEBUG_REGEXP_EXEC 03061 static void 03062 xmlFARegDebugExec(xmlRegExecCtxtPtr exec) { 03063 printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index); 03064 if (exec->inputStack != NULL) { 03065 int i; 03066 printf(": "); 03067 for (i = 0;(i < 3) && (i < exec->inputStackNr);i++) 03068 printf("%s ", (const char *) 03069 exec->inputStack[exec->inputStackNr - (i + 1)].value); 03070 } else { 03071 printf(": %s", &(exec->inputString[exec->index])); 03072 } 03073 printf("\n"); 03074 } 03075 #endif 03076 03077 static void 03078 xmlFARegExecSave(xmlRegExecCtxtPtr exec) { 03079 #ifdef DEBUG_REGEXP_EXEC 03080 printf("saving "); 03081 exec->transno++; 03082 xmlFARegDebugExec(exec); 03083 exec->transno--; 03084 #endif 03085 #ifdef MAX_PUSH 03086 if (exec->nbPush > MAX_PUSH) { 03087 return; 03088 } 03089 exec->nbPush++; 03090 #endif 03091 03092 if (exec->maxRollbacks == 0) { 03093 exec->maxRollbacks = 4; 03094 exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks * 03095 sizeof(xmlRegExecRollback)); 03096 if (exec->rollbacks == NULL) { 03097 xmlRegexpErrMemory(NULL, "saving regexp"); 03098 exec->maxRollbacks = 0; 03099 return; 03100 } 03101 memset(exec->rollbacks, 0, 03102 exec->maxRollbacks * sizeof(xmlRegExecRollback)); 03103 } else if (exec->nbRollbacks >= exec->maxRollbacks) { 03104 xmlRegExecRollback *tmp; 03105 int len = exec->maxRollbacks; 03106 03107 exec->maxRollbacks *= 2; 03108 tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks, 03109 exec->maxRollbacks * sizeof(xmlRegExecRollback)); 03110 if (tmp == NULL) { 03111 xmlRegexpErrMemory(NULL, "saving regexp"); 03112 exec->maxRollbacks /= 2; 03113 return; 03114 } 03115 exec->rollbacks = tmp; 03116 tmp = &exec->rollbacks[len]; 03117 memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback)); 03118 } 03119 exec->rollbacks[exec->nbRollbacks].state = exec->state; 03120 exec->rollbacks[exec->nbRollbacks].index = exec->index; 03121 exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1; 03122 if (exec->comp->nbCounters > 0) { 03123 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { 03124 exec->rollbacks[exec->nbRollbacks].counts = (int *) 03125 xmlMalloc(exec->comp->nbCounters * sizeof(int)); 03126 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { 03127 xmlRegexpErrMemory(NULL, "saving regexp"); 03128 exec->status = -5; 03129 return; 03130 } 03131 } 03132 memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts, 03133 exec->comp->nbCounters * sizeof(int)); 03134 } 03135 exec->nbRollbacks++; 03136 } 03137 03138 static void 03139 xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) { 03140 if (exec->nbRollbacks <= 0) { 03141 exec->status = -1; 03142 #ifdef DEBUG_REGEXP_EXEC 03143 printf("rollback failed on empty stack\n"); 03144 #endif 03145 return; 03146 } 03147 exec->nbRollbacks--; 03148 exec->state = exec->rollbacks[exec->nbRollbacks].state; 03149 exec->index = exec->rollbacks[exec->nbRollbacks].index; 03150 exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch; 03151 if (exec->comp->nbCounters > 0) { 03152 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { 03153 fprintf(stderr, "exec save: allocation failed"); 03154 exec->status = -6; 03155 return; 03156 } 03157 memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts, 03158 exec->comp->nbCounters * sizeof(int)); 03159 } 03160 03161 #ifdef DEBUG_REGEXP_EXEC 03162 printf("restored "); 03163 xmlFARegDebugExec(exec); 03164 #endif 03165 } 03166 03167 /************************************************************************ 03168 * * 03169 * Verifier, running an input against a compiled regexp * 03170 * * 03171 ************************************************************************/ 03172 03173 static int 03174 xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { 03175 xmlRegExecCtxt execval; 03176 xmlRegExecCtxtPtr exec = &execval; 03177 int ret, codepoint = 0, len, deter; 03178 03179 exec->inputString = content; 03180 exec->index = 0; 03181 exec->nbPush = 0; 03182 exec->determinist = 1; 03183 exec->maxRollbacks = 0; 03184 exec->nbRollbacks = 0; 03185 exec->rollbacks = NULL; 03186 exec->status = 0; 03187 exec->comp = comp; 03188 exec->state = comp->states[0]; 03189 exec->transno = 0; 03190 exec->transcount = 0; 03191 exec->inputStack = NULL; 03192 exec->inputStackMax = 0; 03193 if (comp->nbCounters > 0) { 03194 exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)); 03195 if (exec->counts == NULL) { 03196 xmlRegexpErrMemory(NULL, "running regexp"); 03197 return(-1); 03198 } 03199 memset(exec->counts, 0, comp->nbCounters * sizeof(int)); 03200 } else 03201 exec->counts = NULL; 03202 while ((exec->status == 0) && 03203 ((exec->inputString[exec->index] != 0) || 03204 ((exec->state != NULL) && 03205 (exec->state->type != XML_REGEXP_FINAL_STATE)))) { 03206 xmlRegTransPtr trans; 03207 xmlRegAtomPtr atom; 03208 03209 /* 03210 * If end of input on non-terminal state, rollback, however we may 03211 * still have epsilon like transition for counted transitions 03212 * on counters, in that case don't break too early. Additionally, 03213 * if we are working on a range like "AB{0,2}", where B is not present, 03214 * we don't want to break. 03215 */ 03216 len = 1; 03217 if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) { 03218 /* 03219 * if there is a transition, we must check if 03220 * atom allows minOccurs of 0 03221 */ 03222 if (exec->transno < exec->state->nbTrans) { 03223 trans = &exec->state->trans[exec->transno]; 03224 if (trans->to >=0) { 03225 atom = trans->atom; 03226 if (!((atom->min == 0) && (atom->max > 0))) 03227 goto rollback; 03228 } 03229 } else 03230 goto rollback; 03231 } 03232 03233 exec->transcount = 0; 03234 for (;exec->transno < exec->state->nbTrans;exec->transno++) { 03235 trans = &exec->state->trans[exec->transno]; 03236 if (trans->to < 0) 03237 continue; 03238 atom = trans->atom; 03239 ret = 0; 03240 deter = 1; 03241 if (trans->count >= 0) { 03242 int count; 03243 xmlRegCounterPtr counter; 03244 03245 if (exec->counts == NULL) { 03246 exec->status = -1; 03247 goto error; 03248 } 03249 /* 03250 * A counted transition. 03251 */ 03252 03253 count = exec->counts[trans->count]; 03254 counter = &exec->comp->counters[trans->count]; 03255 #ifdef DEBUG_REGEXP_EXEC 03256 printf("testing count %d: val %d, min %d, max %d\n", 03257 trans->count, count, counter->min, counter->max); 03258 #endif 03259 ret = ((count >= counter->min) && (count <= counter->max)); 03260 if ((ret) && (counter->min != counter->max)) 03261 deter = 0; 03262 } else if (atom == NULL) { 03263 fprintf(stderr, "epsilon transition left at runtime\n"); 03264 exec->status = -2; 03265 break; 03266 } else if (exec->inputString[exec->index] != 0) { 03267 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len); 03268 ret = xmlRegCheckCharacter(atom, codepoint); 03269 if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) { 03270 xmlRegStatePtr to = comp->states[trans->to]; 03271 03272 /* 03273 * this is a multiple input sequence 03274 * If there is a counter associated increment it now. 03275 * before potentially saving and rollback 03276 * do not increment if the counter is already over the 03277 * maximum limit in which case get to next transition 03278 */ 03279 if (trans->counter >= 0) { 03280 xmlRegCounterPtr counter; 03281 03282 if ((exec->counts == NULL) || 03283 (exec->comp == NULL) || 03284 (exec->comp->counters == NULL)) { 03285 exec->status = -1; 03286 goto error; 03287 } 03288 counter = &exec->comp->counters[trans->counter]; 03289 if (exec->counts[trans->counter] >= counter->max) 03290 continue; /* for loop on transitions */ 03291 03292 #ifdef DEBUG_REGEXP_EXEC 03293 printf("Increasing count %d\n", trans->counter); 03294 #endif 03295 exec->counts[trans->counter]++; 03296 } 03297 if (exec->state->nbTrans > exec->transno + 1) { 03298 xmlFARegExecSave(exec); 03299 } 03300 exec->transcount = 1; 03301 do { 03302 /* 03303 * Try to progress as much as possible on the input 03304 */ 03305 if (exec->transcount == atom->max) { 03306 break; 03307 } 03308 exec->index += len; 03309 /* 03310 * End of input: stop here 03311 */ 03312 if (exec->inputString[exec->index] == 0) { 03313 exec->index -= len; 03314 break; 03315 } 03316 if (exec->transcount >= atom->min) { 03317 int transno = exec->transno; 03318 xmlRegStatePtr state = exec->state; 03319 03320 /* 03321 * The transition is acceptable save it 03322 */ 03323 exec->transno = -1; /* trick */ 03324 exec->state = to; 03325 xmlFARegExecSave(exec); 03326 exec->transno = transno; 03327 exec->state = state; 03328 } 03329 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), 03330 len); 03331 ret = xmlRegCheckCharacter(atom, codepoint); 03332 exec->transcount++; 03333 } while (ret == 1); 03334 if (exec->transcount < atom->min) 03335 ret = 0; 03336 03337 /* 03338 * If the last check failed but one transition was found 03339 * possible, rollback 03340 */ 03341 if (ret < 0) 03342 ret = 0; 03343 if (ret == 0) { 03344 goto rollback; 03345 } 03346 if (trans->counter >= 0) { 03347 if (exec->counts == NULL) { 03348 exec->status = -1; 03349 goto error; 03350 } 03351 #ifdef DEBUG_REGEXP_EXEC 03352 printf("Decreasing count %d\n", trans->counter); 03353 #endif 03354 exec->counts[trans->counter]--; 03355 } 03356 } else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) { 03357 /* 03358 * we don't match on the codepoint, but minOccurs of 0 03359 * says that's ok. Setting len to 0 inhibits stepping 03360 * over the codepoint. 03361 */ 03362 exec->transcount = 1; 03363 len = 0; 03364 ret = 1; 03365 } 03366 } else if ((atom->min == 0) && (atom->max > 0)) { 03367 /* another spot to match when minOccurs is 0 */ 03368 exec->transcount = 1; 03369 len = 0; 03370 ret = 1; 03371 } 03372 if (ret == 1) { 03373 if ((trans->nd == 1) || 03374 ((trans->count >= 0) && (deter == 0) && 03375 (exec->state->nbTrans > exec->transno + 1))) { 03376 #ifdef DEBUG_REGEXP_EXEC 03377 if (trans->nd == 1) 03378 printf("Saving on nd transition atom %d for %c at %d\n", 03379 trans->atom->no, codepoint, exec->index); 03380 else 03381 printf("Saving on counted transition count %d for %c at %d\n", 03382 trans->count, codepoint, exec->index); 03383 #endif 03384 xmlFARegExecSave(exec); 03385 } 03386 if (trans->counter >= 0) { 03387 xmlRegCounterPtr counter; 03388 03389 /* make sure we don't go over the counter maximum value */ 03390 if ((exec->counts == NULL) || 03391 (exec->comp == NULL) || 03392 (exec->comp->counters == NULL)) { 03393 exec->status = -1; 03394 goto error; 03395 } 03396 counter = &exec->comp->counters[trans->counter]; 03397 if (exec->counts[trans->counter] >= counter->max) 03398 continue; /* for loop on transitions */ 03399 #ifdef DEBUG_REGEXP_EXEC 03400 printf("Increasing count %d\n", trans->counter); 03401 #endif 03402 exec->counts[trans->counter]++; 03403 } 03404 if ((trans->count >= 0) && 03405 (trans->count < REGEXP_ALL_COUNTER)) { 03406 if (exec->counts == NULL) { 03407 exec->status = -1; 03408 goto error; 03409 } 03410 #ifdef DEBUG_REGEXP_EXEC 03411 printf("resetting count %d on transition\n", 03412 trans->count); 03413 #endif 03414 exec->counts[trans->count] = 0; 03415 } 03416 #ifdef DEBUG_REGEXP_EXEC 03417 printf("entering state %d\n", trans->to); 03418 #endif 03419 exec->state = comp->states[trans->to]; 03420 exec->transno = 0; 03421 if (trans->atom != NULL) { 03422 exec->index += len; 03423 } 03424 goto progress; 03425 } else if (ret < 0) { 03426 exec->status = -4; 03427 break; 03428 } 03429 } 03430 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { 03431 rollback: 03432 /* 03433 * Failed to find a way out 03434 */ 03435 exec->determinist = 0; 03436 #ifdef DEBUG_REGEXP_EXEC 03437 printf("rollback from state %d on %d:%c\n", exec->state->no, 03438 codepoint,codepoint); 03439 #endif 03440 xmlFARegExecRollBack(exec); 03441 } 03442 progress: 03443 continue; 03444 } 03445 error: 03446 if (exec->rollbacks != NULL) { 03447 if (exec->counts != NULL) { 03448 int i; 03449 03450 for (i = 0;i < exec->maxRollbacks;i++) 03451 if (exec->rollbacks[i].counts != NULL) 03452 xmlFree(exec->rollbacks[i].counts); 03453 } 03454 xmlFree(exec->rollbacks); 03455 } 03456 if (exec->counts != NULL) 03457 xmlFree(exec->counts); 03458 if (exec->status == 0) 03459 return(1); 03460 if (exec->status == -1) { 03461 if (exec->nbPush > MAX_PUSH) 03462 return(-1); 03463 return(0); 03464 } 03465 return(exec->status); 03466 } 03467 03468 /************************************************************************ 03469 * * 03470 * Progressive interface to the verifier one atom at a time * 03471 * * 03472 ************************************************************************/ 03473 #ifdef DEBUG_ERR 03474 static void testerr(xmlRegExecCtxtPtr exec); 03475 #endif 03476 03488 xmlRegExecCtxtPtr 03489 xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) { 03490 xmlRegExecCtxtPtr exec; 03491 03492 if (comp == NULL) 03493 return(NULL); 03494 if ((comp->compact == NULL) && (comp->states == NULL)) 03495 return(NULL); 03496 exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt)); 03497 if (exec == NULL) { 03498 xmlRegexpErrMemory(NULL, "creating execution context"); 03499 return(NULL); 03500 } 03501 memset(exec, 0, sizeof(xmlRegExecCtxt)); 03502 exec->inputString = NULL; 03503 exec->index = 0; 03504 exec->determinist = 1; 03505 exec->maxRollbacks = 0; 03506 exec->nbRollbacks = 0; 03507 exec->rollbacks = NULL; 03508 exec->status = 0; 03509 exec->comp = comp; 03510 if (comp->compact == NULL) 03511 exec->state = comp->states[0]; 03512 exec->transno = 0; 03513 exec->transcount = 0; 03514 exec->callback = callback; 03515 exec->data = data; 03516 if (comp->nbCounters > 0) { 03517 /* 03518 * For error handling, exec->counts is allocated twice the size 03519 * the second half is used to store the data in case of rollback 03520 */ 03521 exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int) 03522 * 2); 03523 if (exec->counts == NULL) { 03524 xmlRegexpErrMemory(NULL, "creating execution context"); 03525 xmlFree(exec); 03526 return(NULL); 03527 } 03528 memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2); 03529 exec->errCounts = &exec->counts[comp->nbCounters]; 03530 } else { 03531 exec->counts = NULL; 03532 exec->errCounts = NULL; 03533 } 03534 exec->inputStackMax = 0; 03535 exec->inputStackNr = 0; 03536 exec->inputStack = NULL; 03537 exec->errStateNo = -1; 03538 exec->errString = NULL; 03539 exec->nbPush = 0; 03540 return(exec); 03541 } 03542 03549 void 03550 xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) { 03551 if (exec == NULL) 03552 return; 03553 03554 if (exec->rollbacks != NULL) { 03555 if (exec->counts != NULL) { 03556 int i; 03557 03558 for (i = 0;i < exec->maxRollbacks;i++) 03559 if (exec->rollbacks[i].counts != NULL) 03560 xmlFree(exec->rollbacks[i].counts); 03561 } 03562 xmlFree(exec->rollbacks); 03563 } 03564 if (exec->counts != NULL) 03565 xmlFree(exec->counts); 03566 if (exec->inputStack != NULL) { 03567 int i; 03568 03569 for (i = 0;i < exec->inputStackNr;i++) { 03570 if (exec->inputStack[i].value != NULL) 03571 xmlFree(exec->inputStack[i].value); 03572 } 03573 xmlFree(exec->inputStack); 03574 } 03575 if (exec->errString != NULL) 03576 xmlFree(exec->errString); 03577 xmlFree(exec); 03578 } 03579 03580 static void 03581 xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value, 03582 void *data) { 03583 #ifdef DEBUG_PUSH 03584 printf("saving value: %d:%s\n", exec->inputStackNr, value); 03585 #endif 03586 if (exec->inputStackMax == 0) { 03587 exec->inputStackMax = 4; 03588 exec->inputStack = (xmlRegInputTokenPtr) 03589 xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken)); 03590 if (exec->inputStack == NULL) { 03591 xmlRegexpErrMemory(NULL, "pushing input string"); 03592 exec->inputStackMax = 0; 03593 return; 03594 } 03595 } else if (exec->inputStackNr + 1 >= exec->inputStackMax) { 03596 xmlRegInputTokenPtr tmp; 03597 03598 exec->inputStackMax *= 2; 03599 tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack, 03600 exec->inputStackMax * sizeof(xmlRegInputToken)); 03601 if (tmp == NULL) { 03602 xmlRegexpErrMemory(NULL, "pushing input string"); 03603 exec->inputStackMax /= 2; 03604 return; 03605 } 03606 exec->inputStack = tmp; 03607 } 03608 exec->inputStack[exec->inputStackNr].value = xmlStrdup(value); 03609 exec->inputStack[exec->inputStackNr].data = data; 03610 exec->inputStackNr++; 03611 exec->inputStack[exec->inputStackNr].value = NULL; 03612 exec->inputStack[exec->inputStackNr].data = NULL; 03613 } 03614 03628 static int 03629 xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) { 03630 if (expStr == valStr) return(1); 03631 if (expStr == NULL) return(0); 03632 if (valStr == NULL) return(0); 03633 do { 03634 /* 03635 * Eval if we have a wildcard for the current item. 03636 */ 03637 if (*expStr != *valStr) { 03638 /* if one of them starts with a wildcard make valStr be it */ 03639 if (*valStr == '*') { 03640 const xmlChar *tmp; 03641 03642 tmp = valStr; 03643 valStr = expStr; 03644 expStr = tmp; 03645 } 03646 if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) { 03647 do { 03648 if (*valStr == XML_REG_STRING_SEPARATOR) 03649 break; 03650 valStr++; 03651 } while (*valStr != 0); 03652 continue; 03653 } else 03654 return(0); 03655 } 03656 expStr++; 03657 valStr++; 03658 } while (*valStr != 0); 03659 if (*expStr != 0) 03660 return (0); 03661 else 03662 return (1); 03663 } 03664 03677 static int 03678 xmlRegCompactPushString(xmlRegExecCtxtPtr exec, 03679 xmlRegexpPtr comp, 03680 const xmlChar *value, 03681 void *data) { 03682 int state = exec->index; 03683 int i, target; 03684 03685 if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL)) 03686 return(-1); 03687 03688 if (value == NULL) { 03689 /* 03690 * are we at a final state ? 03691 */ 03692 if (comp->compact[state * (comp->nbstrings + 1)] == 03693 XML_REGEXP_FINAL_STATE) 03694 return(1); 03695 return(0); 03696 } 03697 03698 #ifdef DEBUG_PUSH 03699 printf("value pushed: %s\n", value); 03700 #endif 03701 03702 /* 03703 * Examine all outside transitions from current state 03704 */ 03705 for (i = 0;i < comp->nbstrings;i++) { 03706 target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; 03707 if ((target > 0) && (target <= comp->nbstates)) { 03708 target--; /* to avoid 0 */ 03709 if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) { 03710 exec->index = target; 03711 if ((exec->callback != NULL) && (comp->transdata != NULL)) { 03712 exec->callback(exec->data, value, 03713 comp->transdata[state * comp->nbstrings + i], data); 03714 } 03715 #ifdef DEBUG_PUSH 03716 printf("entering state %d\n", target); 03717 #endif 03718 if (comp->compact[target * (comp->nbstrings + 1)] == 03719 XML_REGEXP_SINK_STATE) 03720 goto error; 03721 03722 if (comp->compact[target * (comp->nbstrings + 1)] == 03723 XML_REGEXP_FINAL_STATE) 03724 return(1); 03725 return(0); 03726 } 03727 } 03728 } 03729 /* 03730 * Failed to find an exit transition out from current state for the 03731 * current token 03732 */ 03733 #ifdef DEBUG_PUSH 03734 printf("failed to find a transition for %s on state %d\n", value, state); 03735 #endif 03736 error: 03737 if (exec->errString != NULL) 03738 xmlFree(exec->errString); 03739 exec->errString = xmlStrdup(value); 03740 exec->errStateNo = state; 03741 exec->status = -1; 03742 #ifdef DEBUG_ERR 03743 testerr(exec); 03744 #endif 03745 return(-1); 03746 } 03747 03760 static int 03761 xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value, 03762 void *data, int compound) { 03763 xmlRegTransPtr trans; 03764 xmlRegAtomPtr atom; 03765 int ret; 03766 int final = 0; 03767 int progress = 1; 03768 03769 if (exec == NULL) 03770 return(-1); 03771 if (exec->comp == NULL) 03772 return(-1); 03773 if (exec->status != 0) 03774 return(exec->status); 03775 03776 if (exec->comp->compact != NULL) 03777 return(xmlRegCompactPushString(exec, exec->comp, value, data)); 03778 03779 if (value == NULL) { 03780 if (exec->state->type == XML_REGEXP_FINAL_STATE) 03781 return(1); 03782 final = 1; 03783 } 03784 03785 #ifdef DEBUG_PUSH 03786 printf("value pushed: %s\n", value); 03787 #endif 03788 /* 03789 * If we have an active rollback stack push the new value there 03790 * and get back to where we were left 03791 */ 03792 if ((value != NULL) && (exec->inputStackNr > 0)) { 03793 xmlFARegExecSaveInputString(exec, value, data); 03794 value = exec->inputStack[exec->index].value; 03795 data = exec->inputStack[exec->index].data; 03796 #ifdef DEBUG_PUSH 03797 printf("value loaded: %s\n", value); 03798 #endif 03799 } 03800 03801 while ((exec->status == 0) && 03802 ((value != NULL) || 03803 ((final == 1) && 03804 (exec->state->type != XML_REGEXP_FINAL_STATE)))) { 03805 03806 /* 03807 * End of input on non-terminal state, rollback, however we may 03808 * still have epsilon like transition for counted transitions 03809 * on counters, in that case don't break too early. 03810 */ 03811 if ((value == NULL) && (exec->counts == NULL)) 03812 goto rollback; 03813 03814 exec->transcount = 0; 03815 for (;exec->transno < exec->state->nbTrans;exec->transno++) { 03816 trans = &exec->state->trans[exec->transno]; 03817 if (trans->to < 0) 03818 continue; 03819 atom = trans->atom; 03820 ret = 0; 03821 if (trans->count == REGEXP_ALL_LAX_COUNTER) { 03822 int i; 03823 int count; 03824 xmlRegTransPtr t; 03825 xmlRegCounterPtr counter; 03826 03827 ret = 0; 03828 03829 #ifdef DEBUG_PUSH 03830 printf("testing all lax %d\n", trans->count); 03831 #endif 03832 /* 03833 * Check all counted transitions from the current state 03834 */ 03835 if ((value == NULL) && (final)) { 03836 ret = 1; 03837 } else if (value != NULL) { 03838 for (i = 0;i < exec->state->nbTrans;i++) { 03839 t = &exec->state->trans[i]; 03840 if ((t->counter < 0) || (t == trans)) 03841 continue; 03842 counter = &exec->comp->counters[t->counter]; 03843 count = exec->counts[t->counter]; 03844 if ((count < counter->max) && 03845 (t->atom != NULL) && 03846 (xmlStrEqual(value, t->atom->valuep))) { 03847 ret = 0; 03848 break; 03849 } 03850 if ((count >= counter->min) && 03851 (count < counter->max) && 03852 (t->atom != NULL) && 03853 (xmlStrEqual(value, t->atom->valuep))) { 03854 ret = 1; 03855 break; 03856 } 03857 } 03858 } 03859 } else if (trans->count == REGEXP_ALL_COUNTER) { 03860 int i; 03861 int count; 03862 xmlRegTransPtr t; 03863 xmlRegCounterPtr counter; 03864 03865 ret = 1; 03866 03867 #ifdef DEBUG_PUSH 03868 printf("testing all %d\n", trans->count); 03869 #endif 03870 /* 03871 * Check all counted transitions from the current state 03872 */ 03873 for (i = 0;i < exec->state->nbTrans;i++) { 03874 t = &exec->state->trans[i]; 03875 if ((t->counter < 0) || (t == trans)) 03876 continue; 03877 counter = &exec->comp->counters[t->counter]; 03878 count = exec->counts[t->counter]; 03879 if ((count < counter->min) || (count > counter->max)) { 03880 ret = 0; 03881 break; 03882 } 03883 } 03884 } else if (trans->count >= 0) { 03885 int count; 03886 xmlRegCounterPtr counter; 03887 03888 /* 03889 * A counted transition. 03890 */ 03891 03892 count = exec->counts[trans->count]; 03893 counter = &exec->comp->counters[trans->count]; 03894 #ifdef DEBUG_PUSH 03895 printf("testing count %d: val %d, min %d, max %d\n", 03896 trans->count, count, counter->min, counter->max); 03897 #endif 03898 ret = ((count >= counter->min) && (count <= counter->max)); 03899 } else if (atom == NULL) { 03900 fprintf(stderr, "epsilon transition left at runtime\n"); 03901 exec->status = -2; 03902 break; 03903 } else if (value != NULL) { 03904 ret = xmlRegStrEqualWildcard(atom->valuep, value); 03905 if (atom->neg) { 03906 ret = !ret; 03907 if (!compound) 03908 ret = 0; 03909 } 03910 if ((ret == 1) && (trans->counter >= 0)) { 03911 xmlRegCounterPtr counter; 03912 int count; 03913 03914 count = exec->counts[trans->counter]; 03915 counter = &exec->comp->counters[trans->counter]; 03916 if (count >= counter->max) 03917 ret = 0; 03918 } 03919 03920 if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) { 03921 xmlRegStatePtr to = exec->comp->states[trans->to]; 03922 03923 /* 03924 * this is a multiple input sequence 03925 */ 03926 if (exec->state->nbTrans > exec->transno + 1) { 03927 if (exec->inputStackNr <= 0) { 03928 xmlFARegExecSaveInputString(exec, value, data); 03929 } 03930 xmlFARegExecSave(exec); 03931 } 03932 exec->transcount = 1; 03933 do { 03934 /* 03935 * Try to progress as much as possible on the input 03936 */ 03937 if (exec->transcount == atom->max) { 03938 break; 03939 } 03940 exec->index++; 03941 value = exec->inputStack[exec->index].value; 03942 data = exec->inputStack[exec->index].data; 03943 #ifdef DEBUG_PUSH 03944 printf("value loaded: %s\n", value); 03945 #endif 03946 03947 /* 03948 * End of input: stop here 03949 */ 03950 if (value == NULL) { 03951 exec->index --; 03952 break; 03953 } 03954 if (exec->transcount >= atom->min) { 03955 int transno = exec->transno; 03956 xmlRegStatePtr state = exec->state; 03957 03958 /* 03959 * The transition is acceptable save it 03960 */ 03961 exec->transno = -1; /* trick */ 03962 exec->state = to; 03963 if (exec->inputStackNr <= 0) { 03964 xmlFARegExecSaveInputString(exec, value, data); 03965 } 03966 xmlFARegExecSave(exec); 03967 exec->transno = transno; 03968 exec->state = state; 03969 } 03970 ret = xmlStrEqual(value, atom->valuep); 03971 exec->transcount++; 03972 } while (ret == 1); 03973 if (exec->transcount < atom->min) 03974 ret = 0; 03975 03976 /* 03977 * If the last check failed but one transition was found 03978 * possible, rollback 03979 */ 03980 if (ret < 0) 03981 ret = 0; 03982 if (ret == 0) { 03983 goto rollback; 03984 } 03985 } 03986 } 03987 if (ret == 1) { 03988 if ((exec->callback != NULL) && (atom != NULL) && 03989 (data != NULL)) { 03990 exec->callback(exec->data, atom->valuep, 03991 atom->data, data); 03992 } 03993 if (exec->state->nbTrans > exec->transno + 1) { 03994 if (exec->inputStackNr <= 0) { 03995 xmlFARegExecSaveInputString(exec, value, data); 03996 } 03997 xmlFARegExecSave(exec); 03998 } 03999 if (trans->counter >= 0) { 04000 #ifdef DEBUG_PUSH 04001 printf("Increasing count %d\n", trans->counter); 04002 #endif 04003 exec->counts[trans->counter]++; 04004 } 04005 if ((trans->count >= 0) && 04006 (trans->count < REGEXP_ALL_COUNTER)) { 04007 #ifdef DEBUG_REGEXP_EXEC 04008 printf("resetting count %d on transition\n", 04009 trans->count); 04010 #endif 04011 exec->counts[trans->count] = 0; 04012 } 04013 #ifdef DEBUG_PUSH 04014 printf("entering state %d\n", trans->to); 04015 #endif 04016 if ((exec->comp->states[trans->to] != NULL) && 04017 (exec->comp->states[trans->to]->type == 04018 XML_REGEXP_SINK_STATE)) { 04019 /* 04020 * entering a sink state, save the current state as error 04021 * state. 04022 */ 04023 if (exec->errString != NULL) 04024 xmlFree(exec->errString); 04025 exec->errString = xmlStrdup(value); 04026 exec->errState = exec->state; 04027 memcpy(exec->errCounts, exec->counts, 04028 exec->comp->nbCounters * sizeof(int)); 04029 } 04030 exec->state = exec->comp->states[trans->to]; 04031 exec->transno = 0; 04032 if (trans->atom != NULL) { 04033 if (exec->inputStack != NULL) { 04034 exec->index++; 04035 if (exec->index < exec->inputStackNr) { 04036 value = exec->inputStack[exec->index].value; 04037 data = exec->inputStack[exec->index].data; 04038 #ifdef DEBUG_PUSH 04039 printf("value loaded: %s\n", value); 04040 #endif 04041 } else { 04042 value = NULL; 04043 data = NULL; 04044 #ifdef DEBUG_PUSH 04045 printf("end of input\n"); 04046 #endif 04047 } 04048 } else { 04049 value = NULL; 04050 data = NULL; 04051 #ifdef DEBUG_PUSH 04052 printf("end of input\n"); 04053 #endif 04054 } 04055 } 04056 goto progress; 04057 } else if (ret < 0) { 04058 exec->status = -4; 04059 break; 04060 } 04061 } 04062 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { 04063 rollback: 04064 /* 04065 * if we didn't yet rollback on the current input 04066 * store the current state as the error state. 04067 */ 04068 if ((progress) && (exec->state != NULL) && 04069 (exec->state->type != XML_REGEXP_SINK_STATE)) { 04070 progress = 0; 04071 if (exec->errString != NULL) 04072 xmlFree(exec->errString); 04073 exec->errString = xmlStrdup(value); 04074 exec->errState = exec->state; 04075 memcpy(exec->errCounts, exec->counts, 04076 exec->comp->nbCounters * sizeof(int)); 04077 } 04078 04079 /* 04080 * Failed to find a way out 04081 */ 04082 exec->determinist = 0; 04083 xmlFARegExecRollBack(exec); 04084 if (exec->status == 0) { 04085 value = exec->inputStack[exec->index].value; 04086 data = exec->inputStack[exec->index].data; 04087 #ifdef DEBUG_PUSH 04088 printf("value loaded: %s\n", value); 04089 #endif 04090 } 04091 } 04092 continue; 04093 progress: 04094 progress = 1; 04095 continue; 04096 } 04097 if (exec->status == 0) { 04098 return(exec->state->type == XML_REGEXP_FINAL_STATE); 04099 } 04100 #ifdef DEBUG_ERR 04101 if (exec->status < 0) { 04102 testerr(exec); 04103 } 04104 #endif 04105 return(exec->status); 04106 } 04107 04119 int 04120 xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value, 04121 void *data) { 04122 return(xmlRegExecPushStringInternal(exec, value, data, 0)); 04123 } 04124 04137 int 04138 xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value, 04139 const xmlChar *value2, void *data) { 04140 xmlChar buf[150]; 04141 int lenn, lenp, ret; 04142 xmlChar *str; 04143 04144 if (exec == NULL) 04145 return(-1); 04146 if (exec->comp == NULL) 04147 return(-1); 04148 if (exec->status != 0) 04149 return(exec->status); 04150 04151 if (value2 == NULL) 04152 return(xmlRegExecPushString(exec, value, data)); 04153 04154 lenn = strlen((char *) value2); 04155 lenp = strlen((char *) value); 04156 04157 if (150 < lenn + lenp + 2) { 04158 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 04159 if (str == NULL) { 04160 exec->status = -1; 04161 return(-1); 04162 } 04163 } else { 04164 str = buf; 04165 } 04166 memcpy(&str[0], value, lenp); 04167 str[lenp] = XML_REG_STRING_SEPARATOR; 04168 memcpy(&str[lenp + 1], value2, lenn); 04169 str[lenn + lenp + 1] = 0; 04170 04171 if (exec->comp->compact != NULL) 04172 ret = xmlRegCompactPushString(exec, exec->comp, str, data); 04173 else 04174 ret = xmlRegExecPushStringInternal(exec, str, data, 1); 04175 04176 if (str != buf) 04177 xmlFree(str); 04178 return(ret); 04179 } 04180 04195 static int 04196 xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err, 04197 int *nbval, int *nbneg, 04198 xmlChar **values, int *terminal) { 04199 int maxval; 04200 int nb = 0; 04201 04202 if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || 04203 (values == NULL) || (*nbval <= 0)) 04204 return(-1); 04205 04206 maxval = *nbval; 04207 *nbval = 0; 04208 *nbneg = 0; 04209 if ((exec->comp != NULL) && (exec->comp->compact != NULL)) { 04210 xmlRegexpPtr comp; 04211 int target, i, state; 04212 04213 comp = exec->comp; 04214 04215 if (err) { 04216 if (exec->errStateNo == -1) return(-1); 04217 state = exec->errStateNo; 04218 } else { 04219 state = exec->index; 04220 } 04221 if (terminal != NULL) { 04222 if (comp->compact[state * (comp->nbstrings + 1)] == 04223 XML_REGEXP_FINAL_STATE) 04224 *terminal = 1; 04225 else 04226 *terminal = 0; 04227 } 04228 for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) { 04229 target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; 04230 if ((target > 0) && (target <= comp->nbstates) && 04231 (comp->compact[(target - 1) * (comp->nbstrings + 1)] != 04232 XML_REGEXP_SINK_STATE)) { 04233 values[nb++] = comp->stringMap[i]; 04234 (*nbval)++; 04235 } 04236 } 04237 for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) { 04238 target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; 04239 if ((target > 0) && (target <= comp->nbstates) && 04240 (comp->compact[(target - 1) * (comp->nbstrings + 1)] == 04241 XML_REGEXP_SINK_STATE)) { 04242 values[nb++] = comp->stringMap[i]; 04243 (*nbneg)++; 04244 } 04245 } 04246 } else { 04247 int transno; 04248 xmlRegTransPtr trans; 04249 xmlRegAtomPtr atom; 04250 xmlRegStatePtr state; 04251 04252 if (terminal != NULL) { 04253 if (exec->state->type == XML_REGEXP_FINAL_STATE) 04254 *terminal = 1; 04255 else 04256 *terminal = 0; 04257 } 04258 04259 if (err) { 04260 if (exec->errState == NULL) return(-1); 04261 state = exec->errState; 04262 } else { 04263 if (exec->state == NULL) return(-1); 04264 state = exec->state; 04265 } 04266 for (transno = 0; 04267 (transno < state->nbTrans) && (nb < maxval); 04268 transno++) { 04269 trans = &state->trans[transno]; 04270 if (trans->to < 0) 04271 continue; 04272 atom = trans->atom; 04273 if ((atom == NULL) || (atom->valuep == NULL)) 04274 continue; 04275 if (trans->count == REGEXP_ALL_LAX_COUNTER) { 04276 /* this should not be reached but ... */ 04277 TODO; 04278 } else if (trans->count == REGEXP_ALL_COUNTER) { 04279 /* this should not be reached but ... */ 04280 TODO; 04281 } else if (trans->counter >= 0) { 04282 xmlRegCounterPtr counter = NULL; 04283 int count; 04284 04285 if (err) 04286 count = exec->errCounts[trans->counter]; 04287 else 04288 count = exec->counts[trans->counter]; 04289 if (exec->comp != NULL) 04290 counter = &exec->comp->counters[trans->counter]; 04291 if ((counter == NULL) || (count < counter->max)) { 04292 if (atom->neg) 04293 values[nb++] = (xmlChar *) atom->valuep2; 04294 else 04295 values[nb++] = (xmlChar *) atom->valuep; 04296 (*nbval)++; 04297 } 04298 } else { 04299 if ((exec->comp->states[trans->to] != NULL) && 04300 (exec->comp->states[trans->to]->type != 04301 XML_REGEXP_SINK_STATE)) { 04302 if (atom->neg) 04303 values[nb++] = (xmlChar *) atom->valuep2; 04304 else 04305 values[nb++] = (xmlChar *) atom->valuep; 04306 (*nbval)++; 04307 } 04308 } 04309 } 04310 for (transno = 0; 04311 (transno < state->nbTrans) && (nb < maxval); 04312 transno++) { 04313 trans = &state->trans[transno]; 04314 if (trans->to < 0) 04315 continue; 04316 atom = trans->atom; 04317 if ((atom == NULL) || (atom->valuep == NULL)) 04318 continue; 04319 if (trans->count == REGEXP_ALL_LAX_COUNTER) { 04320 continue; 04321 } else if (trans->count == REGEXP_ALL_COUNTER) { 04322 continue; 04323 } else if (trans->counter >= 0) { 04324 continue; 04325 } else { 04326 if ((exec->comp->states[trans->to] != NULL) && 04327 (exec->comp->states[trans->to]->type == 04328 XML_REGEXP_SINK_STATE)) { 04329 if (atom->neg) 04330 values[nb++] = (xmlChar *) atom->valuep2; 04331 else 04332 values[nb++] = (xmlChar *) atom->valuep; 04333 (*nbneg)++; 04334 } 04335 } 04336 } 04337 } 04338 return(0); 04339 } 04340 04358 int 04359 xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg, 04360 xmlChar **values, int *terminal) { 04361 return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal)); 04362 } 04363 04383 int 04384 xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string, 04385 int *nbval, int *nbneg, xmlChar **values, int *terminal) { 04386 if (exec == NULL) 04387 return(-1); 04388 if (string != NULL) { 04389 if (exec->status != 0) 04390 *string = exec->errString; 04391 else 04392 *string = NULL; 04393 } 04394 return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal)); 04395 } 04396 04397 #ifdef DEBUG_ERR 04398 static void testerr(xmlRegExecCtxtPtr exec) { 04399 const xmlChar *string; 04400 xmlChar *values[5]; 04401 int nb = 5; 04402 int nbneg; 04403 int terminal; 04404 xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal); 04405 } 04406 #endif 04407 04408 #if 0 04409 static int 04410 xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) { 04411 xmlRegTransPtr trans; 04412 xmlRegAtomPtr atom; 04413 int ret; 04414 int codepoint, len; 04415 04416 if (exec == NULL) 04417 return(-1); 04418 if (exec->status != 0) 04419 return(exec->status); 04420 04421 while ((exec->status == 0) && 04422 ((exec->inputString[exec->index] != 0) || 04423 (exec->state->type != XML_REGEXP_FINAL_STATE))) { 04424 04425 /* 04426 * End of input on non-terminal state, rollback, however we may 04427 * still have epsilon like transition for counted transitions 04428 * on counters, in that case don't break too early. 04429 */ 04430 if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) 04431 goto rollback; 04432 04433 exec->transcount = 0; 04434 for (;exec->transno < exec->state->nbTrans;exec->transno++) { 04435 trans = &exec->state->trans[exec->transno]; 04436 if (trans->to < 0) 04437 continue; 04438 atom = trans->atom; 04439 ret = 0; 04440 if (trans->count >= 0) { 04441 int count; 04442 xmlRegCounterPtr counter; 04443 04444 /* 04445 * A counted transition. 04446 */ 04447 04448 count = exec->counts[trans->count]; 04449 counter = &exec->comp->counters[trans->count]; 04450 #ifdef DEBUG_REGEXP_EXEC 04451 printf("testing count %d: val %d, min %d, max %d\n", 04452 trans->count, count, counter->min, counter->max); 04453 #endif 04454 ret = ((count >= counter->min) && (count <= counter->max)); 04455 } else if (atom == NULL) { 04456 fprintf(stderr, "epsilon transition left at runtime\n"); 04457 exec->status = -2; 04458 break; 04459 } else if (exec->inputString[exec->index] != 0) { 04460 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len); 04461 ret = xmlRegCheckCharacter(atom, codepoint); 04462 if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) { 04463 xmlRegStatePtr to = exec->comp->states[trans->to]; 04464 04465 /* 04466 * this is a multiple input sequence 04467 */ 04468 if (exec->state->nbTrans > exec->transno + 1) { 04469 xmlFARegExecSave(exec); 04470 } 04471 exec->transcount = 1; 04472 do { 04473 /* 04474 * Try to progress as much as possible on the input 04475 */ 04476 if (exec->transcount == atom->max) { 04477 break; 04478 } 04479 exec->index += len; 04480 /* 04481 * End of input: stop here 04482 */ 04483 if (exec->inputString[exec->index] == 0) { 04484 exec->index -= len; 04485 break; 04486 } 04487 if (exec->transcount >= atom->min) { 04488 int transno = exec->transno; 04489 xmlRegStatePtr state = exec->state; 04490 04491 /* 04492 * The transition is acceptable save it 04493 */ 04494 exec->transno = -1; /* trick */ 04495 exec->state = to; 04496 xmlFARegExecSave(exec); 04497 exec->transno = transno; 04498 exec->state = state; 04499 } 04500 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), 04501 len); 04502 ret = xmlRegCheckCharacter(atom, codepoint); 04503 exec->transcount++; 04504 } while (ret == 1); 04505 if (exec->transcount < atom->min) 04506 ret = 0; 04507 04508 /* 04509 * If the last check failed but one transition was found 04510 * possible, rollback 04511 */ 04512 if (ret < 0) 04513 ret = 0; 04514 if (ret == 0) { 04515 goto rollback; 04516 } 04517 } 04518 } 04519 if (ret == 1) { 04520 if (exec->state->nbTrans > exec->transno + 1) { 04521 xmlFARegExecSave(exec); 04522 } 04523 /* 04524 * restart count for expressions like this ((abc){2})* 04525 */ 04526 if (trans->count >= 0) { 04527 #ifdef DEBUG_REGEXP_EXEC 04528 printf("Reset count %d\n", trans->count); 04529 #endif 04530 exec->counts[trans->count] = 0; 04531 } 04532 if (trans->counter >= 0) { 04533 #ifdef DEBUG_REGEXP_EXEC 04534 printf("Increasing count %d\n", trans->counter); 04535 #endif 04536 exec->counts[trans->counter]++; 04537 } 04538 #ifdef DEBUG_REGEXP_EXEC 04539 printf("entering state %d\n", trans->to); 04540 #endif 04541 exec->state = exec->comp->states[trans->to]; 04542 exec->transno = 0; 04543 if (trans->atom != NULL) { 04544 exec->index += len; 04545 } 04546 goto progress; 04547 } else if (ret < 0) { 04548 exec->status = -4; 04549 break; 04550 } 04551 } 04552 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { 04553 rollback: 04554 /* 04555 * Failed to find a way out 04556 */ 04557 exec->determinist = 0; 04558 xmlFARegExecRollBack(exec); 04559 } 04560 progress: 04561 continue; 04562 } 04563 } 04564 #endif 04565 /************************************************************************ 04566 * * 04567 * Parser for the Schemas Datatype Regular Expressions * 04568 * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs * 04569 * * 04570 ************************************************************************/ 04571 04578 static int 04579 xmlFAIsChar(xmlRegParserCtxtPtr ctxt) { 04580 int cur; 04581 int len; 04582 04583 cur = CUR_SCHAR(ctxt->cur, len); 04584 if ((cur == '.') || (cur == '\\') || (cur == '?') || 04585 (cur == '*') || (cur == '+') || (cur == '(') || 04586 (cur == ')') || (cur == '|') || (cur == 0x5B) || 04587 (cur == 0x5D) || (cur == 0)) 04588 return(-1); 04589 return(cur); 04590 } 04591 04608 static void 04609 xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) { 04610 int cur; 04611 xmlRegAtomType type = (xmlRegAtomType) 0; 04612 xmlChar *blockName = NULL; 04613 04614 cur = CUR; 04615 if (cur == 'L') { 04616 NEXT; 04617 cur = CUR; 04618 if (cur == 'u') { 04619 NEXT; 04620 type = XML_REGEXP_LETTER_UPPERCASE; 04621 } else if (cur == 'l') { 04622 NEXT; 04623 type = XML_REGEXP_LETTER_LOWERCASE; 04624 } else if (cur == 't') { 04625 NEXT; 04626 type = XML_REGEXP_LETTER_TITLECASE; 04627 } else if (cur == 'm') { 04628 NEXT; 04629 type = XML_REGEXP_LETTER_MODIFIER; 04630 } else if (cur == 'o') { 04631 NEXT; 04632 type = XML_REGEXP_LETTER_OTHERS; 04633 } else { 04634 type = XML_REGEXP_LETTER; 04635 } 04636 } else if (cur == 'M') { 04637 NEXT; 04638 cur = CUR; 04639 if (cur == 'n') { 04640 NEXT; 04641 /* nonspacing */ 04642 type = XML_REGEXP_MARK_NONSPACING; 04643 } else if (cur == 'c') { 04644 NEXT; 04645 /* spacing combining */ 04646 type = XML_REGEXP_MARK_SPACECOMBINING; 04647 } else if (cur == 'e') { 04648 NEXT; 04649 /* enclosing */ 04650 type = XML_REGEXP_MARK_ENCLOSING; 04651 } else { 04652 /* all marks */ 04653 type = XML_REGEXP_MARK; 04654 } 04655 } else if (cur == 'N') { 04656 NEXT; 04657 cur = CUR; 04658 if (cur == 'd') { 04659 NEXT; 04660 /* digital */ 04661 type = XML_REGEXP_NUMBER_DECIMAL; 04662 } else if (cur == 'l') { 04663 NEXT; 04664 /* letter */ 04665 type = XML_REGEXP_NUMBER_LETTER; 04666 } else if (cur == 'o') { 04667 NEXT; 04668 /* other */ 04669 type = XML_REGEXP_NUMBER_OTHERS; 04670 } else { 04671 /* all numbers */ 04672 type = XML_REGEXP_NUMBER; 04673 } 04674 } else if (cur == 'P') { 04675 NEXT; 04676 cur = CUR; 04677 if (cur == 'c') { 04678 NEXT; 04679 /* connector */ 04680 type = XML_REGEXP_PUNCT_CONNECTOR; 04681 } else if (cur == 'd') { 04682 NEXT; 04683 /* dash */ 04684 type = XML_REGEXP_PUNCT_DASH; 04685 } else if (cur == 's') { 04686 NEXT; 04687 /* open */ 04688 type = XML_REGEXP_PUNCT_OPEN; 04689 } else if (cur == 'e') { 04690 NEXT; 04691 /* close */ 04692 type = XML_REGEXP_PUNCT_CLOSE; 04693 } else if (cur == 'i') { 04694 NEXT; 04695 /* initial quote */ 04696 type = XML_REGEXP_PUNCT_INITQUOTE; 04697 } else if (cur == 'f') { 04698 NEXT; 04699 /* final quote */ 04700 type = XML_REGEXP_PUNCT_FINQUOTE; 04701 } else if (cur == 'o') { 04702 NEXT; 04703 /* other */ 04704 type = XML_REGEXP_PUNCT_OTHERS; 04705 } else { 04706 /* all punctuation */ 04707 type = XML_REGEXP_PUNCT; 04708 } 04709 } else if (cur == 'Z') { 04710 NEXT; 04711 cur = CUR; 04712 if (cur == 's') { 04713 NEXT; 04714 /* space */ 04715 type = XML_REGEXP_SEPAR_SPACE; 04716 } else if (cur == 'l') { 04717 NEXT; 04718 /* line */ 04719 type = XML_REGEXP_SEPAR_LINE; 04720 } else if (cur == 'p') { 04721 NEXT; 04722 /* paragraph */ 04723 type = XML_REGEXP_SEPAR_PARA; 04724 } else { 04725 /* all separators */ 04726 type = XML_REGEXP_SEPAR; 04727 } 04728 } else if (cur == 'S') { 04729 NEXT; 04730 cur = CUR; 04731 if (cur == 'm') { 04732 NEXT; 04733 type = XML_REGEXP_SYMBOL_MATH; 04734 /* math */ 04735 } else if (cur == 'c') { 04736 NEXT; 04737 type = XML_REGEXP_SYMBOL_CURRENCY; 04738 /* currency */ 04739 } else if (cur == 'k') { 04740 NEXT; 04741 type = XML_REGEXP_SYMBOL_MODIFIER; 04742 /* modifiers */ 04743 } else if (cur == 'o') { 04744 NEXT; 04745 type = XML_REGEXP_SYMBOL_OTHERS; 04746 /* other */ 04747 } else { 04748 /* all symbols */ 04749 type = XML_REGEXP_SYMBOL; 04750 } 04751 } else if (cur == 'C') { 04752 NEXT; 04753 cur = CUR; 04754 if (cur == 'c') { 04755 NEXT; 04756 /* control */ 04757 type = XML_REGEXP_OTHER_CONTROL; 04758 } else if (cur == 'f') { 04759 NEXT; 04760 /* format */ 04761 type = XML_REGEXP_OTHER_FORMAT; 04762 } else if (cur == 'o') { 04763 NEXT; 04764 /* private use */ 04765 type = XML_REGEXP_OTHER_PRIVATE; 04766 } else if (cur == 'n') { 04767 NEXT; 04768 /* not assigned */ 04769 type = XML_REGEXP_OTHER_NA; 04770 } else { 04771 /* all others */ 04772 type = XML_REGEXP_OTHER; 04773 } 04774 } else if (cur == 'I') { 04775 const xmlChar *start; 04776 NEXT; 04777 cur = CUR; 04778 if (cur != 's') { 04779 ERROR("IsXXXX expected"); 04780 return; 04781 } 04782 NEXT; 04783 start = ctxt->cur; 04784 cur = CUR; 04785 if (((cur >= 'a') && (cur <= 'z')) || 04786 ((cur >= 'A') && (cur <= 'Z')) || 04787 ((cur >= '0') && (cur <= '9')) || 04788 (cur == 0x2D)) { 04789 NEXT; 04790 cur = CUR; 04791 while (((cur >= 'a') && (cur <= 'z')) || 04792 ((cur >= 'A') && (cur <= 'Z')) || 04793 ((cur >= '0') && (cur <= '9')) || 04794 (cur == 0x2D)) { 04795 NEXT; 04796 cur = CUR; 04797 } 04798 } 04799 type = XML_REGEXP_BLOCK_NAME; 04800 blockName = xmlStrndup(start, ctxt->cur - start); 04801 } else { 04802 ERROR("Unknown char property"); 04803 return; 04804 } 04805 if (ctxt->atom == NULL) { 04806 ctxt->atom = xmlRegNewAtom(ctxt, type); 04807 if (ctxt->atom != NULL) 04808 ctxt->atom->valuep = blockName; 04809 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 04810 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 04811 type, 0, 0, blockName); 04812 } 04813 } 04814 04825 static void 04826 xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) { 04827 int cur; 04828 04829 if (CUR == '.') { 04830 if (ctxt->atom == NULL) { 04831 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR); 04832 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 04833 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 04834 XML_REGEXP_ANYCHAR, 0, 0, NULL); 04835 } 04836 NEXT; 04837 return; 04838 } 04839 if (CUR != '\\') { 04840 ERROR("Escaped sequence: expecting \\"); 04841 return; 04842 } 04843 NEXT; 04844 cur = CUR; 04845 if (cur == 'p') { 04846 NEXT; 04847 if (CUR != '{') { 04848 ERROR("Expecting '{'"); 04849 return; 04850 } 04851 NEXT; 04852 xmlFAParseCharProp(ctxt); 04853 if (CUR != '}') { 04854 ERROR("Expecting '}'"); 04855 return; 04856 } 04857 NEXT; 04858 } else if (cur == 'P') { 04859 NEXT; 04860 if (CUR != '{') { 04861 ERROR("Expecting '{'"); 04862 return; 04863 } 04864 NEXT; 04865 xmlFAParseCharProp(ctxt); 04866 ctxt->atom->neg = 1; 04867 if (CUR != '}') { 04868 ERROR("Expecting '}'"); 04869 return; 04870 } 04871 NEXT; 04872 } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') || 04873 (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') || 04874 (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') || 04875 (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) || 04876 (cur == 0x5E)) { 04877 if (ctxt->atom == NULL) { 04878 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); 04879 if (ctxt->atom != NULL) { 04880 switch (cur) { 04881 case 'n': 04882 ctxt->atom->codepoint = '\n'; 04883 break; 04884 case 'r': 04885 ctxt->atom->codepoint = '\r'; 04886 break; 04887 case 't': 04888 ctxt->atom->codepoint = '\t'; 04889 break; 04890 default: 04891 ctxt->atom->codepoint = cur; 04892 } 04893 } 04894 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 04895 switch (cur) { 04896 case 'n': 04897 cur = '\n'; 04898 break; 04899 case 'r': 04900 cur = '\r'; 04901 break; 04902 case 't': 04903 cur = '\t'; 04904 break; 04905 } 04906 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 04907 XML_REGEXP_CHARVAL, cur, cur, NULL); 04908 } 04909 NEXT; 04910 } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') || 04911 (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') || 04912 (cur == 'w') || (cur == 'W')) { 04913 xmlRegAtomType type = XML_REGEXP_ANYSPACE; 04914 04915 switch (cur) { 04916 case 's': 04917 type = XML_REGEXP_ANYSPACE; 04918 break; 04919 case 'S': 04920 type = XML_REGEXP_NOTSPACE; 04921 break; 04922 case 'i': 04923 type = XML_REGEXP_INITNAME; 04924 break; 04925 case 'I': 04926 type = XML_REGEXP_NOTINITNAME; 04927 break; 04928 case 'c': 04929 type = XML_REGEXP_NAMECHAR; 04930 break; 04931 case 'C': 04932 type = XML_REGEXP_NOTNAMECHAR; 04933 break; 04934 case 'd': 04935 type = XML_REGEXP_DECIMAL; 04936 break; 04937 case 'D': 04938 type = XML_REGEXP_NOTDECIMAL; 04939 break; 04940 case 'w': 04941 type = XML_REGEXP_REALCHAR; 04942 break; 04943 case 'W': 04944 type = XML_REGEXP_NOTREALCHAR; 04945 break; 04946 } 04947 NEXT; 04948 if (ctxt->atom == NULL) { 04949 ctxt->atom = xmlRegNewAtom(ctxt, type); 04950 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 04951 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 04952 type, 0, 0, NULL); 04953 } 04954 } else { 04955 ERROR("Wrong escape sequence, misuse of character '\\'"); 04956 } 04957 } 04958 04969 static void 04970 xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { 04971 int cur, len; 04972 int start = -1; 04973 int end = -1; 04974 04975 if (CUR == '\0') { 04976 ERROR("Expecting ']'"); 04977 return; 04978 } 04979 04980 cur = CUR; 04981 if (cur == '\\') { 04982 NEXT; 04983 cur = CUR; 04984 switch (cur) { 04985 case 'n': start = 0xA; break; 04986 case 'r': start = 0xD; break; 04987 case 't': start = 0x9; break; 04988 case '\\': case '|': case '.': case '-': case '^': case '?': 04989 case '*': case '+': case '{': case '}': case '(': case ')': 04990 case '[': case ']': 04991 start = cur; break; 04992 default: 04993 ERROR("Invalid escape value"); 04994 return; 04995 } 04996 end = start; 04997 len = 1; 04998 } else if ((cur != 0x5B) && (cur != 0x5D)) { 04999 end = start = CUR_SCHAR(ctxt->cur, len); 05000 } else { 05001 ERROR("Expecting a char range"); 05002 return; 05003 } 05004 /* 05005 * Since we are "inside" a range, we can assume ctxt->cur is past 05006 * the start of ctxt->string, and PREV should be safe 05007 */ 05008 if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) { 05009 NEXTL(len); 05010 return; 05011 } 05012 NEXTL(len); 05013 cur = CUR; 05014 if ((cur != '-') || (NXT(1) == ']')) { 05015 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 05016 XML_REGEXP_CHARVAL, start, end, NULL); 05017 return; 05018 } 05019 NEXT; 05020 cur = CUR; 05021 if (cur == '\\') { 05022 NEXT; 05023 cur = CUR; 05024 switch (cur) { 05025 case 'n': end = 0xA; break; 05026 case 'r': end = 0xD; break; 05027 case 't': end = 0x9; break; 05028 case '\\': case '|': case '.': case '-': case '^': case '?': 05029 case '*': case '+': case '{': case '}': case '(': case ')': 05030 case '[': case ']': 05031 end = cur; break; 05032 default: 05033 ERROR("Invalid escape value"); 05034 return; 05035 } 05036 len = 1; 05037 } else if ((cur != 0x5B) && (cur != 0x5D)) { 05038 end = CUR_SCHAR(ctxt->cur, len); 05039 } else { 05040 ERROR("Expecting the end of a char range"); 05041 return; 05042 } 05043 NEXTL(len); 05044 /* TODO check that the values are acceptable character ranges for XML */ 05045 if (end < start) { 05046 ERROR("End of range is before start of range"); 05047 } else { 05048 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 05049 XML_REGEXP_CHARVAL, start, end, NULL); 05050 } 05051 return; 05052 } 05053 05060 static void 05061 xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) { 05062 do { 05063 if (CUR == '\\') { 05064 xmlFAParseCharClassEsc(ctxt); 05065 } else { 05066 xmlFAParseCharRange(ctxt); 05067 } 05068 } while ((CUR != ']') && (CUR != '^') && (CUR != '-') && 05069 (CUR != 0) && (ctxt->error == 0)); 05070 } 05071 05081 static void 05082 xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) { 05083 int n = ctxt->neg; 05084 while ((CUR != ']') && (ctxt->error == 0)) { 05085 if (CUR == '^') { 05086 int neg = ctxt->neg; 05087 05088 NEXT; 05089 ctxt->neg = !ctxt->neg; 05090 xmlFAParsePosCharGroup(ctxt); 05091 ctxt->neg = neg; 05092 } else if ((CUR == '-') && (NXT(1) == '[')) { 05093 int neg = ctxt->neg; 05094 ctxt->neg = 2; 05095 NEXT; /* eat the '-' */ 05096 NEXT; /* eat the '[' */ 05097 xmlFAParseCharGroup(ctxt); 05098 if (CUR == ']') { 05099 NEXT; 05100 } else { 05101 ERROR("charClassExpr: ']' expected"); 05102 break; 05103 } 05104 ctxt->neg = neg; 05105 break; 05106 } else if (CUR != ']') { 05107 xmlFAParsePosCharGroup(ctxt); 05108 } 05109 } 05110 ctxt->neg = n; 05111 } 05112 05120 static void 05121 xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) { 05122 if (CUR == '[') { 05123 NEXT; 05124 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES); 05125 if (ctxt->atom == NULL) 05126 return; 05127 xmlFAParseCharGroup(ctxt); 05128 if (CUR == ']') { 05129 NEXT; 05130 } else { 05131 ERROR("xmlFAParseCharClass: ']' expected"); 05132 } 05133 } else { 05134 xmlFAParseCharClassEsc(ctxt); 05135 } 05136 } 05137 05146 static int 05147 xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) { 05148 int ret = 0; 05149 int ok = 0; 05150 05151 while ((CUR >= '0') && (CUR <= '9')) { 05152 ret = ret * 10 + (CUR - '0'); 05153 ok = 1; 05154 NEXT; 05155 } 05156 if (ok != 1) { 05157 return(-1); 05158 } 05159 return(ret); 05160 } 05161 05172 static int 05173 xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) { 05174 int cur; 05175 05176 cur = CUR; 05177 if ((cur == '?') || (cur == '*') || (cur == '+')) { 05178 if (ctxt->atom != NULL) { 05179 if (cur == '?') 05180 ctxt->atom->quant = XML_REGEXP_QUANT_OPT; 05181 else if (cur == '*') 05182 ctxt->atom->quant = XML_REGEXP_QUANT_MULT; 05183 else if (cur == '+') 05184 ctxt->atom->quant = XML_REGEXP_QUANT_PLUS; 05185 } 05186 NEXT; 05187 return(1); 05188 } 05189 if (cur == '{') { 05190 int min = 0, max = 0; 05191 05192 NEXT; 05193 cur = xmlFAParseQuantExact(ctxt); 05194 if (cur >= 0) 05195 min = cur; 05196 if (CUR == ',') { 05197 NEXT; 05198 if (CUR == '}') 05199 max = INT_MAX; 05200 else { 05201 cur = xmlFAParseQuantExact(ctxt); 05202 if (cur >= 0) 05203 max = cur; 05204 else { 05205 ERROR("Improper quantifier"); 05206 } 05207 } 05208 } 05209 if (CUR == '}') { 05210 NEXT; 05211 } else { 05212 ERROR("Unterminated quantifier"); 05213 } 05214 if (max == 0) 05215 max = min; 05216 if (ctxt->atom != NULL) { 05217 ctxt->atom->quant = XML_REGEXP_QUANT_RANGE; 05218 ctxt->atom->min = min; 05219 ctxt->atom->max = max; 05220 } 05221 return(1); 05222 } 05223 return(0); 05224 } 05225 05232 static int 05233 xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) { 05234 int codepoint, len; 05235 05236 codepoint = xmlFAIsChar(ctxt); 05237 if (codepoint > 0) { 05238 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); 05239 if (ctxt->atom == NULL) 05240 return(-1); 05241 codepoint = CUR_SCHAR(ctxt->cur, len); 05242 ctxt->atom->codepoint = codepoint; 05243 NEXTL(len); 05244 return(1); 05245 } else if (CUR == '|') { 05246 return(0); 05247 } else if (CUR == 0) { 05248 return(0); 05249 } else if (CUR == ')') { 05250 return(0); 05251 } else if (CUR == '(') { 05252 xmlRegStatePtr start, oldend, start0; 05253 05254 NEXT; 05255 /* 05256 * this extra Epsilon transition is needed if we count with 0 allowed 05257 * unfortunately this can't be known at that point 05258 */ 05259 xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL); 05260 start0 = ctxt->state; 05261 xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL); 05262 start = ctxt->state; 05263 oldend = ctxt->end; 05264 ctxt->end = NULL; 05265 ctxt->atom = NULL; 05266 xmlFAParseRegExp(ctxt, 0); 05267 if (CUR == ')') { 05268 NEXT; 05269 } else { 05270 ERROR("xmlFAParseAtom: expecting ')'"); 05271 } 05272 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG); 05273 if (ctxt->atom == NULL) 05274 return(-1); 05275 ctxt->atom->start = start; 05276 ctxt->atom->start0 = start0; 05277 ctxt->atom->stop = ctxt->state; 05278 ctxt->end = oldend; 05279 return(1); 05280 } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) { 05281 xmlFAParseCharClass(ctxt); 05282 return(1); 05283 } 05284 return(0); 05285 } 05286 05293 static int 05294 xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) { 05295 int ret; 05296 05297 ctxt->atom = NULL; 05298 ret = xmlFAParseAtom(ctxt); 05299 if (ret == 0) 05300 return(0); 05301 if (ctxt->atom == NULL) { 05302 ERROR("internal: no atom generated"); 05303 } 05304 xmlFAParseQuantifier(ctxt); 05305 return(1); 05306 } 05307 05318 static int 05319 xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) { 05320 xmlRegStatePtr previous; 05321 int ret; 05322 05323 previous = ctxt->state; 05324 ret = xmlFAParsePiece(ctxt); 05325 if (ret != 0) { 05326 if (xmlFAGenerateTransitions(ctxt, previous, 05327 (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0) 05328 return(-1); 05329 previous = ctxt->state; 05330 ctxt->atom = NULL; 05331 } 05332 while ((ret != 0) && (ctxt->error == 0)) { 05333 ret = xmlFAParsePiece(ctxt); 05334 if (ret != 0) { 05335 if (xmlFAGenerateTransitions(ctxt, previous, 05336 (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0) 05337 return(-1); 05338 previous = ctxt->state; 05339 ctxt->atom = NULL; 05340 } 05341 } 05342 return(0); 05343 } 05344 05352 static void 05353 xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) { 05354 xmlRegStatePtr start, end; 05355 05356 /* if not top start should have been generated by an epsilon trans */ 05357 start = ctxt->state; 05358 ctxt->end = NULL; 05359 xmlFAParseBranch(ctxt, NULL); 05360 if (top) { 05361 #ifdef DEBUG_REGEXP_GRAPH 05362 printf("State %d is final\n", ctxt->state->no); 05363 #endif 05364 ctxt->state->type = XML_REGEXP_FINAL_STATE; 05365 } 05366 if (CUR != '|') { 05367 ctxt->end = ctxt->state; 05368 return; 05369 } 05370 end = ctxt->state; 05371 while ((CUR == '|') && (ctxt->error == 0)) { 05372 NEXT; 05373 ctxt->state = start; 05374 ctxt->end = NULL; 05375 xmlFAParseBranch(ctxt, end); 05376 } 05377 if (!top) { 05378 ctxt->state = end; 05379 ctxt->end = end; 05380 } 05381 } 05382 05383 /************************************************************************ 05384 * * 05385 * The basic API * 05386 * * 05387 ************************************************************************/ 05388 05396 void 05397 xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) { 05398 int i; 05399 05400 if (output == NULL) 05401 return; 05402 fprintf(output, " regexp: "); 05403 if (regexp == NULL) { 05404 fprintf(output, "NULL\n"); 05405 return; 05406 } 05407 fprintf(output, "'%s' ", regexp->string); 05408 fprintf(output, "\n"); 05409 fprintf(output, "%d atoms:\n", regexp->nbAtoms); 05410 for (i = 0;i < regexp->nbAtoms; i++) { 05411 fprintf(output, " %02d ", i); 05412 xmlRegPrintAtom(output, regexp->atoms[i]); 05413 } 05414 fprintf(output, "%d states:", regexp->nbStates); 05415 fprintf(output, "\n"); 05416 for (i = 0;i < regexp->nbStates; i++) { 05417 xmlRegPrintState(output, regexp->states[i]); 05418 } 05419 fprintf(output, "%d counters:\n", regexp->nbCounters); 05420 for (i = 0;i < regexp->nbCounters; i++) { 05421 fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min, 05422 regexp->counters[i].max); 05423 } 05424 } 05425 05436 xmlRegexpPtr 05437 xmlRegexpCompile(const xmlChar *regexp) { 05438 xmlRegexpPtr ret; 05439 xmlRegParserCtxtPtr ctxt; 05440 05441 ctxt = xmlRegNewParserCtxt(regexp); 05442 if (ctxt == NULL) 05443 return(NULL); 05444 05445 /* initialize the parser */ 05446 ctxt->end = NULL; 05447 ctxt->start = ctxt->state = xmlRegNewState(ctxt); 05448 xmlRegStatePush(ctxt, ctxt->start); 05449 05450 /* parse the expression building an automata */ 05451 xmlFAParseRegExp(ctxt, 1); 05452 if (CUR != 0) { 05453 ERROR("xmlFAParseRegExp: extra characters"); 05454 } 05455 if (ctxt->error != 0) { 05456 xmlRegFreeParserCtxt(ctxt); 05457 return(NULL); 05458 } 05459 ctxt->end = ctxt->state; 05460 ctxt->start->type = XML_REGEXP_START_STATE; 05461 ctxt->end->type = XML_REGEXP_FINAL_STATE; 05462 05463 /* remove the Epsilon except for counted transitions */ 05464 xmlFAEliminateEpsilonTransitions(ctxt); 05465 05466 05467 if (ctxt->error != 0) { 05468 xmlRegFreeParserCtxt(ctxt); 05469 return(NULL); 05470 } 05471 ret = xmlRegEpxFromParse(ctxt); 05472 xmlRegFreeParserCtxt(ctxt); 05473 return(ret); 05474 } 05475 05485 int 05486 xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) { 05487 if ((comp == NULL) || (content == NULL)) 05488 return(-1); 05489 return(xmlFARegExec(comp, content)); 05490 } 05491 05500 int 05501 xmlRegexpIsDeterminist(xmlRegexpPtr comp) { 05502 xmlAutomataPtr am; 05503 int ret; 05504 05505 if (comp == NULL) 05506 return(-1); 05507 if (comp->determinist != -1) 05508 return(comp->determinist); 05509 05510 am = xmlNewAutomata(); 05511 if (am->states != NULL) { 05512 int i; 05513 05514 for (i = 0;i < am->nbStates;i++) 05515 xmlRegFreeState(am->states[i]); 05516 xmlFree(am->states); 05517 } 05518 am->nbAtoms = comp->nbAtoms; 05519 am->atoms = comp->atoms; 05520 am->nbStates = comp->nbStates; 05521 am->states = comp->states; 05522 am->determinist = -1; 05523 am->flags = comp->flags; 05524 ret = xmlFAComputesDeterminism(am); 05525 am->atoms = NULL; 05526 am->states = NULL; 05527 xmlFreeAutomata(am); 05528 comp->determinist = ret; 05529 return(ret); 05530 } 05531 05538 void 05539 xmlRegFreeRegexp(xmlRegexpPtr regexp) { 05540 int i; 05541 if (regexp == NULL) 05542 return; 05543 05544 if (regexp->string != NULL) 05545 xmlFree(regexp->string); 05546 if (regexp->states != NULL) { 05547 for (i = 0;i < regexp->nbStates;i++) 05548 xmlRegFreeState(regexp->states[i]); 05549 xmlFree(regexp->states); 05550 } 05551 if (regexp->atoms != NULL) { 05552 for (i = 0;i < regexp->nbAtoms;i++) 05553 xmlRegFreeAtom(regexp->atoms[i]); 05554 xmlFree(regexp->atoms); 05555 } 05556 if (regexp->counters != NULL) 05557 xmlFree(regexp->counters); 05558 if (regexp->compact != NULL) 05559 xmlFree(regexp->compact); 05560 if (regexp->transdata != NULL) 05561 xmlFree(regexp->transdata); 05562 if (regexp->stringMap != NULL) { 05563 for (i = 0; i < regexp->nbstrings;i++) 05564 xmlFree(regexp->stringMap[i]); 05565 xmlFree(regexp->stringMap); 05566 } 05567 05568 xmlFree(regexp); 05569 } 05570 05571 #ifdef LIBXML_AUTOMATA_ENABLED 05572 /************************************************************************ 05573 * * 05574 * The Automata interface * 05575 * * 05576 ************************************************************************/ 05577 05585 xmlAutomataPtr 05586 xmlNewAutomata(void) { 05587 xmlAutomataPtr ctxt; 05588 05589 ctxt = xmlRegNewParserCtxt(NULL); 05590 if (ctxt == NULL) 05591 return(NULL); 05592 05593 /* initialize the parser */ 05594 ctxt->end = NULL; 05595 ctxt->start = ctxt->state = xmlRegNewState(ctxt); 05596 if (ctxt->start == NULL) { 05597 xmlFreeAutomata(ctxt); 05598 return(NULL); 05599 } 05600 ctxt->start->type = XML_REGEXP_START_STATE; 05601 if (xmlRegStatePush(ctxt, ctxt->start) < 0) { 05602 xmlRegFreeState(ctxt->start); 05603 xmlFreeAutomata(ctxt); 05604 return(NULL); 05605 } 05606 ctxt->flags = 0; 05607 05608 return(ctxt); 05609 } 05610 05617 void 05618 xmlFreeAutomata(xmlAutomataPtr am) { 05619 if (am == NULL) 05620 return; 05621 xmlRegFreeParserCtxt(am); 05622 } 05623 05631 void 05632 xmlAutomataSetFlags(xmlAutomataPtr am, int flags) { 05633 if (am == NULL) 05634 return; 05635 am->flags |= flags; 05636 } 05637 05646 xmlAutomataStatePtr 05647 xmlAutomataGetInitState(xmlAutomataPtr am) { 05648 if (am == NULL) 05649 return(NULL); 05650 return(am->start); 05651 } 05652 05662 int 05663 xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) { 05664 if ((am == NULL) || (state == NULL)) 05665 return(-1); 05666 state->type = XML_REGEXP_FINAL_STATE; 05667 return(0); 05668 } 05669 05684 xmlAutomataStatePtr 05685 xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from, 05686 xmlAutomataStatePtr to, const xmlChar *token, 05687 void *data) { 05688 xmlRegAtomPtr atom; 05689 05690 if ((am == NULL) || (from == NULL) || (token == NULL)) 05691 return(NULL); 05692 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 05693 if (atom == NULL) 05694 return(NULL); 05695 atom->data = data; 05696 if (atom == NULL) 05697 return(NULL); 05698 atom->valuep = xmlStrdup(token); 05699 05700 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { 05701 xmlRegFreeAtom(atom); 05702 return(NULL); 05703 } 05704 if (to == NULL) 05705 return(am->state); 05706 return(to); 05707 } 05708 05724 xmlAutomataStatePtr 05725 xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from, 05726 xmlAutomataStatePtr to, const xmlChar *token, 05727 const xmlChar *token2, void *data) { 05728 xmlRegAtomPtr atom; 05729 05730 if ((am == NULL) || (from == NULL) || (token == NULL)) 05731 return(NULL); 05732 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 05733 if (atom == NULL) 05734 return(NULL); 05735 atom->data = data; 05736 if ((token2 == NULL) || (*token2 == 0)) { 05737 atom->valuep = xmlStrdup(token); 05738 } else { 05739 int lenn, lenp; 05740 xmlChar *str; 05741 05742 lenn = strlen((char *) token2); 05743 lenp = strlen((char *) token); 05744 05745 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 05746 if (str == NULL) { 05747 xmlRegFreeAtom(atom); 05748 return(NULL); 05749 } 05750 memcpy(&str[0], token, lenp); 05751 str[lenp] = '|'; 05752 memcpy(&str[lenp + 1], token2, lenn); 05753 str[lenn + lenp + 1] = 0; 05754 05755 atom->valuep = str; 05756 } 05757 05758 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { 05759 xmlRegFreeAtom(atom); 05760 return(NULL); 05761 } 05762 if (to == NULL) 05763 return(am->state); 05764 return(to); 05765 } 05766 05784 xmlAutomataStatePtr 05785 xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 05786 xmlAutomataStatePtr to, const xmlChar *token, 05787 const xmlChar *token2, void *data) { 05788 xmlRegAtomPtr atom; 05789 xmlChar err_msg[200]; 05790 05791 if ((am == NULL) || (from == NULL) || (token == NULL)) 05792 return(NULL); 05793 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 05794 if (atom == NULL) 05795 return(NULL); 05796 atom->data = data; 05797 atom->neg = 1; 05798 if ((token2 == NULL) || (*token2 == 0)) { 05799 atom->valuep = xmlStrdup(token); 05800 } else { 05801 int lenn, lenp; 05802 xmlChar *str; 05803 05804 lenn = strlen((char *) token2); 05805 lenp = strlen((char *) token); 05806 05807 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 05808 if (str == NULL) { 05809 xmlRegFreeAtom(atom); 05810 return(NULL); 05811 } 05812 memcpy(&str[0], token, lenp); 05813 str[lenp] = '|'; 05814 memcpy(&str[lenp + 1], token2, lenn); 05815 str[lenn + lenp + 1] = 0; 05816 05817 atom->valuep = str; 05818 } 05819 snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep); 05820 err_msg[199] = 0; 05821 atom->valuep2 = xmlStrdup(err_msg); 05822 05823 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { 05824 xmlRegFreeAtom(atom); 05825 return(NULL); 05826 } 05827 am->negs++; 05828 if (to == NULL) 05829 return(am->state); 05830 return(to); 05831 } 05832 05851 xmlAutomataStatePtr 05852 xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from, 05853 xmlAutomataStatePtr to, const xmlChar *token, 05854 const xmlChar *token2, 05855 int min, int max, void *data) { 05856 xmlRegAtomPtr atom; 05857 int counter; 05858 05859 if ((am == NULL) || (from == NULL) || (token == NULL)) 05860 return(NULL); 05861 if (min < 0) 05862 return(NULL); 05863 if ((max < min) || (max < 1)) 05864 return(NULL); 05865 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 05866 if (atom == NULL) 05867 return(NULL); 05868 if ((token2 == NULL) || (*token2 == 0)) { 05869 atom->valuep = xmlStrdup(token); 05870 } else { 05871 int lenn, lenp; 05872 xmlChar *str; 05873 05874 lenn = strlen((char *) token2); 05875 lenp = strlen((char *) token); 05876 05877 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 05878 if (str == NULL) { 05879 xmlRegFreeAtom(atom); 05880 return(NULL); 05881 } 05882 memcpy(&str[0], token, lenp); 05883 str[lenp] = '|'; 05884 memcpy(&str[lenp + 1], token2, lenn); 05885 str[lenn + lenp + 1] = 0; 05886 05887 atom->valuep = str; 05888 } 05889 atom->data = data; 05890 if (min == 0) 05891 atom->min = 1; 05892 else 05893 atom->min = min; 05894 atom->max = max; 05895 05896 /* 05897 * associate a counter to the transition. 05898 */ 05899 counter = xmlRegGetCounter(am); 05900 am->counters[counter].min = min; 05901 am->counters[counter].max = max; 05902 05903 /* xmlFAGenerateTransitions(am, from, to, atom); */ 05904 if (to == NULL) { 05905 to = xmlRegNewState(am); 05906 xmlRegStatePush(am, to); 05907 } 05908 xmlRegStateAddTrans(am, from, atom, to, counter, -1); 05909 xmlRegAtomPush(am, atom); 05910 am->state = to; 05911 05912 if (to == NULL) 05913 to = am->state; 05914 if (to == NULL) 05915 return(NULL); 05916 if (min == 0) 05917 xmlFAGenerateEpsilonTransition(am, from, to); 05918 return(to); 05919 } 05920 05938 xmlAutomataStatePtr 05939 xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 05940 xmlAutomataStatePtr to, const xmlChar *token, 05941 int min, int max, void *data) { 05942 xmlRegAtomPtr atom; 05943 int counter; 05944 05945 if ((am == NULL) || (from == NULL) || (token == NULL)) 05946 return(NULL); 05947 if (min < 0) 05948 return(NULL); 05949 if ((max < min) || (max < 1)) 05950 return(NULL); 05951 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 05952 if (atom == NULL) 05953 return(NULL); 05954 atom->valuep = xmlStrdup(token); 05955 atom->data = data; 05956 if (min == 0) 05957 atom->min = 1; 05958 else 05959 atom->min = min; 05960 atom->max = max; 05961 05962 /* 05963 * associate a counter to the transition. 05964 */ 05965 counter = xmlRegGetCounter(am); 05966 am->counters[counter].min = min; 05967 am->counters[counter].max = max; 05968 05969 /* xmlFAGenerateTransitions(am, from, to, atom); */ 05970 if (to == NULL) { 05971 to = xmlRegNewState(am); 05972 xmlRegStatePush(am, to); 05973 } 05974 xmlRegStateAddTrans(am, from, atom, to, counter, -1); 05975 xmlRegAtomPush(am, atom); 05976 am->state = to; 05977 05978 if (to == NULL) 05979 to = am->state; 05980 if (to == NULL) 05981 return(NULL); 05982 if (min == 0) 05983 xmlFAGenerateEpsilonTransition(am, from, to); 05984 return(to); 05985 } 05986 06006 xmlAutomataStatePtr 06007 xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from, 06008 xmlAutomataStatePtr to, const xmlChar *token, 06009 const xmlChar *token2, 06010 int min, int max, void *data) { 06011 xmlRegAtomPtr atom; 06012 int counter; 06013 06014 if ((am == NULL) || (from == NULL) || (token == NULL)) 06015 return(NULL); 06016 if (min < 1) 06017 return(NULL); 06018 if ((max < min) || (max < 1)) 06019 return(NULL); 06020 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 06021 if (atom == NULL) 06022 return(NULL); 06023 if ((token2 == NULL) || (*token2 == 0)) { 06024 atom->valuep = xmlStrdup(token); 06025 } else { 06026 int lenn, lenp; 06027 xmlChar *str; 06028 06029 lenn = strlen((char *) token2); 06030 lenp = strlen((char *) token); 06031 06032 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 06033 if (str == NULL) { 06034 xmlRegFreeAtom(atom); 06035 return(NULL); 06036 } 06037 memcpy(&str[0], token, lenp); 06038 str[lenp] = '|'; 06039 memcpy(&str[lenp + 1], token2, lenn); 06040 str[lenn + lenp + 1] = 0; 06041 06042 atom->valuep = str; 06043 } 06044 atom->data = data; 06045 atom->quant = XML_REGEXP_QUANT_ONCEONLY; 06046 atom->min = min; 06047 atom->max = max; 06048 /* 06049 * associate a counter to the transition. 06050 */ 06051 counter = xmlRegGetCounter(am); 06052 am->counters[counter].min = 1; 06053 am->counters[counter].max = 1; 06054 06055 /* xmlFAGenerateTransitions(am, from, to, atom); */ 06056 if (to == NULL) { 06057 to = xmlRegNewState(am); 06058 xmlRegStatePush(am, to); 06059 } 06060 xmlRegStateAddTrans(am, from, atom, to, counter, -1); 06061 xmlRegAtomPush(am, atom); 06062 am->state = to; 06063 return(to); 06064 } 06065 06066 06067 06086 xmlAutomataStatePtr 06087 xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 06088 xmlAutomataStatePtr to, const xmlChar *token, 06089 int min, int max, void *data) { 06090 xmlRegAtomPtr atom; 06091 int counter; 06092 06093 if ((am == NULL) || (from == NULL) || (token == NULL)) 06094 return(NULL); 06095 if (min < 1) 06096 return(NULL); 06097 if ((max < min) || (max < 1)) 06098 return(NULL); 06099 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 06100 if (atom == NULL) 06101 return(NULL); 06102 atom->valuep = xmlStrdup(token); 06103 atom->data = data; 06104 atom->quant = XML_REGEXP_QUANT_ONCEONLY; 06105 atom->min = min; 06106 atom->max = max; 06107 /* 06108 * associate a counter to the transition. 06109 */ 06110 counter = xmlRegGetCounter(am); 06111 am->counters[counter].min = 1; 06112 am->counters[counter].max = 1; 06113 06114 /* xmlFAGenerateTransitions(am, from, to, atom); */ 06115 if (to == NULL) { 06116 to = xmlRegNewState(am); 06117 xmlRegStatePush(am, to); 06118 } 06119 xmlRegStateAddTrans(am, from, atom, to, counter, -1); 06120 xmlRegAtomPush(am, atom); 06121 am->state = to; 06122 return(to); 06123 } 06124 06133 xmlAutomataStatePtr 06134 xmlAutomataNewState(xmlAutomataPtr am) { 06135 xmlAutomataStatePtr to; 06136 06137 if (am == NULL) 06138 return(NULL); 06139 to = xmlRegNewState(am); 06140 xmlRegStatePush(am, to); 06141 return(to); 06142 } 06143 06156 xmlAutomataStatePtr 06157 xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from, 06158 xmlAutomataStatePtr to) { 06159 if ((am == NULL) || (from == NULL)) 06160 return(NULL); 06161 xmlFAGenerateEpsilonTransition(am, from, to); 06162 if (to == NULL) 06163 return(am->state); 06164 return(to); 06165 } 06166 06181 xmlAutomataStatePtr 06182 xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 06183 xmlAutomataStatePtr to, int lax) { 06184 if ((am == NULL) || (from == NULL)) 06185 return(NULL); 06186 xmlFAGenerateAllTransition(am, from, to, lax); 06187 if (to == NULL) 06188 return(am->state); 06189 return(to); 06190 } 06191 06202 int 06203 xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) { 06204 int ret; 06205 06206 if (am == NULL) 06207 return(-1); 06208 06209 ret = xmlRegGetCounter(am); 06210 if (ret < 0) 06211 return(-1); 06212 am->counters[ret].min = min; 06213 am->counters[ret].max = max; 06214 return(ret); 06215 } 06216 06230 xmlAutomataStatePtr 06231 xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 06232 xmlAutomataStatePtr to, int counter) { 06233 if ((am == NULL) || (from == NULL) || (counter < 0)) 06234 return(NULL); 06235 xmlFAGenerateCountedEpsilonTransition(am, from, to, counter); 06236 if (to == NULL) 06237 return(am->state); 06238 return(to); 06239 } 06240 06254 xmlAutomataStatePtr 06255 xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 06256 xmlAutomataStatePtr to, int counter) { 06257 if ((am == NULL) || (from == NULL) || (counter < 0)) 06258 return(NULL); 06259 xmlFAGenerateCountedTransition(am, from, to, counter); 06260 if (to == NULL) 06261 return(am->state); 06262 return(to); 06263 } 06264 06274 xmlRegexpPtr 06275 xmlAutomataCompile(xmlAutomataPtr am) { 06276 xmlRegexpPtr ret; 06277 06278 if ((am == NULL) || (am->error != 0)) return(NULL); 06279 xmlFAEliminateEpsilonTransitions(am); 06280 /* xmlFAComputesDeterminism(am); */ 06281 ret = xmlRegEpxFromParse(am); 06282 06283 return(ret); 06284 } 06285 06294 int 06295 xmlAutomataIsDeterminist(xmlAutomataPtr am) { 06296 int ret; 06297 06298 if (am == NULL) 06299 return(-1); 06300 06301 ret = xmlFAComputesDeterminism(am); 06302 return(ret); 06303 } 06304 #endif /* LIBXML_AUTOMATA_ENABLED */ 06305 06306 #ifdef LIBXML_EXPR_ENABLED 06307 /************************************************************************ 06308 * * 06309 * Formal Expression handling code * 06310 * * 06311 ************************************************************************/ 06312 /************************************************************************ 06313 * * 06314 * Expression handling context * 06315 * * 06316 ************************************************************************/ 06317 06318 struct _xmlExpCtxt { 06319 xmlDictPtr dict; 06320 xmlExpNodePtr *table; 06321 int size; 06322 int nbElems; 06323 int nb_nodes; 06324 int maxNodes; 06325 const char *expr; 06326 const char *cur; 06327 int nb_cons; 06328 int tabSize; 06329 }; 06330 06340 xmlExpCtxtPtr 06341 xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) { 06342 xmlExpCtxtPtr ret; 06343 int size = 256; 06344 06345 if (maxNodes <= 4096) 06346 maxNodes = 4096; 06347 06348 ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt)); 06349 if (ret == NULL) 06350 return(NULL); 06351 memset(ret, 0, sizeof(xmlExpCtxt)); 06352 ret->size = size; 06353 ret->nbElems = 0; 06354 ret->maxNodes = maxNodes; 06355 ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr)); 06356 if (ret->table == NULL) { 06357 xmlFree(ret); 06358 return(NULL); 06359 } 06360 memset(ret->table, 0, size * sizeof(xmlExpNodePtr)); 06361 if (dict == NULL) { 06362 ret->dict = xmlDictCreate(); 06363 if (ret->dict == NULL) { 06364 xmlFree(ret->table); 06365 xmlFree(ret); 06366 return(NULL); 06367 } 06368 } else { 06369 ret->dict = dict; 06370 xmlDictReference(ret->dict); 06371 } 06372 return(ret); 06373 } 06374 06381 void 06382 xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) { 06383 if (ctxt == NULL) 06384 return; 06385 xmlDictFree(ctxt->dict); 06386 if (ctxt->table != NULL) 06387 xmlFree(ctxt->table); 06388 xmlFree(ctxt); 06389 } 06390 06391 /************************************************************************ 06392 * * 06393 * Structure associated to an expression node * 06394 * * 06395 ************************************************************************/ 06396 #define MAX_NODES 10000 06397 06398 /* #define DEBUG_DERIV */ 06399 06400 /* 06401 * TODO: 06402 * - Wildcards 06403 * - public API for creation 06404 * 06405 * Started 06406 * - regression testing 06407 * 06408 * Done 06409 * - split into module and test tool 06410 * - memleaks 06411 */ 06412 06413 typedef enum { 06414 XML_EXP_NILABLE = (1 << 0) 06415 } xmlExpNodeInfo; 06416 06417 #define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE) 06418 06419 struct _xmlExpNode { 06420 unsigned char type;/* xmlExpNodeType */ 06421 unsigned char info;/* OR of xmlExpNodeInfo */ 06422 unsigned short key; /* the hash key */ 06423 unsigned int ref; /* The number of references */ 06424 int c_max; /* the maximum length it can consume */ 06425 xmlExpNodePtr exp_left; 06426 xmlExpNodePtr next;/* the next node in the hash table or free list */ 06427 union { 06428 struct { 06429 int f_min; 06430 int f_max; 06431 } count; 06432 struct { 06433 xmlExpNodePtr f_right; 06434 } children; 06435 const xmlChar *f_str; 06436 } field; 06437 }; 06438 06439 #define exp_min field.count.f_min 06440 #define exp_max field.count.f_max 06441 /* #define exp_left field.children.f_left */ 06442 #define exp_right field.children.f_right 06443 #define exp_str field.f_str 06444 06445 static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type); 06446 static xmlExpNode forbiddenExpNode = { 06447 XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}} 06448 }; 06449 xmlExpNodePtr forbiddenExp = &forbiddenExpNode; 06450 static xmlExpNode emptyExpNode = { 06451 XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}} 06452 }; 06453 xmlExpNodePtr emptyExp = &emptyExpNode; 06454 06455 /************************************************************************ 06456 * * 06457 * The custom hash table for unicity and canonicalization * 06458 * of sub-expressions pointers * 06459 * * 06460 ************************************************************************/ 06461 /* 06462 * xmlExpHashNameComputeKey: 06463 * Calculate the hash key for a token 06464 */ 06465 static unsigned short 06466 xmlExpHashNameComputeKey(const xmlChar *name) { 06467 unsigned short value = 0L; 06468 char ch; 06469 06470 if (name != NULL) { 06471 value += 30 * (*name); 06472 while ((ch = *name++) != 0) { 06473 value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch); 06474 } 06475 } 06476 return (value); 06477 } 06478 06479 /* 06480 * xmlExpHashComputeKey: 06481 * Calculate the hash key for a compound expression 06482 */ 06483 static unsigned short 06484 xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left, 06485 xmlExpNodePtr right) { 06486 unsigned long value; 06487 unsigned short ret; 06488 06489 switch (type) { 06490 case XML_EXP_SEQ: 06491 value = left->key; 06492 value += right->key; 06493 value *= 3; 06494 ret = (unsigned short) value; 06495 break; 06496 case XML_EXP_OR: 06497 value = left->key; 06498 value += right->key; 06499 value *= 7; 06500 ret = (unsigned short) value; 06501 break; 06502 case XML_EXP_COUNT: 06503 value = left->key; 06504 value += right->key; 06505 ret = (unsigned short) value; 06506 break; 06507 default: 06508 ret = 0; 06509 } 06510 return(ret); 06511 } 06512 06513 06514 static xmlExpNodePtr 06515 xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) { 06516 xmlExpNodePtr ret; 06517 06518 if (ctxt->nb_nodes >= MAX_NODES) 06519 return(NULL); 06520 ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode)); 06521 if (ret == NULL) 06522 return(NULL); 06523 memset(ret, 0, sizeof(xmlExpNode)); 06524 ret->type = type; 06525 ret->next = NULL; 06526 ctxt->nb_nodes++; 06527 ctxt->nb_cons++; 06528 return(ret); 06529 } 06530 06541 static xmlExpNodePtr 06542 xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type, 06543 xmlExpNodePtr left, xmlExpNodePtr right, 06544 const xmlChar *name, int min, int max) { 06545 unsigned short kbase, key; 06546 xmlExpNodePtr entry; 06547 xmlExpNodePtr insert; 06548 06549 if (ctxt == NULL) 06550 return(NULL); 06551 06552 /* 06553 * Check for duplicate and insertion location. 06554 */ 06555 if (type == XML_EXP_ATOM) { 06556 kbase = xmlExpHashNameComputeKey(name); 06557 } else if (type == XML_EXP_COUNT) { 06558 /* COUNT reduction rule 1 */ 06559 /* a{1} -> a */ 06560 if (min == max) { 06561 if (min == 1) { 06562 return(left); 06563 } 06564 if (min == 0) { 06565 xmlExpFree(ctxt, left); 06566 return(emptyExp); 06567 } 06568 } 06569 if (min < 0) { 06570 xmlExpFree(ctxt, left); 06571 return(forbiddenExp); 06572 } 06573 if (max == -1) 06574 kbase = min + 79; 06575 else 06576 kbase = max - min; 06577 kbase += left->key; 06578 } else if (type == XML_EXP_OR) { 06579 /* Forbid reduction rules */ 06580 if (left->type == XML_EXP_FORBID) { 06581 xmlExpFree(ctxt, left); 06582 return(right); 06583 } 06584 if (right->type == XML_EXP_FORBID) { 06585 xmlExpFree(ctxt, right); 06586 return(left); 06587 } 06588 06589 /* OR reduction rule 1 */ 06590 /* a | a reduced to a */ 06591 if (left == right) { 06592 left->ref--; 06593 return(left); 06594 } 06595 /* OR canonicalization rule 1 */ 06596 /* linearize (a | b) | c into a | (b | c) */ 06597 if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) { 06598 xmlExpNodePtr tmp = left; 06599 left = right; 06600 right = tmp; 06601 } 06602 /* OR reduction rule 2 */ 06603 /* a | (a | b) and b | (a | b) are reduced to a | b */ 06604 if (right->type == XML_EXP_OR) { 06605 if ((left == right->exp_left) || 06606 (left == right->exp_right)) { 06607 xmlExpFree(ctxt, left); 06608 return(right); 06609 } 06610 } 06611 /* OR canonicalization rule 2 */ 06612 /* linearize (a | b) | c into a | (b | c) */ 06613 if (left->type == XML_EXP_OR) { 06614 xmlExpNodePtr tmp; 06615 06616 /* OR canonicalization rule 2 */ 06617 if ((left->exp_right->type != XML_EXP_OR) && 06618 (left->exp_right->key < left->exp_left->key)) { 06619 tmp = left->exp_right; 06620 left->exp_right = left->exp_left; 06621 left->exp_left = tmp; 06622 } 06623 left->exp_right->ref++; 06624 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right, 06625 NULL, 0, 0); 06626 left->exp_left->ref++; 06627 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp, 06628 NULL, 0, 0); 06629 06630 xmlExpFree(ctxt, left); 06631 return(tmp); 06632 } 06633 if (right->type == XML_EXP_OR) { 06634 /* Ordering in the tree */ 06635 /* C | (A | B) -> A | (B | C) */ 06636 if (left->key > right->exp_right->key) { 06637 xmlExpNodePtr tmp; 06638 right->exp_right->ref++; 06639 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right, 06640 left, NULL, 0, 0); 06641 right->exp_left->ref++; 06642 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left, 06643 tmp, NULL, 0, 0); 06644 xmlExpFree(ctxt, right); 06645 return(tmp); 06646 } 06647 /* Ordering in the tree */ 06648 /* B | (A | C) -> A | (B | C) */ 06649 if (left->key > right->exp_left->key) { 06650 xmlExpNodePtr tmp; 06651 right->exp_right->ref++; 06652 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, 06653 right->exp_right, NULL, 0, 0); 06654 right->exp_left->ref++; 06655 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left, 06656 tmp, NULL, 0, 0); 06657 xmlExpFree(ctxt, right); 06658 return(tmp); 06659 } 06660 } 06661 /* we know both types are != XML_EXP_OR here */ 06662 else if (left->key > right->key) { 06663 xmlExpNodePtr tmp = left; 06664 left = right; 06665 right = tmp; 06666 } 06667 kbase = xmlExpHashComputeKey(type, left, right); 06668 } else if (type == XML_EXP_SEQ) { 06669 /* Forbid reduction rules */ 06670 if (left->type == XML_EXP_FORBID) { 06671 xmlExpFree(ctxt, right); 06672 return(left); 06673 } 06674 if (right->type == XML_EXP_FORBID) { 06675 xmlExpFree(ctxt, left); 06676 return(right); 06677 } 06678 /* Empty reduction rules */ 06679 if (right->type == XML_EXP_EMPTY) { 06680 return(left); 06681 } 06682 if (left->type == XML_EXP_EMPTY) { 06683 return(right); 06684 } 06685 kbase = xmlExpHashComputeKey(type, left, right); 06686 } else 06687 return(NULL); 06688 06689 key = kbase % ctxt->size; 06690 if (ctxt->table[key] != NULL) { 06691 for (insert = ctxt->table[key]; insert != NULL; 06692 insert = insert->next) { 06693 if ((insert->key == kbase) && 06694 (insert->type == type)) { 06695 if (type == XML_EXP_ATOM) { 06696 if (name == insert->exp_str) { 06697 insert->ref++; 06698 return(insert); 06699 } 06700 } else if (type == XML_EXP_COUNT) { 06701 if ((insert->exp_min == min) && (insert->exp_max == max) && 06702 (insert->exp_left == left)) { 06703 insert->ref++; 06704 left->ref--; 06705 return(insert); 06706 } 06707 } else if ((insert->exp_left == left) && 06708 (insert->exp_right == right)) { 06709 insert->ref++; 06710 left->ref--; 06711 right->ref--; 06712 return(insert); 06713 } 06714 } 06715 } 06716 } 06717 06718 entry = xmlExpNewNode(ctxt, type); 06719 if (entry == NULL) 06720 return(NULL); 06721 entry->key = kbase; 06722 if (type == XML_EXP_ATOM) { 06723 entry->exp_str = name; 06724 entry->c_max = 1; 06725 } else if (type == XML_EXP_COUNT) { 06726 entry->exp_min = min; 06727 entry->exp_max = max; 06728 entry->exp_left = left; 06729 if ((min == 0) || (IS_NILLABLE(left))) 06730 entry->info |= XML_EXP_NILABLE; 06731 if (max < 0) 06732 entry->c_max = -1; 06733 else 06734 entry->c_max = max * entry->exp_left->c_max; 06735 } else { 06736 entry->exp_left = left; 06737 entry->exp_right = right; 06738 if (type == XML_EXP_OR) { 06739 if ((IS_NILLABLE(left)) || (IS_NILLABLE(right))) 06740 entry->info |= XML_EXP_NILABLE; 06741 if ((entry->exp_left->c_max == -1) || 06742 (entry->exp_right->c_max == -1)) 06743 entry->c_max = -1; 06744 else if (entry->exp_left->c_max > entry->exp_right->c_max) 06745 entry->c_max = entry->exp_left->c_max; 06746 else 06747 entry->c_max = entry->exp_right->c_max; 06748 } else { 06749 if ((IS_NILLABLE(left)) && (IS_NILLABLE(right))) 06750 entry->info |= XML_EXP_NILABLE; 06751 if ((entry->exp_left->c_max == -1) || 06752 (entry->exp_right->c_max == -1)) 06753 entry->c_max = -1; 06754 else 06755 entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max; 06756 } 06757 } 06758 entry->ref = 1; 06759 if (ctxt->table[key] != NULL) 06760 entry->next = ctxt->table[key]; 06761 06762 ctxt->table[key] = entry; 06763 ctxt->nbElems++; 06764 06765 return(entry); 06766 } 06767 06775 void 06776 xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) { 06777 if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp)) 06778 return; 06779 exp->ref--; 06780 if (exp->ref == 0) { 06781 unsigned short key; 06782 06783 /* Unlink it first from the hash table */ 06784 key = exp->key % ctxt->size; 06785 if (ctxt->table[key] == exp) { 06786 ctxt->table[key] = exp->next; 06787 } else { 06788 xmlExpNodePtr tmp; 06789 06790 tmp = ctxt->table[key]; 06791 while (tmp != NULL) { 06792 if (tmp->next == exp) { 06793 tmp->next = exp->next; 06794 break; 06795 } 06796 tmp = tmp->next; 06797 } 06798 } 06799 06800 if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) { 06801 xmlExpFree(ctxt, exp->exp_left); 06802 xmlExpFree(ctxt, exp->exp_right); 06803 } else if (exp->type == XML_EXP_COUNT) { 06804 xmlExpFree(ctxt, exp->exp_left); 06805 } 06806 xmlFree(exp); 06807 ctxt->nb_nodes--; 06808 } 06809 } 06810 06817 void 06818 xmlExpRef(xmlExpNodePtr exp) { 06819 if (exp != NULL) 06820 exp->ref++; 06821 } 06822 06833 xmlExpNodePtr 06834 xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) { 06835 if ((ctxt == NULL) || (name == NULL)) 06836 return(NULL); 06837 name = xmlDictLookup(ctxt->dict, name, len); 06838 if (name == NULL) 06839 return(NULL); 06840 return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0)); 06841 } 06842 06856 xmlExpNodePtr 06857 xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) { 06858 if (ctxt == NULL) 06859 return(NULL); 06860 if ((left == NULL) || (right == NULL)) { 06861 xmlExpFree(ctxt, left); 06862 xmlExpFree(ctxt, right); 06863 return(NULL); 06864 } 06865 return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0)); 06866 } 06867 06881 xmlExpNodePtr 06882 xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) { 06883 if (ctxt == NULL) 06884 return(NULL); 06885 if ((left == NULL) || (right == NULL)) { 06886 xmlExpFree(ctxt, left); 06887 xmlExpFree(ctxt, right); 06888 return(NULL); 06889 } 06890 return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0)); 06891 } 06892 06907 xmlExpNodePtr 06908 xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) { 06909 if (ctxt == NULL) 06910 return(NULL); 06911 if ((subset == NULL) || (min < 0) || (max < -1) || 06912 ((max >= 0) && (min > max))) { 06913 xmlExpFree(ctxt, subset); 06914 return(NULL); 06915 } 06916 return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset, 06917 NULL, NULL, min, max)); 06918 } 06919 06920 /************************************************************************ 06921 * * 06922 * Public API for operations on expressions * 06923 * * 06924 ************************************************************************/ 06925 06926 static int 06927 xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 06928 const xmlChar**list, int len, int nb) { 06929 int tmp, tmp2; 06930 tail: 06931 switch (exp->type) { 06932 case XML_EXP_EMPTY: 06933 return(0); 06934 case XML_EXP_ATOM: 06935 for (tmp = 0;tmp < nb;tmp++) 06936 if (list[tmp] == exp->exp_str) 06937 return(0); 06938 if (nb >= len) 06939 return(-2); 06940 list[nb] = exp->exp_str; 06941 return(1); 06942 case XML_EXP_COUNT: 06943 exp = exp->exp_left; 06944 goto tail; 06945 case XML_EXP_SEQ: 06946 case XML_EXP_OR: 06947 tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb); 06948 if (tmp < 0) 06949 return(tmp); 06950 tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len, 06951 nb + tmp); 06952 if (tmp2 < 0) 06953 return(tmp2); 06954 return(tmp + tmp2); 06955 } 06956 return(-1); 06957 } 06958 06971 int 06972 xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 06973 const xmlChar**langList, int len) { 06974 if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0)) 06975 return(-1); 06976 return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0)); 06977 } 06978 06979 static int 06980 xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 06981 const xmlChar**list, int len, int nb) { 06982 int tmp, tmp2; 06983 tail: 06984 switch (exp->type) { 06985 case XML_EXP_FORBID: 06986 return(0); 06987 case XML_EXP_EMPTY: 06988 return(0); 06989 case XML_EXP_ATOM: 06990 for (tmp = 0;tmp < nb;tmp++) 06991 if (list[tmp] == exp->exp_str) 06992 return(0); 06993 if (nb >= len) 06994 return(-2); 06995 list[nb] = exp->exp_str; 06996 return(1); 06997 case XML_EXP_COUNT: 06998 exp = exp->exp_left; 06999 goto tail; 07000 case XML_EXP_SEQ: 07001 tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb); 07002 if (tmp < 0) 07003 return(tmp); 07004 if (IS_NILLABLE(exp->exp_left)) { 07005 tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len, 07006 nb + tmp); 07007 if (tmp2 < 0) 07008 return(tmp2); 07009 tmp += tmp2; 07010 } 07011 return(tmp); 07012 case XML_EXP_OR: 07013 tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb); 07014 if (tmp < 0) 07015 return(tmp); 07016 tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len, 07017 nb + tmp); 07018 if (tmp2 < 0) 07019 return(tmp2); 07020 return(tmp + tmp2); 07021 } 07022 return(-1); 07023 } 07024 07039 int 07040 xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 07041 const xmlChar**tokList, int len) { 07042 if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0)) 07043 return(-1); 07044 return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0)); 07045 } 07046 07055 int 07056 xmlExpIsNillable(xmlExpNodePtr exp) { 07057 if (exp == NULL) 07058 return(-1); 07059 return(IS_NILLABLE(exp) != 0); 07060 } 07061 07062 static xmlExpNodePtr 07063 xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str) 07064 { 07065 xmlExpNodePtr ret; 07066 07067 switch (exp->type) { 07068 case XML_EXP_EMPTY: 07069 return(forbiddenExp); 07070 case XML_EXP_FORBID: 07071 return(forbiddenExp); 07072 case XML_EXP_ATOM: 07073 if (exp->exp_str == str) { 07074 #ifdef DEBUG_DERIV 07075 printf("deriv atom: equal => Empty\n"); 07076 #endif 07077 ret = emptyExp; 07078 } else { 07079 #ifdef DEBUG_DERIV 07080 printf("deriv atom: mismatch => forbid\n"); 07081 #endif 07082 /* TODO wildcards here */ 07083 ret = forbiddenExp; 07084 } 07085 return(ret); 07086 case XML_EXP_OR: { 07087 xmlExpNodePtr tmp; 07088 07089 #ifdef DEBUG_DERIV 07090 printf("deriv or: => or(derivs)\n"); 07091 #endif 07092 tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str); 07093 if (tmp == NULL) { 07094 return(NULL); 07095 } 07096 ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str); 07097 if (ret == NULL) { 07098 xmlExpFree(ctxt, tmp); 07099 return(NULL); 07100 } 07101 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, 07102 NULL, 0, 0); 07103 return(ret); 07104 } 07105 case XML_EXP_SEQ: 07106 #ifdef DEBUG_DERIV 07107 printf("deriv seq: starting with left\n"); 07108 #endif 07109 ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str); 07110 if (ret == NULL) { 07111 return(NULL); 07112 } else if (ret == forbiddenExp) { 07113 if (IS_NILLABLE(exp->exp_left)) { 07114 #ifdef DEBUG_DERIV 07115 printf("deriv seq: left failed but nillable\n"); 07116 #endif 07117 ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str); 07118 } 07119 } else { 07120 #ifdef DEBUG_DERIV 07121 printf("deriv seq: left match => sequence\n"); 07122 #endif 07123 exp->exp_right->ref++; 07124 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right, 07125 NULL, 0, 0); 07126 } 07127 return(ret); 07128 case XML_EXP_COUNT: { 07129 int min, max; 07130 xmlExpNodePtr tmp; 07131 07132 if (exp->exp_max == 0) 07133 return(forbiddenExp); 07134 ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str); 07135 if (ret == NULL) 07136 return(NULL); 07137 if (ret == forbiddenExp) { 07138 #ifdef DEBUG_DERIV 07139 printf("deriv count: pattern mismatch => forbid\n"); 07140 #endif 07141 return(ret); 07142 } 07143 if (exp->exp_max == 1) 07144 return(ret); 07145 if (exp->exp_max < 0) /* unbounded */ 07146 max = -1; 07147 else 07148 max = exp->exp_max - 1; 07149 if (exp->exp_min > 0) 07150 min = exp->exp_min - 1; 07151 else 07152 min = 0; 07153 exp->exp_left->ref++; 07154 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL, 07155 NULL, min, max); 07156 if (ret == emptyExp) { 07157 #ifdef DEBUG_DERIV 07158 printf("deriv count: match to empty => new count\n"); 07159 #endif 07160 return(tmp); 07161 } 07162 #ifdef DEBUG_DERIV 07163 printf("deriv count: match => sequence with new count\n"); 07164 #endif 07165 return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp, 07166 NULL, 0, 0)); 07167 } 07168 } 07169 return(NULL); 07170 } 07171 07184 xmlExpNodePtr 07185 xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 07186 const xmlChar *str, int len) { 07187 const xmlChar *input; 07188 07189 if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) { 07190 return(NULL); 07191 } 07192 /* 07193 * check the string is in the dictionnary, if yes use an interned 07194 * copy, otherwise we know it's not an acceptable input 07195 */ 07196 input = xmlDictExists(ctxt->dict, str, len); 07197 if (input == NULL) { 07198 return(forbiddenExp); 07199 } 07200 return(xmlExpStringDeriveInt(ctxt, exp, input)); 07201 } 07202 07203 static int 07204 xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) { 07205 int ret = 1; 07206 07207 if (sub->c_max == -1) { 07208 if (exp->c_max != -1) 07209 ret = 0; 07210 } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) { 07211 ret = 0; 07212 } 07213 #if 0 07214 if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp))) 07215 ret = 0; 07216 #endif 07217 return(ret); 07218 } 07219 07220 static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 07221 xmlExpNodePtr sub); 07237 static int 07238 xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub, 07239 xmlExpNodePtr *mult, xmlExpNodePtr *remain) { 07240 int i; 07241 xmlExpNodePtr tmp, tmp2; 07242 07243 if (mult != NULL) *mult = NULL; 07244 if (remain != NULL) *remain = NULL; 07245 if (exp->c_max == -1) return(0); 07246 if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub))) return(0); 07247 07248 for (i = 1;i <= exp->c_max;i++) { 07249 sub->ref++; 07250 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, 07251 sub, NULL, NULL, i, i); 07252 if (tmp == NULL) { 07253 return(-1); 07254 } 07255 if (!xmlExpCheckCard(tmp, exp)) { 07256 xmlExpFree(ctxt, tmp); 07257 continue; 07258 } 07259 tmp2 = xmlExpExpDeriveInt(ctxt, tmp, exp); 07260 if (tmp2 == NULL) { 07261 xmlExpFree(ctxt, tmp); 07262 return(-1); 07263 } 07264 if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) { 07265 if (remain != NULL) 07266 *remain = tmp2; 07267 else 07268 xmlExpFree(ctxt, tmp2); 07269 if (mult != NULL) 07270 *mult = tmp; 07271 else 07272 xmlExpFree(ctxt, tmp); 07273 #ifdef DEBUG_DERIV 07274 printf("Divide succeeded %d\n", i); 07275 #endif 07276 return(i); 07277 } 07278 xmlExpFree(ctxt, tmp); 07279 xmlExpFree(ctxt, tmp2); 07280 } 07281 #ifdef DEBUG_DERIV 07282 printf("Divide failed\n"); 07283 #endif 07284 return(0); 07285 } 07286 07298 static xmlExpNodePtr 07299 xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) { 07300 xmlExpNodePtr ret, tmp, tmp2, tmp3; 07301 const xmlChar **tab; 07302 int len, i; 07303 07304 /* 07305 * In case of equality and if the expression can only consume a finite 07306 * amount, then the derivation is empty 07307 */ 07308 if ((exp == sub) && (exp->c_max >= 0)) { 07309 #ifdef DEBUG_DERIV 07310 printf("Equal(exp, sub) and finite -> Empty\n"); 07311 #endif 07312 return(emptyExp); 07313 } 07314 /* 07315 * decompose sub sequence first 07316 */ 07317 if (sub->type == XML_EXP_EMPTY) { 07318 #ifdef DEBUG_DERIV 07319 printf("Empty(sub) -> Empty\n"); 07320 #endif 07321 exp->ref++; 07322 return(exp); 07323 } 07324 if (sub->type == XML_EXP_SEQ) { 07325 #ifdef DEBUG_DERIV 07326 printf("Seq(sub) -> decompose\n"); 07327 #endif 07328 tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left); 07329 if (tmp == NULL) 07330 return(NULL); 07331 if (tmp == forbiddenExp) 07332 return(tmp); 07333 ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right); 07334 xmlExpFree(ctxt, tmp); 07335 return(ret); 07336 } 07337 if (sub->type == XML_EXP_OR) { 07338 #ifdef DEBUG_DERIV 07339 printf("Or(sub) -> decompose\n"); 07340 #endif 07341 tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left); 07342 if (tmp == forbiddenExp) 07343 return(tmp); 07344 if (tmp == NULL) 07345 return(NULL); 07346 ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right); 07347 if ((ret == NULL) || (ret == forbiddenExp)) { 07348 xmlExpFree(ctxt, tmp); 07349 return(ret); 07350 } 07351 return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0)); 07352 } 07353 if (!xmlExpCheckCard(exp, sub)) { 07354 #ifdef DEBUG_DERIV 07355 printf("CheckCard(exp, sub) failed -> Forbid\n"); 07356 #endif 07357 return(forbiddenExp); 07358 } 07359 switch (exp->type) { 07360 case XML_EXP_EMPTY: 07361 if (sub == emptyExp) 07362 return(emptyExp); 07363 #ifdef DEBUG_DERIV 07364 printf("Empty(exp) -> Forbid\n"); 07365 #endif 07366 return(forbiddenExp); 07367 case XML_EXP_FORBID: 07368 #ifdef DEBUG_DERIV 07369 printf("Forbid(exp) -> Forbid\n"); 07370 #endif 07371 return(forbiddenExp); 07372 case XML_EXP_ATOM: 07373 if (sub->type == XML_EXP_ATOM) { 07374 /* TODO: handle wildcards */ 07375 if (exp->exp_str == sub->exp_str) { 07376 #ifdef DEBUG_DERIV 07377 printf("Atom match -> Empty\n"); 07378 #endif 07379 return(emptyExp); 07380 } 07381 #ifdef DEBUG_DERIV 07382 printf("Atom mismatch -> Forbid\n"); 07383 #endif 07384 return(forbiddenExp); 07385 } 07386 if ((sub->type == XML_EXP_COUNT) && 07387 (sub->exp_max == 1) && 07388 (sub->exp_left->type == XML_EXP_ATOM)) { 07389 /* TODO: handle wildcards */ 07390 if (exp->exp_str == sub->exp_left->exp_str) { 07391 #ifdef DEBUG_DERIV 07392 printf("Atom match -> Empty\n"); 07393 #endif 07394 return(emptyExp); 07395 } 07396 #ifdef DEBUG_DERIV 07397 printf("Atom mismatch -> Forbid\n"); 07398 #endif 07399 return(forbiddenExp); 07400 } 07401 #ifdef DEBUG_DERIV 07402 printf("Compex exp vs Atom -> Forbid\n"); 07403 #endif 07404 return(forbiddenExp); 07405 case XML_EXP_SEQ: 07406 /* try to get the sequence consumed only if possible */ 07407 if (xmlExpCheckCard(exp->exp_left, sub)) { 07408 /* See if the sequence can be consumed directly */ 07409 #ifdef DEBUG_DERIV 07410 printf("Seq trying left only\n"); 07411 #endif 07412 ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub); 07413 if ((ret != forbiddenExp) && (ret != NULL)) { 07414 #ifdef DEBUG_DERIV 07415 printf("Seq trying left only worked\n"); 07416 #endif 07417 /* 07418 * TODO: assumption here that we are determinist 07419 * i.e. we won't get to a nillable exp left 07420 * subset which could be matched by the right 07421 * part too. 07422 * e.g.: (a | b)+,(a | c) and 'a+,a' 07423 */ 07424 exp->exp_right->ref++; 07425 return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, 07426 exp->exp_right, NULL, 0, 0)); 07427 } 07428 #ifdef DEBUG_DERIV 07429 } else { 07430 printf("Seq: left too short\n"); 07431 #endif 07432 } 07433 /* Try instead to decompose */ 07434 if (sub->type == XML_EXP_COUNT) { 07435 int min, max; 07436 07437 #ifdef DEBUG_DERIV 07438 printf("Seq: sub is a count\n"); 07439 #endif 07440 ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left); 07441 if (ret == NULL) 07442 return(NULL); 07443 if (ret != forbiddenExp) { 07444 #ifdef DEBUG_DERIV 07445 printf("Seq , Count match on left\n"); 07446 #endif 07447 if (sub->exp_max < 0) 07448 max = -1; 07449 else 07450 max = sub->exp_max -1; 07451 if (sub->exp_min > 0) 07452 min = sub->exp_min -1; 07453 else 07454 min = 0; 07455 exp->exp_right->ref++; 07456 tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, 07457 exp->exp_right, NULL, 0, 0); 07458 if (tmp == NULL) 07459 return(NULL); 07460 07461 sub->exp_left->ref++; 07462 tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, 07463 sub->exp_left, NULL, NULL, min, max); 07464 if (tmp2 == NULL) { 07465 xmlExpFree(ctxt, tmp); 07466 return(NULL); 07467 } 07468 ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2); 07469 xmlExpFree(ctxt, tmp); 07470 xmlExpFree(ctxt, tmp2); 07471 return(ret); 07472 } 07473 } 07474 /* we made no progress on structured operations */ 07475 break; 07476 case XML_EXP_OR: 07477 #ifdef DEBUG_DERIV 07478 printf("Or , trying both side\n"); 07479 #endif 07480 ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub); 07481 if (ret == NULL) 07482 return(NULL); 07483 tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub); 07484 if (tmp == NULL) { 07485 xmlExpFree(ctxt, ret); 07486 return(NULL); 07487 } 07488 return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0)); 07489 case XML_EXP_COUNT: { 07490 int min, max; 07491 07492 if (sub->type == XML_EXP_COUNT) { 07493 /* 07494 * Try to see if the loop is completely subsumed 07495 */ 07496 tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left); 07497 if (tmp == NULL) 07498 return(NULL); 07499 if (tmp == forbiddenExp) { 07500 int mult; 07501 07502 #ifdef DEBUG_DERIV 07503 printf("Count, Count inner don't subsume\n"); 07504 #endif 07505 mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left, 07506 NULL, &tmp); 07507 if (mult <= 0) { 07508 #ifdef DEBUG_DERIV 07509 printf("Count, Count not multiple => forbidden\n"); 07510 #endif 07511 return(forbiddenExp); 07512 } 07513 if (sub->exp_max == -1) { 07514 max = -1; 07515 if (exp->exp_max == -1) { 07516 if (exp->exp_min <= sub->exp_min * mult) 07517 min = 0; 07518 else 07519 min = exp->exp_min - sub->exp_min * mult; 07520 } else { 07521 #ifdef DEBUG_DERIV 07522 printf("Count, Count finite can't subsume infinite\n"); 07523 #endif 07524 xmlExpFree(ctxt, tmp); 07525 return(forbiddenExp); 07526 } 07527 } else { 07528 if (exp->exp_max == -1) { 07529 #ifdef DEBUG_DERIV 07530 printf("Infinite loop consume mult finite loop\n"); 07531 #endif 07532 if (exp->exp_min > sub->exp_min * mult) { 07533 max = -1; 07534 min = exp->exp_min - sub->exp_min * mult; 07535 } else { 07536 max = -1; 07537 min = 0; 07538 } 07539 } else { 07540 if (exp->exp_max < sub->exp_max * mult) { 07541 #ifdef DEBUG_DERIV 07542 printf("loops max mult mismatch => forbidden\n"); 07543 #endif 07544 xmlExpFree(ctxt, tmp); 07545 return(forbiddenExp); 07546 } 07547 if (sub->exp_max * mult > exp->exp_min) 07548 min = 0; 07549 else 07550 min = exp->exp_min - sub->exp_max * mult; 07551 max = exp->exp_max - sub->exp_max * mult; 07552 } 07553 } 07554 } else if (!IS_NILLABLE(tmp)) { 07555 /* 07556 * TODO: loop here to try to grow if working on finite 07557 * blocks. 07558 */ 07559 #ifdef DEBUG_DERIV 07560 printf("Count, Count remain not nillable => forbidden\n"); 07561 #endif 07562 xmlExpFree(ctxt, tmp); 07563 return(forbiddenExp); 07564 } else if (sub->exp_max == -1) { 07565 if (exp->exp_max == -1) { 07566 if (exp->exp_min <= sub->exp_min) { 07567 #ifdef DEBUG_DERIV 07568 printf("Infinite loops Okay => COUNT(0,Inf)\n"); 07569 #endif 07570 max = -1; 07571 min = 0; 07572 } else { 07573 #ifdef DEBUG_DERIV 07574 printf("Infinite loops min => Count(X,Inf)\n"); 07575 #endif 07576 max = -1; 07577 min = exp->exp_min - sub->exp_min; 07578 } 07579 } else if (exp->exp_min > sub->exp_min) { 07580 #ifdef DEBUG_DERIV 07581 printf("loops min mismatch 1 => forbidden ???\n"); 07582 #endif 07583 xmlExpFree(ctxt, tmp); 07584 return(forbiddenExp); 07585 } else { 07586 max = -1; 07587 min = 0; 07588 } 07589 } else { 07590 if (exp->exp_max == -1) { 07591 #ifdef DEBUG_DERIV 07592 printf("Infinite loop consume finite loop\n"); 07593 #endif 07594 if (exp->exp_min > sub->exp_min) { 07595 max = -1; 07596 min = exp->exp_min - sub->exp_min; 07597 } else { 07598 max = -1; 07599 min = 0; 07600 } 07601 } else { 07602 if (exp->exp_max < sub->exp_max) { 07603 #ifdef DEBUG_DERIV 07604 printf("loops max mismatch => forbidden\n"); 07605 #endif 07606 xmlExpFree(ctxt, tmp); 07607 return(forbiddenExp); 07608 } 07609 if (sub->exp_max > exp->exp_min) 07610 min = 0; 07611 else 07612 min = exp->exp_min - sub->exp_max; 07613 max = exp->exp_max - sub->exp_max; 07614 } 07615 } 07616 #ifdef DEBUG_DERIV 07617 printf("loops match => SEQ(COUNT())\n"); 07618 #endif 07619 exp->exp_left->ref++; 07620 tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, 07621 NULL, NULL, min, max); 07622 if (tmp2 == NULL) { 07623 return(NULL); 07624 } 07625 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2, 07626 NULL, 0, 0); 07627 return(ret); 07628 } 07629 tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub); 07630 if (tmp == NULL) 07631 return(NULL); 07632 if (tmp == forbiddenExp) { 07633 #ifdef DEBUG_DERIV 07634 printf("loop mismatch => forbidden\n"); 07635 #endif 07636 return(forbiddenExp); 07637 } 07638 if (exp->exp_min > 0) 07639 min = exp->exp_min - 1; 07640 else 07641 min = 0; 07642 if (exp->exp_max < 0) 07643 max = -1; 07644 else 07645 max = exp->exp_max - 1; 07646 07647 #ifdef DEBUG_DERIV 07648 printf("loop match => SEQ(COUNT())\n"); 07649 #endif 07650 exp->exp_left->ref++; 07651 tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, 07652 NULL, NULL, min, max); 07653 if (tmp2 == NULL) 07654 return(NULL); 07655 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2, 07656 NULL, 0, 0); 07657 return(ret); 07658 } 07659 } 07660 07661 #ifdef DEBUG_DERIV 07662 printf("Fallback to derivative\n"); 07663 #endif 07664 if (IS_NILLABLE(sub)) { 07665 if (!(IS_NILLABLE(exp))) 07666 return(forbiddenExp); 07667 else 07668 ret = emptyExp; 07669 } else 07670 ret = NULL; 07671 /* 07672 * here the structured derivation made no progress so 07673 * we use the default token based derivation to force one more step 07674 */ 07675 if (ctxt->tabSize == 0) 07676 ctxt->tabSize = 40; 07677 07678 tab = (const xmlChar **) xmlMalloc(ctxt->tabSize * 07679 sizeof(const xmlChar *)); 07680 if (tab == NULL) { 07681 return(NULL); 07682 } 07683 07684 /* 07685 * collect all the strings accepted by the subexpression on input 07686 */ 07687 len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0); 07688 while (len < 0) { 07689 const xmlChar **temp; 07690 temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 * 07691 sizeof(const xmlChar *)); 07692 if (temp == NULL) { 07693 xmlFree((xmlChar **) tab); 07694 return(NULL); 07695 } 07696 tab = temp; 07697 ctxt->tabSize *= 2; 07698 len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0); 07699 } 07700 for (i = 0;i < len;i++) { 07701 tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]); 07702 if ((tmp == NULL) || (tmp == forbiddenExp)) { 07703 xmlExpFree(ctxt, ret); 07704 xmlFree((xmlChar **) tab); 07705 return(tmp); 07706 } 07707 tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]); 07708 if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) { 07709 xmlExpFree(ctxt, tmp); 07710 xmlExpFree(ctxt, ret); 07711 xmlFree((xmlChar **) tab); 07712 return(tmp); 07713 } 07714 tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2); 07715 xmlExpFree(ctxt, tmp); 07716 xmlExpFree(ctxt, tmp2); 07717 07718 if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) { 07719 xmlExpFree(ctxt, ret); 07720 xmlFree((xmlChar **) tab); 07721 return(tmp3); 07722 } 07723 07724 if (ret == NULL) 07725 ret = tmp3; 07726 else { 07727 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0); 07728 if (ret == NULL) { 07729 xmlFree((xmlChar **) tab); 07730 return(NULL); 07731 } 07732 } 07733 } 07734 xmlFree((xmlChar **) tab); 07735 return(ret); 07736 } 07737 07752 xmlExpNodePtr 07753 xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) { 07754 if ((exp == NULL) || (ctxt == NULL) || (sub == NULL)) 07755 return(NULL); 07756 07757 /* 07758 * O(1) speedups 07759 */ 07760 if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) { 07761 #ifdef DEBUG_DERIV 07762 printf("Sub nillable and not exp : can't subsume\n"); 07763 #endif 07764 return(forbiddenExp); 07765 } 07766 if (xmlExpCheckCard(exp, sub) == 0) { 07767 #ifdef DEBUG_DERIV 07768 printf("sub generate longuer sequances than exp : can't subsume\n"); 07769 #endif 07770 return(forbiddenExp); 07771 } 07772 return(xmlExpExpDeriveInt(ctxt, exp, sub)); 07773 } 07774 07786 int 07787 xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) { 07788 xmlExpNodePtr tmp; 07789 07790 if ((exp == NULL) || (ctxt == NULL) || (sub == NULL)) 07791 return(-1); 07792 07793 /* 07794 * TODO: speedup by checking the language of sub is a subset of the 07795 * language of exp 07796 */ 07797 /* 07798 * O(1) speedups 07799 */ 07800 if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) { 07801 #ifdef DEBUG_DERIV 07802 printf("Sub nillable and not exp : can't subsume\n"); 07803 #endif 07804 return(0); 07805 } 07806 if (xmlExpCheckCard(exp, sub) == 0) { 07807 #ifdef DEBUG_DERIV 07808 printf("sub generate longuer sequances than exp : can't subsume\n"); 07809 #endif 07810 return(0); 07811 } 07812 tmp = xmlExpExpDeriveInt(ctxt, exp, sub); 07813 #ifdef DEBUG_DERIV 07814 printf("Result derivation :\n"); 07815 PRINT_EXP(tmp); 07816 #endif 07817 if (tmp == NULL) 07818 return(-1); 07819 if (tmp == forbiddenExp) 07820 return(0); 07821 if (tmp == emptyExp) 07822 return(1); 07823 if ((tmp != NULL) && (IS_NILLABLE(tmp))) { 07824 xmlExpFree(ctxt, tmp); 07825 return(1); 07826 } 07827 xmlExpFree(ctxt, tmp); 07828 return(0); 07829 } 07830 07831 /************************************************************************ 07832 * * 07833 * Parsing expression * 07834 * * 07835 ************************************************************************/ 07836 07837 static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt); 07838 07839 #undef CUR 07840 #define CUR (*ctxt->cur) 07841 #undef NEXT 07842 #define NEXT ctxt->cur++; 07843 #undef IS_BLANK 07844 #define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t')) 07845 #define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++; 07846 07847 static int 07848 xmlExpParseNumber(xmlExpCtxtPtr ctxt) { 07849 int ret = 0; 07850 07851 SKIP_BLANKS 07852 if (CUR == '*') { 07853 NEXT 07854 return(-1); 07855 } 07856 if ((CUR < '0') || (CUR > '9')) 07857 return(-1); 07858 while ((CUR >= '0') && (CUR <= '9')) { 07859 ret = ret * 10 + (CUR - '0'); 07860 NEXT 07861 } 07862 return(ret); 07863 } 07864 07865 static xmlExpNodePtr 07866 xmlExpParseOr(xmlExpCtxtPtr ctxt) { 07867 const char *base; 07868 xmlExpNodePtr ret; 07869 const xmlChar *val; 07870 07871 SKIP_BLANKS 07872 base = ctxt->cur; 07873 if (*ctxt->cur == '(') { 07874 NEXT 07875 ret = xmlExpParseExpr(ctxt); 07876 SKIP_BLANKS 07877 if (*ctxt->cur != ')') { 07878 fprintf(stderr, "unbalanced '(' : %s\n", base); 07879 xmlExpFree(ctxt, ret); 07880 return(NULL); 07881 } 07882 NEXT; 07883 SKIP_BLANKS 07884 goto parse_quantifier; 07885 } 07886 while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') && 07887 (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') && 07888 (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}')) 07889 NEXT; 07890 val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base); 07891 if (val == NULL) 07892 return(NULL); 07893 ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0); 07894 if (ret == NULL) 07895 return(NULL); 07896 SKIP_BLANKS 07897 parse_quantifier: 07898 if (CUR == '{') { 07899 int min, max; 07900 07901 NEXT 07902 min = xmlExpParseNumber(ctxt); 07903 if (min < 0) { 07904 xmlExpFree(ctxt, ret); 07905 return(NULL); 07906 } 07907 SKIP_BLANKS 07908 if (CUR == ',') { 07909 NEXT 07910 max = xmlExpParseNumber(ctxt); 07911 SKIP_BLANKS 07912 } else 07913 max = min; 07914 if (CUR != '}') { 07915 xmlExpFree(ctxt, ret); 07916 return(NULL); 07917 } 07918 NEXT 07919 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL, 07920 min, max); 07921 SKIP_BLANKS 07922 } else if (CUR == '?') { 07923 NEXT 07924 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL, 07925 0, 1); 07926 SKIP_BLANKS 07927 } else if (CUR == '+') { 07928 NEXT 07929 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL, 07930 1, -1); 07931 SKIP_BLANKS 07932 } else if (CUR == '*') { 07933 NEXT 07934 ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL, 07935 0, -1); 07936 SKIP_BLANKS 07937 } 07938 return(ret); 07939 } 07940 07941 07942 static xmlExpNodePtr 07943 xmlExpParseSeq(xmlExpCtxtPtr ctxt) { 07944 xmlExpNodePtr ret, right; 07945 07946 ret = xmlExpParseOr(ctxt); 07947 SKIP_BLANKS 07948 while (CUR == '|') { 07949 NEXT 07950 right = xmlExpParseOr(ctxt); 07951 if (right == NULL) { 07952 xmlExpFree(ctxt, ret); 07953 return(NULL); 07954 } 07955 ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0); 07956 if (ret == NULL) 07957 return(NULL); 07958 } 07959 return(ret); 07960 } 07961 07962 static xmlExpNodePtr 07963 xmlExpParseExpr(xmlExpCtxtPtr ctxt) { 07964 xmlExpNodePtr ret, right; 07965 07966 ret = xmlExpParseSeq(ctxt); 07967 SKIP_BLANKS 07968 while (CUR == ',') { 07969 NEXT 07970 right = xmlExpParseSeq(ctxt); 07971 if (right == NULL) { 07972 xmlExpFree(ctxt, ret); 07973 return(NULL); 07974 } 07975 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0); 07976 if (ret == NULL) 07977 return(NULL); 07978 } 07979 return(ret); 07980 } 07981 07999 xmlExpNodePtr 08000 xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) { 08001 xmlExpNodePtr ret; 08002 08003 ctxt->expr = expr; 08004 ctxt->cur = expr; 08005 08006 ret = xmlExpParseExpr(ctxt); 08007 SKIP_BLANKS 08008 if (*ctxt->cur != 0) { 08009 xmlExpFree(ctxt, ret); 08010 return(NULL); 08011 } 08012 return(ret); 08013 } 08014 08015 static void 08016 xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) { 08017 xmlExpNodePtr c; 08018 08019 if (expr == NULL) return; 08020 if (glob) xmlBufferWriteChar(buf, "("); 08021 switch (expr->type) { 08022 case XML_EXP_EMPTY: 08023 xmlBufferWriteChar(buf, "empty"); 08024 break; 08025 case XML_EXP_FORBID: 08026 xmlBufferWriteChar(buf, "forbidden"); 08027 break; 08028 case XML_EXP_ATOM: 08029 xmlBufferWriteCHAR(buf, expr->exp_str); 08030 break; 08031 case XML_EXP_SEQ: 08032 c = expr->exp_left; 08033 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR)) 08034 xmlExpDumpInt(buf, c, 1); 08035 else 08036 xmlExpDumpInt(buf, c, 0); 08037 xmlBufferWriteChar(buf, " , "); 08038 c = expr->exp_right; 08039 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR)) 08040 xmlExpDumpInt(buf, c, 1); 08041 else 08042 xmlExpDumpInt(buf, c, 0); 08043 break; 08044 case XML_EXP_OR: 08045 c = expr->exp_left; 08046 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR)) 08047 xmlExpDumpInt(buf, c, 1); 08048 else 08049 xmlExpDumpInt(buf, c, 0); 08050 xmlBufferWriteChar(buf, " | "); 08051 c = expr->exp_right; 08052 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR)) 08053 xmlExpDumpInt(buf, c, 1); 08054 else 08055 xmlExpDumpInt(buf, c, 0); 08056 break; 08057 case XML_EXP_COUNT: { 08058 char rep[40]; 08059 08060 c = expr->exp_left; 08061 if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR)) 08062 xmlExpDumpInt(buf, c, 1); 08063 else 08064 xmlExpDumpInt(buf, c, 0); 08065 if ((expr->exp_min == 0) && (expr->exp_max == 1)) { 08066 rep[0] = '?'; 08067 rep[1] = 0; 08068 } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) { 08069 rep[0] = '*'; 08070 rep[1] = 0; 08071 } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) { 08072 rep[0] = '+'; 08073 rep[1] = 0; 08074 } else if (expr->exp_max == expr->exp_min) { 08075 snprintf(rep, 39, "{%d}", expr->exp_min); 08076 } else if (expr->exp_max < 0) { 08077 snprintf(rep, 39, "{%d,inf}", expr->exp_min); 08078 } else { 08079 snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max); 08080 } 08081 rep[39] = 0; 08082 xmlBufferWriteChar(buf, rep); 08083 break; 08084 } 08085 default: 08086 fprintf(stderr, "Error in tree\n"); 08087 } 08088 if (glob) 08089 xmlBufferWriteChar(buf, ")"); 08090 } 08098 void 08099 xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) { 08100 if ((buf == NULL) || (expr == NULL)) 08101 return; 08102 xmlExpDumpInt(buf, expr, 0); 08103 } 08104 08113 int 08114 xmlExpMaxToken(xmlExpNodePtr expr) { 08115 if (expr == NULL) 08116 return(-1); 08117 return(expr->c_max); 08118 } 08119 08128 int 08129 xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) { 08130 if (ctxt == NULL) 08131 return(-1); 08132 return(ctxt->nb_nodes); 08133 } 08134 08143 int 08144 xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) { 08145 if (ctxt == NULL) 08146 return(-1); 08147 return(ctxt->nb_cons); 08148 } 08149 08150 #endif /* LIBXML_EXPR_ENABLED */ 08151 #define bottom_xmlregexp 08152 #include "elfgcchack.h" 08153 #endif /* LIBXML_REGEXP_ENABLED */ Generated on Fri May 25 2012 04:33:17 for ReactOS by
1.7.6.1
|