Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygenregex.c
Go to the documentation of this file.
00001 /* This is a modified version of the GNU C Library regular expression 00002 matching library. It is modified to meet the requirements the NGS 00003 JS interpreter has for its extension functions (re-entrancy, etc.). 00004 All modifications are isolated with `JS' defines. You can enable 00005 the original features by defining the pre-processor constant JS to 00006 value 0. */ 00007 #define JS 1 00008 00009 /* Extended regular expression matching and search library, 00010 version 0.12. 00011 (Implements POSIX draft P1003.2/D11.2, except for some of the 00012 internationalization features.) 00013 Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc. 00014 00015 The GNU C Library is free software; you can redistribute it and/or 00016 modify it under the terms of the GNU Library General Public License as 00017 published by the Free Software Foundation; either version 2 of the 00018 License, or (at your option) any later version. 00019 00020 The GNU C Library is distributed in the hope that it will be useful, 00021 but WITHOUT ANY WARRANTY; without even the implied warranty of 00022 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00023 Library General Public License for more details. 00024 00025 You should have received a copy of the GNU Library General Public 00026 License along with the GNU C Library; see the file COPYING.LIB. If not, 00027 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 00028 Boston, MA 02111-1307, USA. */ 00029 00030 /* AIX requires this to be the first thing in the file. */ 00031 #if defined _AIX && !defined REGEX_MALLOC 00032 #pragma alloca 00033 #endif 00034 00035 #undef _GNU_SOURCE 00036 #define _GNU_SOURCE 00037 00038 #if JS 00039 //#include <jsconfig.h> 00040 #else /* not JS */ 00041 #ifdef HAVE_CONFIG_H 00042 # include <config.h> 00043 #endif 00044 #endif /* not JS */ 00045 00046 #ifndef PARAMS 00047 # if defined __GNUC__ || (defined __STDC__ && __STDC__) 00048 # define PARAMS(args) args 00049 # else 00050 # define PARAMS(args) () 00051 # endif /* GCC. */ 00052 #endif /* Not PARAMS. */ 00053 00054 #if defined STDC_HEADERS && !defined emacs 00055 # include <stddef.h> 00056 #else 00057 /* We need this for `regex.h', and perhaps for the Emacs include files. */ 00058 # include <sys/types.h> 00059 #endif 00060 00061 #define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) 00062 00063 /* For platform which support the ISO C amendement 1 functionality we 00064 support user defined character classes. */ 00065 #if defined _LIBC || WIDE_CHAR_SUPPORT 00066 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ 00067 # include <wchar.h> 00068 # include <wctype.h> 00069 #endif 00070 00071 #ifdef _LIBC 00072 /* We have to keep the namespace clean. */ 00073 # define regfree(preg) __regfree (preg) 00074 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) 00075 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) 00076 # define regerror(errcode, preg, errbuf, errbuf_size) \ 00077 __regerror(errcode, preg, errbuf, errbuf_size) 00078 # define re_set_registers(bu, re, nu, st, en) \ 00079 __re_set_registers (bu, re, nu, st, en) 00080 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ 00081 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) 00082 # define re_match(bufp, string, size, pos, regs) \ 00083 __re_match (bufp, string, size, pos, regs) 00084 # define re_search(bufp, string, size, startpos, range, regs) \ 00085 __re_search (bufp, string, size, startpos, range, regs) 00086 # define re_compile_pattern(pattern, length, bufp) \ 00087 __re_compile_pattern (pattern, length, bufp) 00088 # define re_set_syntax(syntax) __re_set_syntax (syntax) 00089 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ 00090 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) 00091 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) 00092 00093 #define btowc __btowc 00094 #endif 00095 00096 /* This is for other GNU distributions with internationalized messages. */ 00097 #if HAVE_LIBINTL_H || defined _LIBC 00098 # include <libintl.h> 00099 #else 00100 # define gettext(msgid) (msgid) 00101 #endif 00102 00103 #ifndef gettext_noop 00104 /* This define is so xgettext can find the internationalizable 00105 strings. */ 00106 # define gettext_noop(String) String 00107 #endif 00108 00109 /* The `emacs' switch turns on certain matching commands 00110 that make sense only in Emacs. */ 00111 #ifdef emacs 00112 00113 # include "lisp.h" 00114 # include "buffer.h" 00115 # include "syntax.h" 00116 00117 #else /* not emacs */ 00118 00119 /* If we are not linking with Emacs proper, 00120 we can't use the relocating allocator 00121 even if config.h says that we can. */ 00122 # undef REL_ALLOC 00123 00124 # if defined STDC_HEADERS || defined _LIBC || defined _WIN32 00125 # include <stdlib.h> 00126 # else 00127 char *malloc (); 00128 char *realloc (); 00129 # endif 00130 00131 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. 00132 If nothing else has been done, use the method below. */ 00133 # ifdef INHIBIT_STRING_HEADER 00134 # if !(defined HAVE_BZERO && defined HAVE_BCOPY) 00135 # if !defined bzero && !defined bcopy 00136 # undef INHIBIT_STRING_HEADER 00137 # endif 00138 # endif 00139 # endif 00140 00141 /* This is the normal way of making sure we have a bcopy and a bzero. 00142 This is used in most programs--a few other programs avoid this 00143 by defining INHIBIT_STRING_HEADER. */ 00144 # ifndef INHIBIT_STRING_HEADER 00145 # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC 00146 # include <string.h> 00147 # ifndef bzero 00148 //# ifndef _LIBC 00149 //# define bzero(s, n) (memset (s, '\0', n), (s)) 00150 //# else 00151 //# define bzero(s, n) __bzero (s, n) 00152 //# endif 00153 # endif 00154 # else 00155 # include <strings.h> 00156 //# ifndef memcmp 00157 //# define memcmp(s1, s2, n) bcmp (s1, s2, n) 00158 //# endif 00159 //# ifndef memcpy 00160 //# define memcpy(d, s, n) (bcopy (s, d, n), (d)) 00161 //# endif 00162 #define bzero(d,l) memset(d,0,l) 00163 # endif 00164 # endif 00165 00166 /* Define the syntax stuff for <, >, etc. */ 00167 00168 /* This must be nonzero for the wordchar and notwordchar pattern 00169 commands in re_match_2. */ 00170 # ifndef Sword 00171 # define Sword 1 00172 # endif 00173 00174 # ifdef SWITCH_ENUM_BUG 00175 # define SWITCH_ENUM_CAST(x) ((int)(x)) 00176 # else 00177 # define SWITCH_ENUM_CAST(x) (x) 00178 # endif 00179 00180 /* How many characters in the character set. */ 00181 # define CHAR_SET_SIZE 256 00182 00183 # ifdef SYNTAX_TABLE 00184 00185 extern char *re_syntax_table; 00186 00187 # else /* not SYNTAX_TABLE */ 00188 00189 #if JS 00190 static char re_syntax_table[CHAR_SET_SIZE] = 00191 { 00192 /* 00193 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 00194 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 - 0x0f */ 00195 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10 - 0x1f */ 00196 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 - 0x2f */ 00197 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30 - 0x3f */ 00198 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 - 0x4f */ 00199 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50 - 0x5f */ 00200 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 - 0x6f */ 00201 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70 - 0x7f */ 00202 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 - 0x8f */ 00203 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90 - 0x9f */ 00204 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0 - 0xaf */ 00205 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0 - 0xbf */ 00206 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0 - 0xcf */ 00207 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0 - 0xdf */ 00208 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0 - 0xef */ 00209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xf0 - 0xff */ 00210 }; 00211 00212 static void 00213 init_syntax_once () 00214 { 00215 /* Nothing here. */ 00216 } 00217 #else /* not JS */ 00218 static char re_syntax_table[CHAR_SET_SIZE]; 00219 00220 static void 00221 init_syntax_once () 00222 { 00223 register int c; 00224 static int done = 0; 00225 00226 if (done) 00227 return; 00228 00229 bzero (re_syntax_table, sizeof re_syntax_table); 00230 00231 for (c = 'a'; c <= 'z'; c++) 00232 re_syntax_table[c] = Sword; 00233 00234 for (c = 'A'; c <= 'Z'; c++) 00235 re_syntax_table[c] = Sword; 00236 00237 for (c = '0'; c <= '9'; c++) 00238 re_syntax_table[c] = Sword; 00239 00240 re_syntax_table['_'] = Sword; 00241 00242 done = 1; 00243 } 00244 #endif /* not JS */ 00245 00246 # endif /* not SYNTAX_TABLE */ 00247 00248 # define SYNTAX(c) re_syntax_table[c] 00249 00250 #endif /* not emacs */ 00251 00252 /* Get the interface, including the syntax bits. */ 00253 #include "regex.h" 00254 00255 /* isalpha etc. are used for the character classes. */ 00256 #include <ctype.h> 00257 00258 /* Jim Meyering writes: 00259 00260 "... Some ctype macros are valid only for character codes that 00261 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when 00262 using /bin/cc or gcc but without giving an ansi option). So, all 00263 ctype uses should be through macros like ISPRINT... If 00264 STDC_HEADERS is defined, then autoconf has verified that the ctype 00265 macros don't need to be guarded with references to isascii. ... 00266 Defining isascii to 1 should let any compiler worth its salt 00267 eliminate the && through constant folding." 00268 Solaris defines some of these symbols so we must undefine them first. */ 00269 00270 #undef ISASCII 00271 #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) 00272 # define ISASCII(c) 1 00273 #else 00274 # define ISASCII(c) isascii(c) 00275 #endif 00276 00277 #ifdef isblank 00278 # define ISBLANK(c) (ISASCII (c) && isblank (c)) 00279 #else 00280 # define ISBLANK(c) ((c) == ' ' || (c) == '\t') 00281 #endif 00282 #ifdef isgraph 00283 # define ISGRAPH(c) (ISASCII (c) && isgraph (c)) 00284 #else 00285 # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) 00286 #endif 00287 00288 #undef ISPRINT 00289 #define ISPRINT(c) (ISASCII (c) && isprint (c)) 00290 #define ISDIGIT(c) (ISASCII (c) && isdigit (c)) 00291 #define ISALNUM(c) (ISASCII (c) && isalnum (c)) 00292 #define ISALPHA(c) (ISASCII (c) && isalpha (c)) 00293 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) 00294 #define ISLOWER(c) (ISASCII (c) && islower (c)) 00295 #define ISPUNCT(c) (ISASCII (c) && ispunct (c)) 00296 #define ISSPACE(c) (ISASCII (c) && isspace (c)) 00297 #define ISUPPER(c) (ISASCII (c) && isupper (c)) 00298 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) 00299 00300 #ifndef NULL 00301 # define NULL (void *)0 00302 #endif 00303 00304 /* We remove any previous definition of `SIGN_EXTEND_CHAR', 00305 since ours (we hope) works properly with all combinations of 00306 machines, compilers, `char' and `unsigned char' argument types. 00307 (Per Bothner suggested the basic approach.) */ 00308 #undef SIGN_EXTEND_CHAR 00309 #if __STDC__ 00310 # define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 00311 #else /* not __STDC__ */ 00312 /* As in Harbison and Steele. */ 00313 # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) 00314 #endif 00315 00316 /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we 00317 use `alloca' instead of `malloc'. This is because using malloc in 00318 re_search* or re_match* could cause memory leaks when C-g is used in 00319 Emacs; also, malloc is slower and causes storage fragmentation. On 00320 the other hand, malloc is more portable, and easier to debug. 00321 00322 Because we sometimes use alloca, some routines have to be macros, 00323 not functions -- `alloca'-allocated space disappears at the end of the 00324 function it is called in. */ 00325 00326 #ifdef REGEX_MALLOC 00327 00328 # define REGEX_ALLOCATE malloc 00329 # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) 00330 # define REGEX_FREE free 00331 00332 #else /* not REGEX_MALLOC */ 00333 00334 /* Emacs already defines alloca, sometimes. */ 00335 # ifndef alloca 00336 00337 /* Make alloca work the best possible way. */ 00338 # ifdef __GNUC__ 00339 # define alloca __builtin_alloca 00340 # else /* not __GNUC__ */ 00341 # if HAVE_ALLOCA_H 00342 # include <alloca.h> 00343 # endif /* HAVE_ALLOCA_H */ 00344 # endif /* not __GNUC__ */ 00345 00346 # endif /* not alloca */ 00347 00348 # define REGEX_ALLOCATE alloca 00349 00350 /* Assumes a `char *destination' variable. */ 00351 # define REGEX_REALLOCATE(source, osize, nsize) \ 00352 (destination = (char *) alloca (nsize), \ 00353 memcpy (destination, source, osize)) 00354 00355 /* No need to do anything to free, after alloca. */ 00356 # define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ 00357 00358 #endif /* not REGEX_MALLOC */ 00359 00360 /* Define how to allocate the failure stack. */ 00361 00362 #if defined REL_ALLOC && defined REGEX_MALLOC 00363 00364 # define REGEX_ALLOCATE_STACK(size) \ 00365 r_alloc (&failure_stack_ptr, (size)) 00366 # define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 00367 r_re_alloc (&failure_stack_ptr, (nsize)) 00368 # define REGEX_FREE_STACK(ptr) \ 00369 r_alloc_free (&failure_stack_ptr) 00370 00371 #else /* not using relocating allocator */ 00372 00373 # ifdef REGEX_MALLOC 00374 00375 # define REGEX_ALLOCATE_STACK malloc 00376 # define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) 00377 # define REGEX_FREE_STACK free 00378 00379 # else /* not REGEX_MALLOC */ 00380 00381 # define REGEX_ALLOCATE_STACK alloca 00382 00383 # define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 00384 REGEX_REALLOCATE (source, osize, nsize) 00385 /* No need to explicitly free anything. */ 00386 # define REGEX_FREE_STACK(arg) 00387 00388 # endif /* not REGEX_MALLOC */ 00389 #endif /* not using relocating allocator */ 00390 00391 00392 /* True if `size1' is non-NULL and PTR is pointing anywhere inside 00393 `string1' or just past its end. This works if PTR is NULL, which is 00394 a good thing. */ 00395 #define FIRST_STRING_P(ptr) \ 00396 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) 00397 00398 /* (Re)Allocate N items of type T using malloc, or fail. */ 00399 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) 00400 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) 00401 #define RETALLOC_IF(addr, n, t) \ 00402 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) 00403 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) 00404 00405 #define BYTEWIDTH 8 /* In bits. */ 00406 00407 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) 00408 00409 #undef MAX 00410 #undef MIN 00411 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 00412 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 00413 00414 typedef char boolean; 00415 #define false 0 00416 #define true 1 00417 00418 static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp, 00419 const char *string1, int size1, 00420 const char *string2, int size2, 00421 int pos, 00422 struct re_registers *regs, 00423 int stop)); 00424 00425 /* These are the command codes that appear in compiled regular 00426 expressions. Some opcodes are followed by argument bytes. A 00427 command code can specify any interpretation whatsoever for its 00428 arguments. Zero bytes may appear in the compiled regular expression. */ 00429 00430 typedef enum 00431 { 00432 no_op = 0, 00433 00434 /* Succeed right away--no more backtracking. */ 00435 succeed, 00436 00437 /* Followed by one byte giving n, then by n literal bytes. */ 00438 exactn, 00439 00440 /* Matches any (more or less) character. */ 00441 anychar, 00442 00443 /* Matches any one char belonging to specified set. First 00444 following byte is number of bitmap bytes. Then come bytes 00445 for a bitmap saying which chars are in. Bits in each byte 00446 are ordered low-bit-first. A character is in the set if its 00447 bit is 1. A character too large to have a bit in the map is 00448 automatically not in the set. */ 00449 charset, 00450 00451 /* Same parameters as charset, but match any character that is 00452 not one of those specified. */ 00453 charset_not, 00454 00455 /* Start remembering the text that is matched, for storing in a 00456 register. Followed by one byte with the register number, in 00457 the range 0 to one less than the pattern buffer's re_nsub 00458 field. Then followed by one byte with the number of groups 00459 inner to this one. (This last has to be part of the 00460 start_memory only because we need it in the on_failure_jump 00461 of re_match_2.) */ 00462 start_memory, 00463 00464 /* Stop remembering the text that is matched and store it in a 00465 memory register. Followed by one byte with the register 00466 number, in the range 0 to one less than `re_nsub' in the 00467 pattern buffer, and one byte with the number of inner groups, 00468 just like `start_memory'. (We need the number of inner 00469 groups here because we don't have any easy way of finding the 00470 corresponding start_memory when we're at a stop_memory.) */ 00471 stop_memory, 00472 00473 /* Match a duplicate of something remembered. Followed by one 00474 byte containing the register number. */ 00475 duplicate, 00476 00477 /* Fail unless at beginning of line. */ 00478 begline, 00479 00480 /* Fail unless at end of line. */ 00481 endline, 00482 00483 /* Succeeds if at beginning of buffer (if emacs) or at beginning 00484 of string to be matched (if not). */ 00485 begbuf, 00486 00487 /* Analogously, for end of buffer/string. */ 00488 endbuf, 00489 00490 /* Followed by two byte relative address to which to jump. */ 00491 jump, 00492 00493 /* Same as jump, but marks the end of an alternative. */ 00494 jump_past_alt, 00495 00496 /* Followed by two-byte relative address of place to resume at 00497 in case of failure. */ 00498 on_failure_jump, 00499 00500 /* Like on_failure_jump, but pushes a placeholder instead of the 00501 current string position when executed. */ 00502 on_failure_keep_string_jump, 00503 00504 /* Throw away latest failure point and then jump to following 00505 two-byte relative address. */ 00506 pop_failure_jump, 00507 00508 /* Change to pop_failure_jump if know won't have to backtrack to 00509 match; otherwise change to jump. This is used to jump 00510 back to the beginning of a repeat. If what follows this jump 00511 clearly won't match what the repeat does, such that we can be 00512 sure that there is no use backtracking out of repetitions 00513 already matched, then we change it to a pop_failure_jump. 00514 Followed by two-byte address. */ 00515 maybe_pop_jump, 00516 00517 /* Jump to following two-byte address, and push a dummy failure 00518 point. This failure point will be thrown away if an attempt 00519 is made to use it for a failure. A `+' construct makes this 00520 before the first repeat. Also used as an intermediary kind 00521 of jump when compiling an alternative. */ 00522 dummy_failure_jump, 00523 00524 /* Push a dummy failure point and continue. Used at the end of 00525 alternatives. */ 00526 push_dummy_failure, 00527 00528 /* Followed by two-byte relative address and two-byte number n. 00529 After matching N times, jump to the address upon failure. */ 00530 succeed_n, 00531 00532 /* Followed by two-byte relative address, and two-byte number n. 00533 Jump to the address N times, then fail. */ 00534 jump_n, 00535 00536 /* Set the following two-byte relative address to the 00537 subsequent two-byte number. The address *includes* the two 00538 bytes of number. */ 00539 set_number_at, 00540 00541 wordchar, /* Matches any word-constituent character. */ 00542 notwordchar, /* Matches any char that is not a word-constituent. */ 00543 00544 wordbeg, /* Succeeds if at word beginning. */ 00545 wordend, /* Succeeds if at word end. */ 00546 00547 wordbound, /* Succeeds if at a word boundary. */ 00548 notwordbound /* Succeeds if not at a word boundary. */ 00549 00550 #ifdef emacs 00551 ,before_dot, /* Succeeds if before point. */ 00552 at_dot, /* Succeeds if at point. */ 00553 after_dot, /* Succeeds if after point. */ 00554 00555 /* Matches any character whose syntax is specified. Followed by 00556 a byte which contains a syntax code, e.g., Sword. */ 00557 syntaxspec, 00558 00559 /* Matches any character whose syntax is not that specified. */ 00560 notsyntaxspec 00561 #endif /* emacs */ 00562 } re_opcode_t; 00563 00564 /* Common operations on the compiled pattern. */ 00565 00566 /* Store NUMBER in two contiguous bytes starting at DESTINATION. */ 00567 00568 #define STORE_NUMBER(destination, number) \ 00569 do { \ 00570 (destination)[0] = (number) & 0377; \ 00571 (destination)[1] = (number) >> 8; \ 00572 } while (0) 00573 00574 /* Same as STORE_NUMBER, except increment DESTINATION to 00575 the byte after where the number is stored. Therefore, DESTINATION 00576 must be an lvalue. */ 00577 00578 #define STORE_NUMBER_AND_INCR(destination, number) \ 00579 do { \ 00580 STORE_NUMBER (destination, number); \ 00581 (destination) += 2; \ 00582 } while (0) 00583 00584 /* Put into DESTINATION a number stored in two contiguous bytes starting 00585 at SOURCE. */ 00586 00587 #define EXTRACT_NUMBER(destination, source) \ 00588 do { \ 00589 (destination) = *(source) & 0377; \ 00590 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ 00591 } while (0) 00592 00593 #ifdef DEBUG 00594 static void extract_number _RE_ARGS ((int *dest, unsigned char *source)); 00595 static void 00596 extract_number (dest, source) 00597 int *dest; 00598 unsigned char *source; 00599 { 00600 int temp = SIGN_EXTEND_CHAR (*(source + 1)); 00601 *dest = *source & 0377; 00602 *dest += temp << 8; 00603 } 00604 00605 # ifndef EXTRACT_MACROS /* To debug the macros. */ 00606 # undef EXTRACT_NUMBER 00607 # define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) 00608 # endif /* not EXTRACT_MACROS */ 00609 00610 #endif /* DEBUG */ 00611 00612 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. 00613 SOURCE must be an lvalue. */ 00614 00615 #define EXTRACT_NUMBER_AND_INCR(destination, source) \ 00616 do { \ 00617 EXTRACT_NUMBER (destination, source); \ 00618 (source) += 2; \ 00619 } while (0) 00620 00621 #ifdef DEBUG 00622 static void extract_number_and_incr _RE_ARGS ((int *destination, 00623 unsigned char **source)); 00624 static void 00625 extract_number_and_incr (destination, source) 00626 int *destination; 00627 unsigned char **source; 00628 { 00629 extract_number (destination, *source); 00630 *source += 2; 00631 } 00632 00633 # ifndef EXTRACT_MACROS 00634 # undef EXTRACT_NUMBER_AND_INCR 00635 # define EXTRACT_NUMBER_AND_INCR(dest, src) \ 00636 extract_number_and_incr (&dest, &src) 00637 # endif /* not EXTRACT_MACROS */ 00638 00639 #endif /* DEBUG */ 00640 00641 /* If DEBUG is defined, Regex prints many voluminous messages about what 00642 it is doing (if the variable `debug' is nonzero). If linked with the 00643 main program in `iregex.c', you can enter patterns and strings 00644 interactively. And if linked with the main program in `main.c' and 00645 the other test files, you can run the already-written tests. */ 00646 00647 #ifdef DEBUG 00648 00649 /* We use standard I/O for debugging. */ 00650 # include <stdio.h> 00651 00652 /* It is useful to test things that ``must'' be true when debugging. */ 00653 # include <assert.h> 00654 00655 static int debug = 0; 00656 00657 # define DEBUG_STATEMENT(e) e 00658 # define DEBUG_PRINT1(x) if (debug) printf (x) 00659 # define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) 00660 # define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) 00661 # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) 00662 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ 00663 if (debug) print_partial_compiled_pattern (s, e) 00664 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ 00665 if (debug) print_double_string (w, s1, sz1, s2, sz2) 00666 00667 00668 /* Print the fastmap in human-readable form. */ 00669 00670 void 00671 print_fastmap (fastmap) 00672 char *fastmap; 00673 { 00674 unsigned was_a_range = 0; 00675 unsigned i = 0; 00676 00677 while (i < (1 << BYTEWIDTH)) 00678 { 00679 if (fastmap[i++]) 00680 { 00681 was_a_range = 0; 00682 putchar (i - 1); 00683 while (i < (1 << BYTEWIDTH) && fastmap[i]) 00684 { 00685 was_a_range = 1; 00686 i++; 00687 } 00688 if (was_a_range) 00689 { 00690 printf ("-"); 00691 putchar (i - 1); 00692 } 00693 } 00694 } 00695 putchar ('\n'); 00696 } 00697 00698 00699 /* Print a compiled pattern string in human-readable form, starting at 00700 the START pointer into it and ending just before the pointer END. */ 00701 00702 void 00703 print_partial_compiled_pattern (start, end) 00704 unsigned char *start; 00705 unsigned char *end; 00706 { 00707 int mcnt, mcnt2; 00708 unsigned char *p1; 00709 unsigned char *p = start; 00710 unsigned char *pend = end; 00711 00712 if (start == NULL) 00713 { 00714 printf ("(null)\n"); 00715 return; 00716 } 00717 00718 /* Loop over pattern commands. */ 00719 while (p < pend) 00720 { 00721 printf ("%d:\t", p - start); 00722 00723 switch ((re_opcode_t) *p++) 00724 { 00725 case no_op: 00726 printf ("/no_op"); 00727 break; 00728 00729 case exactn: 00730 mcnt = *p++; 00731 printf ("/exactn/%d", mcnt); 00732 do 00733 { 00734 putchar ('/'); 00735 putchar (*p++); 00736 } 00737 while (--mcnt); 00738 break; 00739 00740 case start_memory: 00741 mcnt = *p++; 00742 printf ("/start_memory/%d/%d", mcnt, *p++); 00743 break; 00744 00745 case stop_memory: 00746 mcnt = *p++; 00747 printf ("/stop_memory/%d/%d", mcnt, *p++); 00748 break; 00749 00750 case duplicate: 00751 printf ("/duplicate/%d", *p++); 00752 break; 00753 00754 case anychar: 00755 printf ("/anychar"); 00756 break; 00757 00758 case charset: 00759 case charset_not: 00760 { 00761 register int c, last = -100; 00762 register int in_range = 0; 00763 00764 printf ("/charset [%s", 00765 (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); 00766 00767 assert (p + *p < pend); 00768 00769 for (c = 0; c < 256; c++) 00770 if (c / 8 < *p 00771 && (p[1 + (c/8)] & (1 << (c % 8)))) 00772 { 00773 /* Are we starting a range? */ 00774 if (last + 1 == c && ! in_range) 00775 { 00776 putchar ('-'); 00777 in_range = 1; 00778 } 00779 /* Have we broken a range? */ 00780 else if (last + 1 != c && in_range) 00781 { 00782 putchar (last); 00783 in_range = 0; 00784 } 00785 00786 if (! in_range) 00787 putchar (c); 00788 00789 last = c; 00790 } 00791 00792 if (in_range) 00793 putchar (last); 00794 00795 putchar (']'); 00796 00797 p += 1 + *p; 00798 } 00799 break; 00800 00801 case begline: 00802 printf ("/begline"); 00803 break; 00804 00805 case endline: 00806 printf ("/endline"); 00807 break; 00808 00809 case on_failure_jump: 00810 extract_number_and_incr (&mcnt, &p); 00811 printf ("/on_failure_jump to %d", p + mcnt - start); 00812 break; 00813 00814 case on_failure_keep_string_jump: 00815 extract_number_and_incr (&mcnt, &p); 00816 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); 00817 break; 00818 00819 case dummy_failure_jump: 00820 extract_number_and_incr (&mcnt, &p); 00821 printf ("/dummy_failure_jump to %d", p + mcnt - start); 00822 break; 00823 00824 case push_dummy_failure: 00825 printf ("/push_dummy_failure"); 00826 break; 00827 00828 case maybe_pop_jump: 00829 extract_number_and_incr (&mcnt, &p); 00830 printf ("/maybe_pop_jump to %d", p + mcnt - start); 00831 break; 00832 00833 case pop_failure_jump: 00834 extract_number_and_incr (&mcnt, &p); 00835 printf ("/pop_failure_jump to %d", p + mcnt - start); 00836 break; 00837 00838 case jump_past_alt: 00839 extract_number_and_incr (&mcnt, &p); 00840 printf ("/jump_past_alt to %d", p + mcnt - start); 00841 break; 00842 00843 case jump: 00844 extract_number_and_incr (&mcnt, &p); 00845 printf ("/jump to %d", p + mcnt - start); 00846 break; 00847 00848 case succeed_n: 00849 extract_number_and_incr (&mcnt, &p); 00850 p1 = p + mcnt; 00851 extract_number_and_incr (&mcnt2, &p); 00852 printf ("/succeed_n to %d, %d times", p1 - start, mcnt2); 00853 break; 00854 00855 case jump_n: 00856 extract_number_and_incr (&mcnt, &p); 00857 p1 = p + mcnt; 00858 extract_number_and_incr (&mcnt2, &p); 00859 printf ("/jump_n to %d, %d times", p1 - start, mcnt2); 00860 break; 00861 00862 case set_number_at: 00863 extract_number_and_incr (&mcnt, &p); 00864 p1 = p + mcnt; 00865 extract_number_and_incr (&mcnt2, &p); 00866 printf ("/set_number_at location %d to %d", p1 - start, mcnt2); 00867 break; 00868 00869 case wordbound: 00870 printf ("/wordbound"); 00871 break; 00872 00873 case notwordbound: 00874 printf ("/notwordbound"); 00875 break; 00876 00877 case wordbeg: 00878 printf ("/wordbeg"); 00879 break; 00880 00881 case wordend: 00882 printf ("/wordend"); 00883 00884 # ifdef emacs 00885 case before_dot: 00886 printf ("/before_dot"); 00887 break; 00888 00889 case at_dot: 00890 printf ("/at_dot"); 00891 break; 00892 00893 case after_dot: 00894 printf ("/after_dot"); 00895 break; 00896 00897 case syntaxspec: 00898 printf ("/syntaxspec"); 00899 mcnt = *p++; 00900 printf ("/%d", mcnt); 00901 break; 00902 00903 case notsyntaxspec: 00904 printf ("/notsyntaxspec"); 00905 mcnt = *p++; 00906 printf ("/%d", mcnt); 00907 break; 00908 # endif /* emacs */ 00909 00910 case wordchar: 00911 printf ("/wordchar"); 00912 break; 00913 00914 case notwordchar: 00915 printf ("/notwordchar"); 00916 break; 00917 00918 case begbuf: 00919 printf ("/begbuf"); 00920 break; 00921 00922 case endbuf: 00923 printf ("/endbuf"); 00924 break; 00925 00926 default: 00927 printf ("?%d", *(p-1)); 00928 } 00929 00930 putchar ('\n'); 00931 } 00932 00933 printf ("%d:\tend of pattern.\n", p - start); 00934 } 00935 00936 00937 void 00938 print_compiled_pattern (bufp) 00939 struct re_pattern_buffer *bufp; 00940 { 00941 unsigned char *buffer = bufp->buffer; 00942 00943 print_partial_compiled_pattern (buffer, buffer + bufp->used); 00944 printf ("%ld bytes used/%ld bytes allocated.\n", 00945 bufp->used, bufp->allocated); 00946 00947 if (bufp->fastmap_accurate && bufp->fastmap) 00948 { 00949 printf ("fastmap: "); 00950 print_fastmap (bufp->fastmap); 00951 } 00952 00953 printf ("re_nsub: %d\t", bufp->re_nsub); 00954 printf ("regs_alloc: %d\t", bufp->regs_allocated); 00955 printf ("can_be_null: %d\t", bufp->can_be_null); 00956 printf ("newline_anchor: %d\n", bufp->newline_anchor); 00957 printf ("no_sub: %d\t", bufp->no_sub); 00958 printf ("not_bol: %d\t", bufp->not_bol); 00959 printf ("not_eol: %d\t", bufp->not_eol); 00960 printf ("syntax: %lx\n", bufp->syntax); 00961 /* Perhaps we should print the translate table? */ 00962 } 00963 00964 00965 void 00966 print_double_string (where, string1, size1, string2, size2) 00967 const char *where; 00968 const char *string1; 00969 const char *string2; 00970 int size1; 00971 int size2; 00972 { 00973 int this_char; 00974 00975 if (where == NULL) 00976 printf ("(null)"); 00977 else 00978 { 00979 if (FIRST_STRING_P (where)) 00980 { 00981 for (this_char = where - string1; this_char < size1; this_char++) 00982 putchar (string1[this_char]); 00983 00984 where = string2; 00985 } 00986 00987 for (this_char = where - string2; this_char < size2; this_char++) 00988 putchar (string2[this_char]); 00989 } 00990 } 00991 00992 void 00993 printchar (c) 00994 int c; 00995 { 00996 putc (c, stderr); 00997 } 00998 00999 #else /* not DEBUG */ 01000 01001 # undef assert 01002 # define assert(e) 01003 01004 # define DEBUG_STATEMENT(e) 01005 # define DEBUG_PRINT1(x) 01006 # define DEBUG_PRINT2(x1, x2) 01007 # define DEBUG_PRINT3(x1, x2, x3) 01008 # define DEBUG_PRINT4(x1, x2, x3, x4) 01009 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 01010 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) 01011 01012 #endif /* not DEBUG */ 01013 01014 #if JS 01015 reg_syntax_t re_syntax_options = RE_SYNTAX_GNU_AWK; 01016 #else /* not JS */ 01017 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can 01018 also be assigned to arbitrarily: each pattern buffer stores its own 01019 syntax, so it can be changed between regex compilations. */ 01020 /* This has no initializer because initialized variables in Emacs 01021 become read-only after dumping. */ 01022 reg_syntax_t re_syntax_options; 01023 #endif /* not JS */ 01024 01025 01026 /* Specify the precise syntax of regexps for compilation. This provides 01027 for compatibility for various utilities which historically have 01028 different, incompatible syntaxes. 01029 01030 The argument SYNTAX is a bit mask comprised of the various bits 01031 defined in regex.h. We return the old syntax. */ 01032 01033 reg_syntax_t 01034 re_set_syntax (syntax) 01035 reg_syntax_t syntax; 01036 { 01037 reg_syntax_t ret = re_syntax_options; 01038 01039 re_syntax_options = syntax; 01040 #ifdef DEBUG 01041 if (syntax & RE_DEBUG) 01042 debug = 1; 01043 else if (debug) /* was on but now is not */ 01044 debug = 0; 01045 #endif /* DEBUG */ 01046 return ret; 01047 } 01048 #ifdef _LIBC 01049 weak_alias (__re_set_syntax, re_set_syntax) 01050 #endif 01051 01052 /* This table gives an error message for each of the error codes listed 01053 in regex.h. Obviously the order here has to be same as there. 01054 POSIX doesn't require that we do anything for REG_NOERROR, 01055 but why not be nice? */ 01056 01057 static const char *re_error_msgid[] = 01058 { 01059 gettext_noop ("Success"), /* REG_NOERROR */ 01060 gettext_noop ("No match"), /* REG_NOMATCH */ 01061 gettext_noop ("Invalid regular expression"), /* REG_BADPAT */ 01062 gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */ 01063 gettext_noop ("Invalid character class name"), /* REG_ECTYPE */ 01064 gettext_noop ("Trailing backslash"), /* REG_EESCAPE */ 01065 gettext_noop ("Invalid back reference"), /* REG_ESUBREG */ 01066 gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */ 01067 gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */ 01068 gettext_noop ("Unmatched \\{"), /* REG_EBRACE */ 01069 gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */ 01070 gettext_noop ("Invalid range end"), /* REG_ERANGE */ 01071 gettext_noop ("Memory exhausted"), /* REG_ESPACE */ 01072 gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */ 01073 gettext_noop ("Premature end of regular expression"), /* REG_EEND */ 01074 gettext_noop ("Regular expression too big"), /* REG_ESIZE */ 01075 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ 01076 }; 01077 01078 /* Avoiding alloca during matching, to placate r_alloc. */ 01079 01080 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the 01081 searching and matching functions should not call alloca. On some 01082 systems, alloca is implemented in terms of malloc, and if we're 01083 using the relocating allocator routines, then malloc could cause a 01084 relocation, which might (if the strings being searched are in the 01085 ralloc heap) shift the data out from underneath the regexp 01086 routines. 01087 01088 Here's another reason to avoid allocation: Emacs 01089 processes input from X in a signal handler; processing X input may 01090 call malloc; if input arrives while a matching routine is calling 01091 malloc, then we're scrod. But Emacs can't just block input while 01092 calling matching routines; then we don't notice interrupts when 01093 they come in. So, Emacs blocks input around all regexp calls 01094 except the matching calls, which it leaves unprotected, in the 01095 faith that they will not malloc. */ 01096 01097 /* Normally, this is fine. */ 01098 #define MATCH_MAY_ALLOCATE 01099 01100 /* When using GNU C, we are not REALLY using the C alloca, no matter 01101 what config.h may say. So don't take precautions for it. */ 01102 #ifdef __GNUC__ 01103 # undef C_ALLOCA 01104 #endif 01105 01106 /* The match routines may not allocate if (1) they would do it with malloc 01107 and (2) it's not safe for them to use malloc. 01108 Note that if REL_ALLOC is defined, matching would not use malloc for the 01109 failure stack, but we would still use it for the register vectors; 01110 so REL_ALLOC should not affect this. */ 01111 #if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs 01112 # undef MATCH_MAY_ALLOCATE 01113 #endif 01114 01115 01116 /* Failure stack declarations and macros; both re_compile_fastmap and 01117 re_match_2 use a failure stack. These have to be macros because of 01118 REGEX_ALLOCATE_STACK. */ 01119 01120 01121 /* Number of failure points for which to initially allocate space 01122 when matching. If this number is exceeded, we allocate more 01123 space, so it is not a hard limit. */ 01124 #ifndef INIT_FAILURE_ALLOC 01125 # define INIT_FAILURE_ALLOC 5 01126 #endif 01127 01128 /* Roughly the maximum number of failure points on the stack. Would be 01129 exactly that if always used MAX_FAILURE_ITEMS items each time we failed. 01130 This is a variable only so users of regex can assign to it; we never 01131 change it ourselves. */ 01132 01133 #ifdef INT_IS_16BIT 01134 01135 # if defined MATCH_MAY_ALLOCATE 01136 /* 4400 was enough to cause a crash on Alpha OSF/1, 01137 whose default stack limit is 2mb. */ 01138 long int re_max_failures = 4000; 01139 # else 01140 long int re_max_failures = 2000; 01141 # endif 01142 01143 union fail_stack_elt 01144 { 01145 unsigned char *pointer; 01146 long int integer; 01147 }; 01148 01149 typedef union fail_stack_elt fail_stack_elt_t; 01150 01151 typedef struct 01152 { 01153 fail_stack_elt_t *stack; 01154 unsigned long int size; 01155 unsigned long int avail; /* Offset of next open position. */ 01156 } fail_stack_type; 01157 01158 #else /* not INT_IS_16BIT */ 01159 01160 # if defined MATCH_MAY_ALLOCATE 01161 /* 4400 was enough to cause a crash on Alpha OSF/1, 01162 whose default stack limit is 2mb. */ 01163 int re_max_failures = 20000; 01164 # else 01165 int re_max_failures = 2000; 01166 # endif 01167 01168 union fail_stack_elt 01169 { 01170 unsigned char *pointer; 01171 int integer; 01172 }; 01173 01174 typedef union fail_stack_elt fail_stack_elt_t; 01175 01176 typedef struct 01177 { 01178 fail_stack_elt_t *stack; 01179 unsigned size; 01180 unsigned avail; /* Offset of next open position. */ 01181 } fail_stack_type; 01182 01183 #endif /* INT_IS_16BIT */ 01184 01185 #define FAIL_STACK_EMPTY() (fail_stack.avail == 0) 01186 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) 01187 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) 01188 01189 01190 /* Define macros to initialize and free the failure stack. 01191 Do `return -2' if the alloc fails. */ 01192 01193 #ifdef MATCH_MAY_ALLOCATE 01194 # define INIT_FAIL_STACK() \ 01195 do { \ 01196 fail_stack.stack = (fail_stack_elt_t *) \ 01197 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ 01198 \ 01199 if (fail_stack.stack == NULL) \ 01200 return -2; \ 01201 \ 01202 fail_stack.size = INIT_FAILURE_ALLOC; \ 01203 fail_stack.avail = 0; \ 01204 } while (0) 01205 01206 # define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) 01207 #else 01208 # define INIT_FAIL_STACK() \ 01209 do { \ 01210 fail_stack.avail = 0; \ 01211 } while (0) 01212 01213 # define RESET_FAIL_STACK() 01214 #endif 01215 01216 01217 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. 01218 01219 Return 1 if succeeds, and 0 if either ran out of memory 01220 allocating space for it or it was already too large. 01221 01222 REGEX_REALLOCATE_STACK requires `destination' be declared. */ 01223 01224 #define DOUBLE_FAIL_STACK(fail_stack) \ 01225 ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \ 01226 ? 0 \ 01227 : ((fail_stack).stack = (fail_stack_elt_t *) \ 01228 REGEX_REALLOCATE_STACK ((fail_stack).stack, \ 01229 (fail_stack).size * sizeof (fail_stack_elt_t), \ 01230 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ 01231 \ 01232 (fail_stack).stack == NULL \ 01233 ? 0 \ 01234 : ((fail_stack).size <<= 1, \ 01235 1))) 01236 01237 01238 /* Push pointer POINTER on FAIL_STACK. 01239 Return 1 if was able to do so and 0 if ran out of memory allocating 01240 space to do so. */ 01241 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ 01242 ((FAIL_STACK_FULL () \ 01243 && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ 01244 ? 0 \ 01245 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ 01246 1)) 01247 01248 /* Push a pointer value onto the failure stack. 01249 Assumes the variable `fail_stack'. Probably should only 01250 be called from within `PUSH_FAILURE_POINT'. */ 01251 #define PUSH_FAILURE_POINTER(item) \ 01252 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) 01253 01254 /* This pushes an integer-valued item onto the failure stack. 01255 Assumes the variable `fail_stack'. Probably should only 01256 be called from within `PUSH_FAILURE_POINT'. */ 01257 #define PUSH_FAILURE_INT(item) \ 01258 fail_stack.stack[fail_stack.avail++].integer = (item) 01259 01260 /* Push a fail_stack_elt_t value onto the failure stack. 01261 Assumes the variable `fail_stack'. Probably should only 01262 be called from within `PUSH_FAILURE_POINT'. */ 01263 #define PUSH_FAILURE_ELT(item) \ 01264 fail_stack.stack[fail_stack.avail++] = (item) 01265 01266 /* These three POP... operations complement the three PUSH... operations. 01267 All assume that `fail_stack' is nonempty. */ 01268 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer 01269 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer 01270 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] 01271 01272 /* Used to omit pushing failure point id's when we're not debugging. */ 01273 #ifdef DEBUG 01274 # define DEBUG_PUSH PUSH_FAILURE_INT 01275 # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () 01276 #else 01277 # define DEBUG_PUSH(item) 01278 # define DEBUG_POP(item_addr) 01279 #endif 01280 01281 01282 /* Push the information about the state we will need 01283 if we ever fail back to it. 01284 01285 Requires variables fail_stack, regstart, regend, reg_info, and 01286 num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination' 01287 be declared. 01288 01289 Does `return FAILURE_CODE' if runs out of memory. */ 01290 01291 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ 01292 do { \ 01293 char *destination; \ 01294 /* Must be int, so when we don't save any registers, the arithmetic \ 01295 of 0 + -1 isn't done as unsigned. */ \ 01296 /* Can't be int, since there is not a shred of a guarantee that int \ 01297 is wide enough to hold a value of something to which pointer can \ 01298 be assigned */ \ 01299 active_reg_t this_reg; \ 01300 \ 01301 DEBUG_STATEMENT (failure_id++); \ 01302 DEBUG_STATEMENT (nfailure_points_pushed++); \ 01303 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ 01304 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ 01305 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ 01306 \ 01307 DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \ 01308 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ 01309 \ 01310 /* Ensure we have enough space allocated for what we will push. */ \ 01311 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ 01312 { \ 01313 if (!DOUBLE_FAIL_STACK (fail_stack)) \ 01314 return failure_code; \ 01315 \ 01316 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ 01317 (fail_stack).size); \ 01318 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ 01319 } \ 01320 \ 01321 /* Push the info, starting with the registers. */ \ 01322 DEBUG_PRINT1 ("\n"); \ 01323 \ 01324 if (1) \ 01325 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ 01326 this_reg++) \ 01327 { \ 01328 DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \ 01329 DEBUG_STATEMENT (num_regs_pushed++); \ 01330 \ 01331 DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ 01332 PUSH_FAILURE_POINTER (regstart[this_reg]); \ 01333 \ 01334 DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ 01335 PUSH_FAILURE_POINTER (regend[this_reg]); \ 01336 \ 01337 DEBUG_PRINT2 (" info: %p\n ", \ 01338 reg_info[this_reg].word.pointer); \ 01339 DEBUG_PRINT2 (" match_null=%d", \ 01340 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ 01341 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ 01342 DEBUG_PRINT2 (" matched_something=%d", \ 01343 MATCHED_SOMETHING (reg_info[this_reg])); \ 01344 DEBUG_PRINT2 (" ever_matched=%d", \ 01345 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ 01346 DEBUG_PRINT1 ("\n"); \ 01347 PUSH_FAILURE_ELT (reg_info[this_reg].word); \ 01348 } \ 01349 \ 01350 DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\ 01351 PUSH_FAILURE_INT (lowest_active_reg); \ 01352 \ 01353 DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\ 01354 PUSH_FAILURE_INT (highest_active_reg); \ 01355 \ 01356 DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \ 01357 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ 01358 PUSH_FAILURE_POINTER (pattern_place); \ 01359 \ 01360 DEBUG_PRINT2 (" Pushing string %p: `", string_place); \ 01361 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ 01362 size2); \ 01363 DEBUG_PRINT1 ("'\n"); \ 01364 PUSH_FAILURE_POINTER (string_place); \ 01365 \ 01366 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ 01367 DEBUG_PUSH (failure_id); \ 01368 } while (0) 01369 01370 /* This is the number of items that are pushed and popped on the stack 01371 for each register. */ 01372 #define NUM_REG_ITEMS 3 01373 01374 /* Individual items aside from the registers. */ 01375 #ifdef DEBUG 01376 # define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ 01377 #else 01378 # define NUM_NONREG_ITEMS 4 01379 #endif 01380 01381 /* We push at most this many items on the stack. */ 01382 /* We used to use (num_regs - 1), which is the number of registers 01383 this regexp will save; but that was changed to 5 01384 to avoid stack overflow for a regexp with lots of parens. */ 01385 #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) 01386 01387 /* We actually push this many items. */ 01388 #define NUM_FAILURE_ITEMS \ 01389 (((0 \ 01390 ? 0 : highest_active_reg - lowest_active_reg + 1) \ 01391 * NUM_REG_ITEMS) \ 01392 + NUM_NONREG_ITEMS) 01393 01394 /* How many items can still be added to the stack without overflowing it. */ 01395 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) 01396 01397 01398 /* Pops what PUSH_FAIL_STACK pushes. 01399 01400 We restore into the parameters, all of which should be lvalues: 01401 STR -- the saved data position. 01402 PAT -- the saved pattern position. 01403 LOW_REG, HIGH_REG -- the highest and lowest active registers. 01404 REGSTART, REGEND -- arrays of string positions. 01405 REG_INFO -- array of information about each subexpression. 01406 01407 Also assumes the variables `fail_stack' and (if debugging), `bufp', 01408 `pend', `string1', `size1', `string2', and `size2'. */ 01409 01410 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ 01411 { \ 01412 DEBUG_STATEMENT (unsigned failure_id;) \ 01413 active_reg_t this_reg; \ 01414 const unsigned char *string_temp; \ 01415 \ 01416 assert (!FAIL_STACK_EMPTY ()); \ 01417 \ 01418 /* Remove failure points and point to how many regs pushed. */ \ 01419 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ 01420 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ 01421 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ 01422 \ 01423 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ 01424 \ 01425 DEBUG_POP (&failure_id); \ 01426 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ 01427 \ 01428 /* If the saved string location is NULL, it came from an \ 01429 on_failure_keep_string_jump opcode, and we want to throw away the \ 01430 saved NULL, thus retaining our current position in the string. */ \ 01431 string_temp = POP_FAILURE_POINTER (); \ 01432 if (string_temp != NULL) \ 01433 str = (const char *) string_temp; \ 01434 \ 01435 DEBUG_PRINT2 (" Popping string %p: `", str); \ 01436 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ 01437 DEBUG_PRINT1 ("'\n"); \ 01438 \ 01439 pat = (unsigned char *) POP_FAILURE_POINTER (); \ 01440 DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \ 01441 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ 01442 \ 01443 /* Restore register info. */ \ 01444 high_reg = (active_reg_t) POP_FAILURE_INT (); \ 01445 DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \ 01446 \ 01447 low_reg = (active_reg_t) POP_FAILURE_INT (); \ 01448 DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \ 01449 \ 01450 if (1) \ 01451 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ 01452 { \ 01453 DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \ 01454 \ 01455 reg_info[this_reg].word = POP_FAILURE_ELT (); \ 01456 DEBUG_PRINT2 (" info: %p\n", \ 01457 reg_info[this_reg].word.pointer); \ 01458 \ 01459 regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ 01460 DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ 01461 \ 01462 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ 01463 DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ 01464 } \ 01465 else \ 01466 { \ 01467 for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ 01468 { \ 01469 reg_info[this_reg].word.integer = 0; \ 01470 regend[this_reg] = 0; \ 01471 regstart[this_reg] = 0; \ 01472 } \ 01473 highest_active_reg = high_reg; \ 01474 } \ 01475 \ 01476 set_regs_matched_done = 0; \ 01477 DEBUG_STATEMENT (nfailure_points_popped++); \ 01478 } /* POP_FAILURE_POINT */ 01479 01480 01481 01482 /* Structure for per-register (a.k.a. per-group) information. 01483 Other register information, such as the 01484 starting and ending positions (which are addresses), and the list of 01485 inner groups (which is a bits list) are maintained in separate 01486 variables. 01487 01488 We are making a (strictly speaking) nonportable assumption here: that 01489 the compiler will pack our bit fields into something that fits into 01490 the type of `word', i.e., is something that fits into one item on the 01491 failure stack. */ 01492 01493 01494 /* Declarations and macros for re_match_2. */ 01495 01496 typedef union 01497 { 01498 fail_stack_elt_t word; 01499 struct 01500 { 01501 /* This field is one if this group can match the empty string, 01502 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ 01503 #define MATCH_NULL_UNSET_VALUE 3 01504 unsigned match_null_string_p : 2; 01505 unsigned is_active : 1; 01506 unsigned matched_something : 1; 01507 unsigned ever_matched_something : 1; 01508 } bits; 01509 } register_info_type; 01510 01511 #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) 01512 #define IS_ACTIVE(R) ((R).bits.is_active) 01513 #define MATCHED_SOMETHING(R) ((R).bits.matched_something) 01514 #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) 01515 01516 01517 /* Call this when have matched a real character; it sets `matched' flags 01518 for the subexpressions which we are currently inside. Also records 01519 that those subexprs have matched. */ 01520 #define SET_REGS_MATCHED() \ 01521 do \ 01522 { \ 01523 if (!set_regs_matched_done) \ 01524 { \ 01525 active_reg_t r; \ 01526 set_regs_matched_done = 1; \ 01527 for (r = lowest_active_reg; r <= highest_active_reg; r++) \ 01528 { \ 01529 MATCHED_SOMETHING (reg_info[r]) \ 01530 = EVER_MATCHED_SOMETHING (reg_info[r]) \ 01531 = 1; \ 01532 } \ 01533 } \ 01534 } \ 01535 while (0) 01536 01537 /* Registers are set to a sentinel when they haven't yet matched. */ 01538 static char reg_unset_dummy; 01539 #define REG_UNSET_VALUE (®_unset_dummy) 01540 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE) 01541 01542 /* Subroutine declarations and macros for regex_compile. */ 01543 01544 static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size, 01545 reg_syntax_t syntax, 01546 struct re_pattern_buffer *bufp)); 01547 static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg)); 01548 static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, 01549 int arg1, int arg2)); 01550 static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, 01551 int arg, unsigned char *end)); 01552 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, 01553 int arg1, int arg2, unsigned char *end)); 01554 static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p, 01555 reg_syntax_t syntax)); 01556 static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend, 01557 reg_syntax_t syntax)); 01558 static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr, 01559 const char *pend, 01560 char *translate, 01561 reg_syntax_t syntax, 01562 unsigned char *b)); 01563 01564 /* Fetch the next character in the uncompiled pattern---translating it 01565 if necessary. Also cast from a signed character in the constant 01566 string passed to us by the user to an unsigned char that we can use 01567 as an array index (in, e.g., `translate'). */ 01568 #ifndef PATFETCH 01569 # define PATFETCH(c) \ 01570 do {if (p == pend) return REG_EEND; \ 01571 c = (unsigned char) *p++; \ 01572 if (translate) c = (unsigned char) translate[c]; \ 01573 } while (0) 01574 #endif 01575 01576 /* Fetch the next character in the uncompiled pattern, with no 01577 translation. */ 01578 #define PATFETCH_RAW(c) \ 01579 do {if (p == pend) return REG_EEND; \ 01580 c = (unsigned char) *p++; \ 01581 } while (0) 01582 01583 /* Go backwards one character in the pattern. */ 01584 #define PATUNFETCH p-- 01585 01586 01587 /* If `translate' is non-null, return translate[D], else just D. We 01588 cast the subscript to translate because some data is declared as 01589 `char *', to avoid warnings when a string constant is passed. But 01590 when we use a character as a subscript we must make it unsigned. */ 01591 #ifndef TRANSLATE 01592 # define TRANSLATE(d) \ 01593 (translate ? (char) translate[(unsigned char) (d)] : (d)) 01594 #endif 01595 01596 01597 /* Macros for outputting the compiled pattern into `buffer'. */ 01598 01599 /* If the buffer isn't allocated when it comes in, use this. */ 01600 #define INIT_BUF_SIZE 32 01601 01602 /* Make sure we have at least N more bytes of space in buffer. */ 01603 #define GET_BUFFER_SPACE(n) \ 01604 while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ 01605 EXTEND_BUFFER () 01606 01607 /* Make sure we have one more byte of buffer space and then add C to it. */ 01608 #define BUF_PUSH(c) \ 01609 do { \ 01610 GET_BUFFER_SPACE (1); \ 01611 *b++ = (unsigned char) (c); \ 01612 } while (0) 01613 01614 01615 /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ 01616 #define BUF_PUSH_2(c1, c2) \ 01617 do { \ 01618 GET_BUFFER_SPACE (2); \ 01619 *b++ = (unsigned char) (c1); \ 01620 *b++ = (unsigned char) (c2); \ 01621 } while (0) 01622 01623 01624 /* As with BUF_PUSH_2, except for three bytes. */ 01625 #define BUF_PUSH_3(c1, c2, c3) \ 01626 do { \ 01627 GET_BUFFER_SPACE (3); \ 01628 *b++ = (unsigned char) (c1); \ 01629 *b++ = (unsigned char) (c2); \ 01630 *b++ = (unsigned char) (c3); \ 01631 } while (0) 01632 01633 01634 /* Store a jump with opcode OP at LOC to location TO. We store a 01635 relative address offset by the three bytes the jump itself occupies. */ 01636 #define STORE_JUMP(op, loc, to) \ 01637 store_op1 (op, loc, (int) ((to) - (loc) - 3)) 01638 01639 /* Likewise, for a two-argument jump. */ 01640 #define STORE_JUMP2(op, loc, to, arg) \ 01641 store_op2 (op, loc, (int) ((to) - (loc) - 3), arg) 01642 01643 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ 01644 #define INSERT_JUMP(op, loc, to) \ 01645 insert_op1 (op, loc, (int) ((to) - (loc) - 3), b) 01646 01647 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ 01648 #define INSERT_JUMP2(op, loc, to, arg) \ 01649 insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b) 01650 01651 01652 /* This is not an arbitrary limit: the arguments which represent offsets 01653 into the pattern are two bytes long. So if 2^16 bytes turns out to 01654 be too small, many things would have to change. */ 01655 /* Any other compiler which, like MSC, has allocation limit below 2^16 01656 bytes will have to use approach similar to what was done below for 01657 MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up 01658 reallocating to 0 bytes. Such thing is not going to work too well. 01659 You have been warned!! */ 01660 #if defined _MSC_VER && !defined WIN32 01661 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes. 01662 The REALLOC define eliminates a flurry of conversion warnings, 01663 but is not required. */ 01664 # define MAX_BUF_SIZE 65500L 01665 # define REALLOC(p,s) realloc ((p), (size_t) (s)) 01666 #else 01667 # define MAX_BUF_SIZE (1L << 16) 01668 # define REALLOC(p,s) realloc ((p), (s)) 01669 #endif 01670 01671 /* Extend the buffer by twice its current size via realloc and 01672 reset the pointers that pointed into the old block to point to the 01673 correct places in the new one. If extending the buffer results in it 01674 being larger than MAX_BUF_SIZE, then flag memory exhausted. */ 01675 #define EXTEND_BUFFER() \ 01676 do { \ 01677 unsigned char *old_buffer = bufp->buffer; \ 01678 if (bufp->allocated == MAX_BUF_SIZE) \ 01679 return REG_ESIZE; \ 01680 bufp->allocated <<= 1; \ 01681 if (bufp->allocated > MAX_BUF_SIZE) \ 01682 bufp->allocated = MAX_BUF_SIZE; \ 01683 bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\ 01684 if (bufp->buffer == NULL) \ 01685 return REG_ESPACE; \ 01686 /* If the buffer moved, move all the pointers into it. */ \ 01687 if (old_buffer != bufp->buffer) \ 01688 { \ 01689 b = (b - old_buffer) + bufp->buffer; \ 01690 begalt = (begalt - old_buffer) + bufp->buffer; \ 01691 if (fixup_alt_jump) \ 01692 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ 01693 if (laststart) \ 01694 laststart = (laststart - old_buffer) + bufp->buffer; \ 01695 if (pending_exact) \ 01696 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ 01697 } \ 01698 } while (0) 01699 01700 01701 /* Since we have one byte reserved for the register number argument to 01702 {start,stop}_memory, the maximum number of groups we can report 01703 things about is what fits in that byte. */ 01704 #define MAX_REGNUM 255 01705 01706 /* But patterns can have more than `MAX_REGNUM' registers. We just 01707 ignore the excess. */ 01708 typedef unsigned regnum_t; 01709 01710 01711 /* Macros for the compile stack. */ 01712 01713 /* Since offsets can go either forwards or backwards, this type needs to 01714 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ 01715 /* int may be not enough when sizeof(int) == 2. */ 01716 typedef long pattern_offset_t; 01717 01718 typedef struct 01719 { 01720 pattern_offset_t begalt_offset; 01721 pattern_offset_t fixup_alt_jump; 01722 pattern_offset_t inner_group_offset; 01723 pattern_offset_t laststart_offset; 01724 regnum_t regnum; 01725 } compile_stack_elt_t; 01726 01727 01728 typedef struct 01729 { 01730 compile_stack_elt_t *stack; 01731 unsigned size; 01732 unsigned avail; /* Offset of next open position. */ 01733 } compile_stack_type; 01734 01735 01736 #define INIT_COMPILE_STACK_SIZE 32 01737 01738 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) 01739 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) 01740 01741 /* The next available element. */ 01742 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) 01743 01744 01745 /* Set the bit for character C in a list. */ 01746 #define SET_LIST_BIT(c) \ 01747 (b[((unsigned char) (c)) / BYTEWIDTH] \ 01748 |= 1 << (((unsigned char) c) % BYTEWIDTH)) 01749 01750 01751 /* Get the next unsigned number in the uncompiled pattern. */ 01752 #define GET_UNSIGNED_NUMBER(num) \ 01753 { if (p != pend) \ 01754 { \ 01755 PATFETCH (c); \ 01756 while (ISDIGIT (c)) \ 01757 { \ 01758 if (num < 0) \ 01759 num = 0; \ 01760 num = num * 10 + c - '0'; \ 01761 if (p == pend) \ 01762 break; \ 01763 PATFETCH (c); \ 01764 } \ 01765 } \ 01766 } 01767 01768 #if defined _LIBC || WIDE_CHAR_SUPPORT 01769 /* The GNU C library provides support for user-defined character classes 01770 and the functions from ISO C amendement 1. */ 01771 # ifdef CHARCLASS_NAME_MAX 01772 # define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX 01773 # else 01774 /* This shouldn't happen but some implementation might still have this 01775 problem. Use a reasonable default value. */ 01776 # define CHAR_CLASS_MAX_LENGTH 256 01777 # endif 01778 01779 # ifdef _LIBC 01780 # define IS_CHAR_CLASS(string) __wctype (string) 01781 # else 01782 # define IS_CHAR_CLASS(string) wctype (string) 01783 # endif 01784 #else 01785 # define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ 01786 01787 # define IS_CHAR_CLASS(string) \ 01788 (STREQ (string, "alpha") || STREQ (string, "upper") \ 01789 || STREQ (string, "lower") || STREQ (string, "digit") \ 01790 || STREQ (string, "alnum") || STREQ (string, "xdigit") \ 01791 || STREQ (string, "space") || STREQ (string, "print") \ 01792 || STREQ (string, "punct") || STREQ (string, "graph") \ 01793 || STREQ (string, "cntrl") || STREQ (string, "blank")) 01794 #endif 01795 01796 #ifndef MATCH_MAY_ALLOCATE 01797 01798 /* If we cannot allocate large objects within re_match_2_internal, 01799 we make the fail stack and register vectors global. 01800 The fail stack, we grow to the maximum size when a regexp 01801 is compiled. 01802 The register vectors, we adjust in size each time we 01803 compile a regexp, according to the number of registers it needs. */ 01804 01805 static fail_stack_type fail_stack; 01806 01807 /* Size with which the following vectors are currently allocated. 01808 That is so we can make them bigger as needed, 01809 but never make them smaller. */ 01810 static int regs_allocated_size; 01811 01812 static const char ** regstart, ** regend; 01813 static const char ** old_regstart, ** old_regend; 01814 static const char **best_regstart, **best_regend; 01815 static register_info_type *reg_info; 01816 static const char **reg_dummy; 01817 static register_info_type *reg_info_dummy; 01818 01819 /* Make the register vectors big enough for NUM_REGS registers, 01820 but don't make them smaller. */ 01821 01822 static 01823 regex_grow_registers (num_regs) 01824 int num_regs; 01825 { 01826 if (num_regs > regs_allocated_size) 01827 { 01828 RETALLOC_IF (regstart, num_regs, const char *); 01829 RETALLOC_IF (regend, num_regs, const char *); 01830 RETALLOC_IF (old_regstart, num_regs, const char *); 01831 RETALLOC_IF (old_regend, num_regs, const char *); 01832 RETALLOC_IF (best_regstart, num_regs, const char *); 01833 RETALLOC_IF (best_regend, num_regs, const char *); 01834 RETALLOC_IF (reg_info, num_regs, register_info_type); 01835 RETALLOC_IF (reg_dummy, num_regs, const char *); 01836 RETALLOC_IF (reg_info_dummy, num_regs, register_info_type); 01837 01838 regs_allocated_size = num_regs; 01839 } 01840 } 01841 01842 #endif /* not MATCH_MAY_ALLOCATE */ 01843 01844 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type 01845 compile_stack, 01846 regnum_t regnum)); 01847 01848 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. 01849 Returns one of error codes defined in `regex.h', or zero for success. 01850 01851 Assumes the `allocated' (and perhaps `buffer') and `translate' 01852 fields are set in BUFP on entry. 01853 01854 If it succeeds, results are put in BUFP (if it returns an error, the 01855 contents of BUFP are undefined): 01856 `buffer' is the compiled pattern; 01857 `syntax' is set to SYNTAX; 01858 `used' is set to the length of the compiled pattern; 01859 `fastmap_accurate' is zero; 01860 `re_nsub' is the number of subexpressions in PATTERN; 01861 `not_bol' and `not_eol' are zero; 01862 01863 The `fastmap' and `newline_anchor' fields are neither 01864 examined nor set. */ 01865 01866 /* Return, freeing storage we allocated. */ 01867 #define FREE_STACK_RETURN(value) \ 01868 return (free (compile_stack.stack), value) 01869 01870 static reg_errcode_t 01871 regex_compile (pattern, size, syntax, bufp) 01872 const char *pattern; 01873 size_t size; 01874 reg_syntax_t syntax; 01875 struct re_pattern_buffer *bufp; 01876 { 01877 /* We fetch characters from PATTERN here. Even though PATTERN is 01878 `char *' (i.e., signed), we declare these variables as unsigned, so 01879 they can be reliably used as array indices. */ 01880 register unsigned char c, c1; 01881 01882 /* A random temporary spot in PATTERN. */ 01883 const char *p1; 01884 01885 /* Points to the end of the buffer, where we should append. */ 01886 register unsigned char *b; 01887 01888 /* Keeps track of unclosed groups. */ 01889 compile_stack_type compile_stack; 01890 01891 /* Points to the current (ending) position in the pattern. */ 01892 const char *p = pattern; 01893 const char *pend = pattern + size; 01894 01895 /* How to translate the characters in the pattern. */ 01896 RE_TRANSLATE_TYPE translate = bufp->translate; 01897 01898 /* Address of the count-byte of the most recently inserted `exactn' 01899 command. This makes it possible to tell if a new exact-match 01900 character can be added to that command or if the character requires 01901 a new `exactn' command. */ 01902 unsigned char *pending_exact = 0; 01903 01904 /* Address of start of the most recently finished expression. 01905 This tells, e.g., postfix * where to find the start of its 01906 operand. Reset at the beginning of groups and alternatives. */ 01907 unsigned char *laststart = 0; 01908 01909 /* Address of beginning of regexp, or inside of last group. */ 01910 unsigned char *begalt; 01911 01912 /* Place in the uncompiled pattern (i.e., the {) to 01913 which to go back if the interval is invalid. */ 01914 const char *beg_interval; 01915 01916 /* Address of the place where a forward jump should go to the end of 01917 the containing expression. Each alternative of an `or' -- except the 01918 last -- ends with a forward jump of this sort. */ 01919 unsigned char *fixup_alt_jump = 0; 01920 01921 /* Counts open-groups as they are encountered. Remembered for the 01922 matching close-group on the compile stack, so the same register 01923 number is put in the stop_memory as the start_memory. */ 01924 regnum_t regnum = 0; 01925 01926 #ifdef DEBUG 01927 DEBUG_PRINT1 ("\nCompiling pattern: "); 01928 if (debug) 01929 { 01930 unsigned debug_count; 01931 01932 for (debug_count = 0; debug_count < size; debug_count++) 01933 putchar (pattern[debug_count]); 01934 putchar ('\n'); 01935 } 01936 #endif /* DEBUG */ 01937 01938 /* Initialize the compile stack. */ 01939 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); 01940 if (compile_stack.stack == NULL) 01941 return REG_ESPACE; 01942 01943 compile_stack.size = INIT_COMPILE_STACK_SIZE; 01944 compile_stack.avail = 0; 01945 01946 /* Initialize the pattern buffer. */ 01947 bufp->syntax = syntax; 01948 bufp->fastmap_accurate = 0; 01949 bufp->not_bol = bufp->not_eol = 0; 01950 01951 /* Set `used' to zero, so that if we return an error, the pattern 01952 printer (for debugging) will think there's no pattern. We reset it 01953 at the end. */ 01954 bufp->used = 0; 01955 01956 /* Always count groups, whether or not bufp->no_sub is set. */ 01957 bufp->re_nsub = 0; 01958 01959 #if !defined emacs && !defined SYNTAX_TABLE 01960 /* Initialize the syntax table. */ 01961 init_syntax_once (); 01962 #endif 01963 01964 if (bufp->allocated == 0) 01965 { 01966 if (bufp->buffer) 01967 { /* If zero allocated, but buffer is non-null, try to realloc 01968 enough space. This loses if buffer's address is bogus, but 01969 that is the user's responsibility. */ 01970 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); 01971 } 01972 else 01973 { /* Caller did not allocate a buffer. Do it for them. */ 01974 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); 01975 } 01976 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); 01977 01978 bufp->allocated = INIT_BUF_SIZE; 01979 } 01980 01981 begalt = b = bufp->buffer; 01982 01983 /* Loop through the uncompiled pattern until we're at the end. */ 01984 while (p != pend) 01985 { 01986 PATFETCH (c); 01987 01988 switch (c) 01989 { 01990 case '^': 01991 { 01992 if ( /* If at start of pattern, it's an operator. */ 01993 p == pattern + 1 01994 /* If context independent, it's an operator. */ 01995 || syntax & RE_CONTEXT_INDEP_ANCHORS 01996 /* Otherwise, depends on what's come before. */ 01997 || at_begline_loc_p (pattern, p, syntax)) 01998 BUF_PUSH (begline); 01999 else 02000 goto normal_char; 02001 } 02002 break; 02003 02004 02005 case '$': 02006 { 02007 if ( /* If at end of pattern, it's an operator. */ 02008 p == pend 02009 /* If context independent, it's an operator. */ 02010 || syntax & RE_CONTEXT_INDEP_ANCHORS 02011 /* Otherwise, depends on what's next. */ 02012 || at_endline_loc_p (p, pend, syntax)) 02013 BUF_PUSH (endline); 02014 else 02015 goto normal_char; 02016 } 02017 break; 02018 02019 02020 case '+': 02021 case '?': 02022 if ((syntax & RE_BK_PLUS_QM) 02023 || (syntax & RE_LIMITED_OPS)) 02024 goto normal_char; 02025 handle_plus: 02026 case '*': 02027 /* If there is no previous pattern... */ 02028 if (!laststart) 02029 { 02030 if (syntax & RE_CONTEXT_INVALID_OPS) 02031 FREE_STACK_RETURN (REG_BADRPT); 02032 else if (!(syntax & RE_CONTEXT_INDEP_OPS)) 02033 goto normal_char; 02034 } 02035 02036 { 02037 /* Are we optimizing this jump? */ 02038 boolean keep_string_p = false; 02039 02040 /* 1 means zero (many) matches is allowed. */ 02041 char zero_times_ok = 0, many_times_ok = 0; 02042 02043 /* If there is a sequence of repetition chars, collapse it 02044 down to just one (the right one). We can't combine 02045 interval operators with these because of, e.g., `a{2}*', 02046 which should only match an even number of `a's. */ 02047 02048 for (;;) 02049 { 02050 zero_times_ok |= c != '+'; 02051 many_times_ok |= c != '?'; 02052 02053 if (p == pend) 02054 break; 02055 02056 PATFETCH (c); 02057 02058 if (c == '*' 02059 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) 02060 ; 02061 02062 else if (syntax & RE_BK_PLUS_QM && c == '\\') 02063 { 02064 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 02065 02066 PATFETCH (c1); 02067 if (!(c1 == '+' || c1 == '?')) 02068 { 02069 PATUNFETCH; 02070 PATUNFETCH; 02071 break; 02072 } 02073 02074 c = c1; 02075 } 02076 else 02077 { 02078 PATUNFETCH; 02079 break; 02080 } 02081 02082 /* If we get here, we found another repeat character. */ 02083 } 02084 02085 /* Star, etc. applied to an empty pattern is equivalent 02086 to an empty pattern. */ 02087 if (!laststart) 02088 break; 02089 02090 /* Now we know whether or not zero matches is allowed 02091 and also whether or not two or more matches is allowed. */ 02092 if (many_times_ok) 02093 { /* More than one repetition is allowed, so put in at the 02094 end a backward relative jump from `b' to before the next 02095 jump we're going to put in below (which jumps from 02096 laststart to after this jump). 02097 02098 But if we are at the `*' in the exact sequence `.*\n', 02099 insert an unconditional jump backwards to the ., 02100 instead of the beginning of the loop. This way we only 02101 push a failure point once, instead of every time 02102 through the loop. */ 02103 assert (p - 1 > pattern); 02104 02105 /* Allocate the space for the jump. */ 02106 GET_BUFFER_SPACE (3); 02107 02108 /* We know we are not at the first character of the pattern, 02109 because laststart was nonzero. And we've already 02110 incremented `p', by the way, to be the character after 02111 the `*'. Do we have to do something analogous here 02112 for null bytes, because of RE_DOT_NOT_NULL? */ 02113 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') 02114 && zero_times_ok 02115 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') 02116 && !(syntax & RE_DOT_NEWLINE)) 02117 { /* We have .*\n. */ 02118 STORE_JUMP (jump, b, laststart); 02119 keep_string_p = true; 02120 } 02121 else 02122 /* Anything else. */ 02123 STORE_JUMP (maybe_pop_jump, b, laststart - 3); 02124 02125 /* We've added more stuff to the buffer. */ 02126 b += 3; 02127 } 02128 02129 /* On failure, jump from laststart to b + 3, which will be the 02130 end of the buffer after this jump is inserted. */ 02131 GET_BUFFER_SPACE (3); 02132 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump 02133 : on_failure_jump, 02134 laststart, b + 3); 02135 pending_exact = 0; 02136 b += 3; 02137 02138 if (!zero_times_ok) 02139 { 02140 /* At least one repetition is required, so insert a 02141 `dummy_failure_jump' before the initial 02142 `on_failure_jump' instruction of the loop. This 02143 effects a skip over that instruction the first time 02144 we hit that loop. */ 02145 GET_BUFFER_SPACE (3); 02146 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); 02147 b += 3; 02148 } 02149 } 02150 break; 02151 02152 02153 case '.': 02154 laststart = b; 02155 BUF_PUSH (anychar); 02156 break; 02157 02158 02159 case '[': 02160 { 02161 boolean had_char_class = false; 02162 02163 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 02164 02165 /* Ensure that we have enough space to push a charset: the 02166 opcode, the length count, and the bitset; 34 bytes in all. */ 02167 GET_BUFFER_SPACE (34); 02168 02169 laststart = b; 02170 02171 /* We test `*p == '^' twice, instead of using an if 02172 statement, so we only need one BUF_PUSH. */ 02173 BUF_PUSH (*p == '^' ? charset_not : charset); 02174 if (*p == '^') 02175 p++; 02176 02177 /* Remember the first position in the bracket expression. */ 02178 p1 = p; 02179 02180 /* Push the number of bytes in the bitmap. */ 02181 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); 02182 02183 /* Clear the whole map. */ 02184 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); 02185 02186 /* charset_not matches newline according to a syntax bit. */ 02187 if ((re_opcode_t) b[-2] == charset_not 02188 && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 02189 SET_LIST_BIT ('\n'); 02190 02191 /* Read in characters and ranges, setting map bits. */ 02192 for (;;) 02193 { 02194 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 02195 02196 PATFETCH (c); 02197 02198 /* \ might escape characters inside [...] and [^...]. */ 02199 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 02200 { 02201 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 02202 02203 PATFETCH (c1); 02204 SET_LIST_BIT (c1); 02205 continue; 02206 } 02207 02208 /* Could be the end of the bracket expression. If it's 02209 not (i.e., when the bracket expression is `[]' so 02210 far), the ']' character bit gets set way below. */ 02211 if (c == ']' && p != p1 + 1) 02212 break; 02213 02214 /* Look ahead to see if it's a range when the last thing 02215 was a character class. */ 02216 if (had_char_class && c == '-' && *p != ']') 02217 FREE_STACK_RETURN (REG_ERANGE); 02218 02219 /* Look ahead to see if it's a range when the last thing 02220 was a character: if this is a hyphen not at the 02221 beginning or the end of a list, then it's the range 02222 operator. */ 02223 if (c == '-' 02224 && !(p - 2 >= pattern && p[-2] == '[') 02225 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') 02226 && *p != ']') 02227 { 02228 reg_errcode_t ret 02229 = compile_range (&p, pend, translate, syntax, b); 02230 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 02231 } 02232 02233 else if (p[0] == '-' && p[1] != ']') 02234 { /* This handles ranges made up of characters only. */ 02235 reg_errcode_t ret; 02236 02237 /* Move past the `-'. */ 02238 PATFETCH (c1); 02239 02240 ret = compile_range (&p, pend, translate, syntax, b); 02241 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 02242 } 02243 02244 /* See if we're at the beginning of a possible character 02245 class. */ 02246 02247 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 02248 { /* Leave room for the null. */ 02249 char str[CHAR_CLASS_MAX_LENGTH + 1]; 02250 02251 PATFETCH (c); 02252 c1 = 0; 02253 02254 /* If pattern is `[[:'. */ 02255 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 02256 02257 for (;;) 02258 { 02259 PATFETCH (c); 02260 if ((c == ':' && *p == ']') || p == pend 02261 || c1 == CHAR_CLASS_MAX_LENGTH) 02262 break; 02263 str[c1++] = c; 02264 } 02265 str[c1] = '\0'; 02266 02267 /* If isn't a word bracketed by `[:' and `:]': 02268 undo the ending character, the letters, and leave 02269 the leading `:' and `[' (but set bits for them). */ 02270 if (c == ':' && *p == ']') 02271 { 02272 #if defined _LIBC || WIDE_CHAR_SUPPORT 02273 boolean is_lower = STREQ (str, "lower"); 02274 boolean is_upper = STREQ (str, "upper"); 02275 wctype_t wt; 02276 int ch; 02277 02278 wt = IS_CHAR_CLASS (str); 02279 if (wt == 0) 02280 FREE_STACK_RETURN (REG_ECTYPE); 02281 02282 /* Throw away the ] at the end of the character 02283 class. */ 02284 PATFETCH (c); 02285 02286 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 02287 02288 for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) 02289 { 02290 # ifdef _LIBC 02291 if (__iswctype (__btowc (ch), wt)) 02292 SET_LIST_BIT (ch); 02293 # else 02294 if (iswctype (btowc (ch), wt)) 02295 SET_LIST_BIT (ch); 02296 # endif 02297 02298 if (translate && (is_upper || is_lower) 02299 && (ISUPPER (ch) || ISLOWER (ch))) 02300 SET_LIST_BIT (ch); 02301 } 02302 02303 had_char_class = true; 02304 #else 02305 int ch; 02306 boolean is_alnum = STREQ (str, "alnum"); 02307 boolean is_alpha = STREQ (str, "alpha"); 02308 boolean is_blank = STREQ (str, "blank"); 02309 boolean is_cntrl = STREQ (str, "cntrl"); 02310 boolean is_digit = STREQ (str, "digit"); 02311 boolean is_graph = STREQ (str, "graph"); 02312 boolean is_lower = STREQ (str, "lower"); 02313 boolean is_print = STREQ (str, "print"); 02314 boolean is_punct = STREQ (str, "punct"); 02315 boolean is_space = STREQ (str, "space"); 02316 boolean is_upper = STREQ (str, "upper"); 02317 boolean is_xdigit = STREQ (str, "xdigit"); 02318 02319 if (!IS_CHAR_CLASS (str)) 02320 FREE_STACK_RETURN (REG_ECTYPE); 02321 02322 /* Throw away the ] at the end of the character 02323 class. */ 02324 PATFETCH (c); 02325 02326 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 02327 02328 for (ch = 0; ch < 1 << BYTEWIDTH; ch++) 02329 { 02330 /* This was split into 3 if's to 02331 avoid an arbitrary limit in some compiler. */ 02332 if ( (is_alnum && ISALNUM (ch)) 02333 || (is_alpha && ISALPHA (ch)) 02334 || (is_blank && ISBLANK (ch)) 02335 || (is_cntrl && ISCNTRL (ch))) 02336 SET_LIST_BIT (ch); 02337 if ( (is_digit && ISDIGIT (ch)) 02338 || (is_graph && ISGRAPH (ch)) 02339 || (is_lower && ISLOWER (ch)) 02340 || (is_print && ISPRINT (ch))) 02341 SET_LIST_BIT (ch); 02342 if ( (is_punct && ISPUNCT (ch)) 02343 || (is_space && ISSPACE (ch)) 02344 || (is_upper && ISUPPER (ch)) 02345 || (is_xdigit && ISXDIGIT (ch))) 02346 SET_LIST_BIT (ch); 02347 if ( translate && (is_upper || is_lower) 02348 && (ISUPPER (ch) || ISLOWER (ch))) 02349 SET_LIST_BIT (ch); 02350 } 02351 had_char_class = true; 02352 #endif /* libc || wctype.h */ 02353 } 02354 else 02355 { 02356 c1++; 02357 while (c1--) 02358 PATUNFETCH; 02359 SET_LIST_BIT ('['); 02360 SET_LIST_BIT (':'); 02361 had_char_class = false; 02362 } 02363 } 02364 else 02365 { 02366 had_char_class = false; 02367 SET_LIST_BIT (c); 02368 } 02369 } 02370 02371 /* Discard any (non)matching list bytes that are all 0 at the 02372 end of the map. Decrease the map-length byte too. */ 02373 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 02374 b[-1]--; 02375 b += b[-1]; 02376 } 02377 break; 02378 02379 02380 case '(': 02381 if (syntax & RE_NO_BK_PARENS) 02382 goto handle_open; 02383 else 02384 goto normal_char; 02385 02386 02387 case ')': 02388 if (syntax & RE_NO_BK_PARENS) 02389 goto handle_close; 02390 else 02391 goto normal_char; 02392 02393 02394 case '\n': 02395 if (syntax & RE_NEWLINE_ALT) 02396 goto handle_alt; 02397 else 02398 goto normal_char; 02399 02400 02401 case '|': 02402 if (syntax & RE_NO_BK_VBAR) 02403 goto handle_alt; 02404 else 02405 goto normal_char; 02406 02407 02408 case '{': 02409 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) 02410 goto handle_interval; 02411 else 02412 goto normal_char; 02413 02414 02415 case '\\': 02416 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 02417 02418 /* Do not translate the character after the \, so that we can 02419 distinguish, e.g., \B from \b, even if we normally would 02420 translate, e.g., B to b. */ 02421 PATFETCH_RAW (c); 02422 02423 switch (c) 02424 { 02425 case '(': 02426 if (syntax & RE_NO_BK_PARENS) 02427 goto normal_backslash; 02428 02429 handle_open: 02430 bufp->re_nsub++; 02431 regnum++; 02432 02433 if (COMPILE_STACK_FULL) 02434 { 02435 RETALLOC (compile_stack.stack, compile_stack.size << 1, 02436 compile_stack_elt_t); 02437 if (compile_stack.stack == NULL) return REG_ESPACE; 02438 02439 compile_stack.size <<= 1; 02440 } 02441 02442 /* These are the values to restore when we hit end of this 02443 group. They are all relative offsets, so that if the 02444 whole pattern moves because of realloc, they will still 02445 be valid. */ 02446 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; 02447 COMPILE_STACK_TOP.fixup_alt_jump 02448 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; 02449 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; 02450 COMPILE_STACK_TOP.regnum = regnum; 02451 02452 /* We will eventually replace the 0 with the number of 02453 groups inner to this one. But do not push a 02454 start_memory for groups beyond the last one we can 02455 represent in the compiled pattern. */ 02456 if (regnum <= MAX_REGNUM) 02457 { 02458 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; 02459 BUF_PUSH_3 (start_memory, regnum, 0); 02460 } 02461 02462 compile_stack.avail++; 02463 02464 fixup_alt_jump = 0; 02465 laststart = 0; 02466 begalt = b; 02467 /* If we've reached MAX_REGNUM groups, then this open 02468 won't actually generate any code, so we'll have to 02469 clear pending_exact explicitly. */ 02470 pending_exact = 0; 02471 break; 02472 02473 02474 case ')': 02475 if (syntax & RE_NO_BK_PARENS) goto normal_backslash; 02476 02477 if (COMPILE_STACK_EMPTY) 02478 { 02479 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 02480 goto normal_backslash; 02481 else 02482 FREE_STACK_RETURN (REG_ERPAREN); 02483 } 02484 02485 handle_close: 02486 if (fixup_alt_jump) 02487 { /* Push a dummy failure point at the end of the 02488 alternative for a possible future 02489 `pop_failure_jump' to pop. See comments at 02490 `push_dummy_failure' in `re_match_2'. */ 02491 BUF_PUSH (push_dummy_failure); 02492 02493 /* We allocated space for this jump when we assigned 02494 to `fixup_alt_jump', in the `handle_alt' case below. */ 02495 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); 02496 } 02497 02498 /* See similar code for backslashed left paren above. */ 02499 if (COMPILE_STACK_EMPTY) 02500 { 02501 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 02502 goto normal_char; 02503 else 02504 FREE_STACK_RETURN (REG_ERPAREN); 02505 } 02506 02507 /* Since we just checked for an empty stack above, this 02508 ``can't happen''. */ 02509 assert (compile_stack.avail != 0); 02510 { 02511 /* We don't just want to restore into `regnum', because 02512 later groups should continue to be numbered higher, 02513 as in `(ab)c(de)' -- the second group is #2. */ 02514 regnum_t this_group_regnum; 02515 02516 compile_stack.avail--; 02517 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; 02518 fixup_alt_jump 02519 = COMPILE_STACK_TOP.fixup_alt_jump 02520 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 02521 : 0; 02522 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; 02523 this_group_regnum = COMPILE_STACK_TOP.regnum; 02524 /* If we've reached MAX_REGNUM groups, then this open 02525 won't actually generate any code, so we'll have to 02526 clear pending_exact explicitly. */ 02527 pending_exact = 0; 02528 02529 /* We're at the end of the group, so now we know how many 02530 groups were inside this one. */ 02531 if (this_group_regnum <= MAX_REGNUM) 02532 { 02533 unsigned char *inner_group_loc 02534 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; 02535 02536 *inner_group_loc = regnum - this_group_regnum; 02537 BUF_PUSH_3 (stop_memory, this_group_regnum, 02538 regnum - this_group_regnum); 02539 } 02540 } 02541 break; 02542 02543 02544 case '|': /* `\|'. */ 02545 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) 02546 goto normal_backslash; 02547 handle_alt: 02548 if (syntax & RE_LIMITED_OPS) 02549 goto normal_char; 02550 02551 /* Insert before the previous alternative a jump which 02552 jumps to this alternative if the former fails. */ 02553 GET_BUFFER_SPACE (3); 02554 INSERT_JUMP (on_failure_jump, begalt, b + 6); 02555 pending_exact = 0; 02556 b += 3; 02557 02558 /* The alternative before this one has a jump after it 02559 which gets executed if it gets matched. Adjust that 02560 jump so it will jump to this alternative's analogous 02561 jump (put in below, which in turn will jump to the next 02562 (if any) alternative's such jump, etc.). The last such 02563 jump jumps to the correct final destination. A picture: 02564 _____ _____ 02565 | | | | 02566 | v | v 02567 a | b | c 02568 02569 If we are at `b', then fixup_alt_jump right now points to a 02570 three-byte space after `a'. We'll put in the jump, set 02571 fixup_alt_jump to right after `b', and leave behind three 02572 bytes which we'll fill in when we get to after `c'. */ 02573 02574 if (fixup_alt_jump) 02575 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 02576 02577 /* Mark and leave space for a jump after this alternative, 02578 to be filled in later either by next alternative or 02579 when know we're at the end of a series of alternatives. */ 02580 fixup_alt_jump = b; 02581 GET_BUFFER_SPACE (3); 02582 b += 3; 02583 02584 laststart = 0; 02585 begalt = b; 02586 break; 02587 02588 02589 case '{': 02590 /* If \{ is a literal. */ 02591 if (!(syntax & RE_INTERVALS) 02592 /* If we're at `\{' and it's not the open-interval 02593 operator. */ 02594 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) 02595 || (p - 2 == pattern && p == pend)) 02596 goto normal_backslash; 02597 02598 handle_interval: 02599 { 02600 /* If got here, then the syntax allows intervals. */ 02601 02602 /* At least (most) this many matches must be made. */ 02603 int lower_bound = -1, upper_bound = -1; 02604 02605 beg_interval = p - 1; 02606 02607 if (p == pend) 02608 { 02609 if (syntax & RE_NO_BK_BRACES) 02610 goto unfetch_interval; 02611 else 02612 FREE_STACK_RETURN (REG_EBRACE); 02613 } 02614 02615 GET_UNSIGNED_NUMBER (lower_bound); 02616 02617 if (c == ',') 02618 { 02619 GET_UNSIGNED_NUMBER (upper_bound); 02620 if (upper_bound < 0) upper_bound = RE_DUP_MAX; 02621 } 02622 else 02623 /* Interval such as `{1}' => match exactly once. */ 02624 upper_bound = lower_bound; 02625 02626 if (lower_bound < 0 || upper_bound > RE_DUP_MAX 02627 || lower_bound > upper_bound) 02628 { 02629 if (syntax & RE_NO_BK_BRACES) 02630 goto unfetch_interval; 02631 else 02632 FREE_STACK_RETURN (REG_BADBR); 02633 } 02634 02635 if (!(syntax & RE_NO_BK_BRACES)) 02636 { 02637 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); 02638 02639 PATFETCH (c); 02640 } 02641 02642 if (c != '}') 02643 { 02644 if (syntax & RE_NO_BK_BRACES) 02645 goto unfetch_interval; 02646 else 02647 FREE_STACK_RETURN (REG_BADBR); 02648 } 02649 02650 /* We just parsed a valid interval. */ 02651 02652 /* If it's invalid to have no preceding re. */ 02653 if (!laststart) 02654 { 02655 if (syntax & RE_CONTEXT_INVALID_OPS) 02656 FREE_STACK_RETURN (REG_BADRPT); 02657 else if (syntax & RE_CONTEXT_INDEP_OPS) 02658 laststart = b; 02659 else 02660 goto unfetch_interval; 02661 } 02662 02663 /* If the upper bound is zero, don't want to succeed at 02664 all; jump from `laststart' to `b + 3', which will be 02665 the end of the buffer after we insert the jump. */ 02666 if (upper_bound == 0) 02667 { 02668 GET_BUFFER_SPACE (3); 02669 INSERT_JUMP (jump, laststart, b + 3); 02670 b += 3; 02671 } 02672 02673 /* Otherwise, we have a nontrivial interval. When 02674 we're all done, the pattern will look like: 02675 set_number_at <jump count> <upper bound> 02676 set_number_at <succeed_n count> <lower bound> 02677 succeed_n <after jump addr> <succeed_n count> 02678 <body of loop> 02679 jump_n <succeed_n addr> <jump count> 02680 (The upper bound and `jump_n' are omitted if 02681 `upper_bound' is 1, though.) */ 02682 else 02683 { /* If the upper bound is > 1, we need to insert 02684 more at the end of the loop. */ 02685 unsigned nbytes = 10 + (upper_bound > 1) * 10; 02686 02687 GET_BUFFER_SPACE (nbytes); 02688 02689 /* Initialize lower bound of the `succeed_n', even 02690 though it will be set during matching by its 02691 attendant `set_number_at' (inserted next), 02692 because `re_compile_fastmap' needs to know. 02693 Jump to the `jump_n' we might insert below. */ 02694 INSERT_JUMP2 (succeed_n, laststart, 02695 b + 5 + (upper_bound > 1) * 5, 02696 lower_bound); 02697 b += 5; 02698 02699 /* Code to initialize the lower bound. Insert 02700 before the `succeed_n'. The `5' is the last two 02701 bytes of this `set_number_at', plus 3 bytes of 02702 the following `succeed_n'. */ 02703 insert_op2 (set_number_at, laststart, 5, lower_bound, b); 02704 b += 5; 02705 02706 if (upper_bound > 1) 02707 { /* More than one repetition is allowed, so 02708 append a backward jump to the `succeed_n' 02709 that starts this interval. 02710 02711 When we've reached this during matching, 02712 we'll have matched the interval once, so 02713 jump back only `upper_bound - 1' times. */ 02714 STORE_JUMP2 (jump_n, b, laststart + 5, 02715 upper_bound - 1); 02716 b += 5; 02717 02718 /* The location we want to set is the second 02719 parameter of the `jump_n'; that is `b-2' as 02720 an absolute address. `laststart' will be 02721 the `set_number_at' we're about to insert; 02722 `laststart+3' the number to set, the source 02723 for the relative address. But we are 02724 inserting into the middle of the pattern -- 02725 so everything is getting moved up by 5. 02726 Conclusion: (b - 2) - (laststart + 3) + 5, 02727 i.e., b - laststart. 02728 02729 We insert this at the beginning of the loop 02730 so that if we fail during matching, we'll 02731 reinitialize the bounds. */ 02732 insert_op2 (set_number_at, laststart, b - laststart, 02733 upper_bound - 1, b); 02734 b += 5; 02735 } 02736 } 02737 pending_exact = 0; 02738 beg_interval = NULL; 02739 } 02740 break; 02741 02742 unfetch_interval: 02743 /* If an invalid interval, match the characters as literals. */ 02744 assert (beg_interval); 02745 p = beg_interval; 02746 beg_interval = NULL; 02747 02748 /* normal_char and normal_backslash need `c'. */ 02749 PATFETCH (c); 02750 02751 if (!(syntax & RE_NO_BK_BRACES)) 02752 { 02753 if (p > pattern && p[-1] == '\\') 02754 goto normal_backslash; 02755 } 02756 goto normal_char; 02757 02758 #ifdef emacs 02759 /* There is no way to specify the before_dot and after_dot 02760 operators. rms says this is ok. --karl */ 02761 case '=': 02762 BUF_PUSH (at_dot); 02763 break; 02764 02765 case 's': 02766 laststart = b; 02767 PATFETCH (c); 02768 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); 02769 break; 02770 02771 case 'S': 02772 laststart = b; 02773 PATFETCH (c); 02774 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); 02775 break; 02776 #endif /* emacs */ 02777 02778 02779 case 'w': 02780 if (syntax & RE_NO_GNU_OPS) 02781 goto normal_char; 02782 laststart = b; 02783 BUF_PUSH (wordchar); 02784 break; 02785 02786 02787 case 'W': 02788 if (syntax & RE_NO_GNU_OPS) 02789 goto normal_char; 02790 laststart = b; 02791 BUF_PUSH (notwordchar); 02792 break; 02793 02794 02795 case '<': 02796 if (syntax & RE_NO_GNU_OPS) 02797 goto normal_char; 02798 BUF_PUSH (wordbeg); 02799 break; 02800 02801 case '>': 02802 if (syntax & RE_NO_GNU_OPS) 02803 goto normal_char; 02804 BUF_PUSH (wordend); 02805 break; 02806 02807 case 'b': 02808 if (syntax & RE_NO_GNU_OPS) 02809 goto normal_char; 02810 BUF_PUSH (wordbound); 02811 break; 02812 02813 case 'B': 02814 if (syntax & RE_NO_GNU_OPS) 02815 goto normal_char; 02816 BUF_PUSH (notwordbound); 02817 break; 02818 02819 case '`': 02820 if (syntax & RE_NO_GNU_OPS) 02821 goto normal_char; 02822 BUF_PUSH (begbuf); 02823 break; 02824 02825 case '\'': 02826 if (syntax & RE_NO_GNU_OPS) 02827 goto normal_char; 02828 BUF_PUSH (endbuf); 02829 break; 02830 02831 case '1': case '2': case '3': case '4': case '5': 02832 case '6': case '7': case '8': case '9': 02833 if (syntax & RE_NO_BK_REFS) 02834 goto normal_char; 02835 02836 c1 = c - '0'; 02837 02838 if (c1 > regnum) 02839 FREE_STACK_RETURN (REG_ESUBREG); 02840 02841 /* Can't back reference to a subexpression if inside of it. */ 02842 if (group_in_compile_stack (compile_stack, (regnum_t) c1)) 02843 goto normal_char; 02844 02845 laststart = b; 02846 BUF_PUSH_2 (duplicate, c1); 02847 break; 02848 02849 02850 case '+': 02851 case '?': 02852 if (syntax & RE_BK_PLUS_QM) 02853 goto handle_plus; 02854 else 02855 goto normal_backslash; 02856 02857 default: 02858 normal_backslash: 02859 /* You might think it would be useful for \ to mean 02860 not to translate; but if we don't translate it 02861 it will never match anything. */ 02862 c = TRANSLATE (c); 02863 goto normal_char; 02864 } 02865 break; 02866 02867 02868 default: 02869 /* Expects the character in `c'. */ 02870 normal_char: 02871 /* If no exactn currently being built. */ 02872 if (!pending_exact 02873 02874 /* If last exactn not at current position. */ 02875 || pending_exact + *pending_exact + 1 != b 02876 02877 /* We have only one byte following the exactn for the count. */ 02878 || *pending_exact == (1 << BYTEWIDTH) - 1 02879 02880 /* If followed by a repetition operator. */ 02881 || *p == '*' || *p == '^' 02882 || ((syntax & RE_BK_PLUS_QM) 02883 ? *p == '\\' && (p[1] == '+' || p[1] == '?') 02884 : (*p == '+' || *p == '?')) 02885 || ((syntax & RE_INTERVALS) 02886 && ((syntax & RE_NO_BK_BRACES) 02887 ? *p == '{' 02888 : (p[0] == '\\' && p[1] == '{')))) 02889 { 02890 /* Start building a new exactn. */ 02891 02892 laststart = b; 02893 02894 BUF_PUSH_2 (exactn, 0); 02895 pending_exact = b - 1; 02896 } 02897 02898 BUF_PUSH (c); 02899 (*pending_exact)++; 02900 break; 02901 } /* switch (c) */ 02902 } /* while p != pend */ 02903 02904 02905 /* Through the pattern now. */ 02906 02907 if (fixup_alt_jump) 02908 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 02909 02910 if (!COMPILE_STACK_EMPTY) 02911 FREE_STACK_RETURN (REG_EPAREN); 02912 02913 /* If we don't want backtracking, force success 02914 the first time we reach the end of the compiled pattern. */ 02915 if (syntax & RE_NO_POSIX_BACKTRACKING) 02916 BUF_PUSH (succeed); 02917 02918 free (compile_stack.stack); 02919 02920 /* We have succeeded; set the length of the buffer. */ 02921 bufp->used = b - bufp->buffer; 02922 02923 #ifdef DEBUG 02924 if (debug) 02925 { 02926 DEBUG_PRINT1 ("\nCompiled pattern: \n"); 02927 print_compiled_pattern (bufp); 02928 } 02929 #endif /* DEBUG */ 02930 02931 #ifndef MATCH_MAY_ALLOCATE 02932 /* Initialize the failure stack to the largest possible stack. This 02933 isn't necessary unless we're trying to avoid calling alloca in 02934 the search and match routines. */ 02935 { 02936 int num_regs = bufp->re_nsub + 1; 02937 02938 /* Since DOUBLE_FAIL_STACK refuses to double only if the current size 02939 is strictly greater than re_max_failures, the largest possible stack 02940 is 2 * re_max_failures failure points. */ 02941 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) 02942 { 02943 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); 02944 02945 # ifdef emacs 02946 if (! fail_stack.stack) 02947 fail_stack.stack 02948 = (fail_stack_elt_t *) xmalloc (fail_stack.size 02949 * sizeof (fail_stack_elt_t)); 02950 else 02951 fail_stack.stack 02952 = (fail_stack_elt_t *) xrealloc (fail_stack.stack, 02953 (fail_stack.size 02954 * sizeof (fail_stack_elt_t))); 02955 # else /* not emacs */ 02956 if (! fail_stack.stack) 02957 fail_stack.stack 02958 = (fail_stack_elt_t *) malloc (fail_stack.size 02959 * sizeof (fail_stack_elt_t)); 02960 else 02961 fail_stack.stack 02962 = (fail_stack_elt_t *) realloc (fail_stack.stack, 02963 (fail_stack.size 02964 * sizeof (fail_stack_elt_t))); 02965 # endif /* not emacs */ 02966 } 02967 02968 regex_grow_registers (num_regs); 02969 } 02970 #endif /* not MATCH_MAY_ALLOCATE */ 02971 02972 return REG_NOERROR; 02973 } /* regex_compile */ 02974 02975 /* Subroutines for `regex_compile'. */ 02976 02977 /* Store OP at LOC followed by two-byte integer parameter ARG. */ 02978 02979 static void 02980 store_op1 (op, loc, arg) 02981 re_opcode_t op; 02982 unsigned char *loc; 02983 int arg; 02984 { 02985 *loc = (unsigned char) op; 02986 STORE_NUMBER (loc + 1, arg); 02987 } 02988 02989 02990 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ 02991 02992 static void 02993 store_op2 (op, loc, arg1, arg2) 02994 re_opcode_t op; 02995 unsigned char *loc; 02996 int arg1, arg2; 02997 { 02998 *loc = (unsigned char) op; 02999 STORE_NUMBER (loc + 1, arg1); 03000 STORE_NUMBER (loc + 3, arg2); 03001 } 03002 03003 03004 /* Copy the bytes from LOC to END to open up three bytes of space at LOC 03005 for OP followed by two-byte integer parameter ARG. */ 03006 03007 static void 03008 insert_op1 (op, loc, arg, end) 03009 re_opcode_t op; 03010 unsigned char *loc; 03011 int arg; 03012 unsigned char *end; 03013 { 03014 register unsigned char *pfrom = end; 03015 register unsigned char *pto = end + 3; 03016 03017 while (pfrom != loc) 03018 *--pto = *--pfrom; 03019 03020 store_op1 (op, loc, arg); 03021 } 03022 03023 03024 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ 03025 03026 static void 03027 insert_op2 (op, loc, arg1, arg2, end) 03028 re_opcode_t op; 03029 unsigned char *loc; 03030 int arg1, arg2; 03031 unsigned char *end; 03032 { 03033 register unsigned char *pfrom = end; 03034 register unsigned char *pto = end + 5; 03035 03036 while (pfrom != loc) 03037 *--pto = *--pfrom; 03038 03039 store_op2 (op, loc, arg1, arg2); 03040 } 03041 03042 03043 /* P points to just after a ^ in PATTERN. Return true if that ^ comes 03044 after an alternative or a begin-subexpression. We assume there is at 03045 least one character before the ^. */ 03046 03047 static boolean 03048 at_begline_loc_p (pattern, p, syntax) 03049 const char *pattern, *p; 03050 reg_syntax_t syntax; 03051 { 03052 const char *prev = p - 2; 03053 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 03054 03055 return 03056 /* After a subexpression? */ 03057 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) 03058 /* After an alternative? */ 03059 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); 03060 } 03061 03062 03063 /* The dual of at_begline_loc_p. This one is for $. We assume there is 03064 at least one character after the $, i.e., `P < PEND'. */ 03065 03066 static boolean 03067 at_endline_loc_p (p, pend, syntax) 03068 const char *p, *pend; 03069 reg_syntax_t syntax; 03070 { 03071 const char *next = p; 03072 boolean next_backslash = *next == '\\'; 03073 const char *next_next = p + 1 < pend ? p + 1 : 0; 03074 03075 return 03076 /* Before a subexpression? */ 03077 (syntax & RE_NO_BK_PARENS ? *next == ')' 03078 : next_backslash && next_next && *next_next == ')') 03079 /* Before an alternative? */ 03080 || (syntax & RE_NO_BK_VBAR ? *next == '|' 03081 : next_backslash && next_next && *next_next == '|'); 03082 } 03083 03084 03085 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and 03086 false if it's not. */ 03087 03088 static boolean 03089 group_in_compile_stack (compile_stack, regnum) 03090 compile_stack_type compile_stack; 03091 regnum_t regnum; 03092 { 03093 int this_element; 03094 03095 for (this_element = compile_stack.avail - 1; 03096 this_element >= 0; 03097 this_element--) 03098 if (compile_stack.stack[this_element].regnum == regnum) 03099 return true; 03100 03101 return false; 03102 } 03103 03104 03105 /* Read the ending character of a range (in a bracket expression) from the 03106 uncompiled pattern *P_PTR (which ends at PEND). We assume the 03107 starting character is in `P[-2]'. (`P[-1]' is the character `-'.) 03108 Then we set the translation of all bits between the starting and 03109 ending characters (inclusive) in the compiled pattern B. 03110 03111 Return an error code. 03112 03113 We use these short variable names so we can use the same macros as 03114 `regex_compile' itself. */ 03115 03116 static reg_errcode_t 03117 compile_range (p_ptr, pend, translate, syntax, b) 03118 const char **p_ptr, *pend; 03119 RE_TRANSLATE_TYPE translate; 03120 reg_syntax_t syntax; 03121 unsigned char *b; 03122 { 03123 unsigned this_char; 03124 03125 const char *p = *p_ptr; 03126 unsigned int range_start, range_end; 03127 03128 if (p == pend) 03129 return REG_ERANGE; 03130 03131 /* Even though the pattern is a signed `char *', we need to fetch 03132 with unsigned char *'s; if the high bit of the pattern character 03133 is set, the range endpoints will be negative if we fetch using a 03134 signed char *. 03135 03136 We also want to fetch the endpoints without translating them; the 03137 appropriate translation is done in the bit-setting loop below. */ 03138 /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ 03139 range_start = ((const unsigned char *) p)[-2]; 03140 range_end = ((const unsigned char *) p)[0]; 03141 03142 /* Have to increment the pointer into the pattern string, so the 03143 caller isn't still at the ending character. */ 03144 (*p_ptr)++; 03145 03146 /* If the start is after the end, the range is empty. */ 03147 if (range_start > range_end) 03148 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; 03149 03150 /* Here we see why `this_char' has to be larger than an `unsigned 03151 char' -- the range is inclusive, so if `range_end' == 0xff 03152 (assuming 8-bit characters), we would otherwise go into an infinite 03153 loop, since all characters <= 0xff. */ 03154 for (this_char = range_start; this_char <= range_end; this_char++) 03155 { 03156 SET_LIST_BIT (TRANSLATE (this_char)); 03157 } 03158 03159 return REG_NOERROR; 03160 } 03161 03162 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in 03163 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible 03164 characters can start a string that matches the pattern. This fastmap 03165 is used by re_search to skip quickly over impossible starting points. 03166 03167 The caller must supply the address of a (1 << BYTEWIDTH)-byte data 03168 area as BUFP->fastmap. 03169 03170 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in 03171 the pattern buffer. 03172 03173 Returns 0 if we succeed, -2 if an internal error. */ 03174 03175 int 03176 re_compile_fastmap (bufp) 03177 struct re_pattern_buffer *bufp; 03178 { 03179 int j, k; 03180 #ifdef MATCH_MAY_ALLOCATE 03181 fail_stack_type fail_stack; 03182 #endif 03183 #ifndef REGEX_MALLOC 03184 char *destination; 03185 #endif 03186 03187 register char *fastmap = bufp->fastmap; 03188 unsigned char *pattern = bufp->buffer; 03189 unsigned char *p = pattern; 03190 register unsigned char *pend = pattern + bufp->used; 03191 03192 #ifdef REL_ALLOC 03193 /* This holds the pointer to the failure stack, when 03194 it is allocated relocatably. */ 03195 fail_stack_elt_t *failure_stack_ptr; 03196 #endif 03197 03198 /* Assume that each path through the pattern can be null until 03199 proven otherwise. We set this false at the bottom of switch 03200 statement, to which we get only if a particular path doesn't 03201 match the empty string. */ 03202 boolean path_can_be_null = true; 03203 03204 /* We aren't doing a `succeed_n' to begin with. */ 03205 boolean succeed_n_p = false; 03206 03207 assert (fastmap != NULL && p != NULL); 03208 03209 INIT_FAIL_STACK (); 03210 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ 03211 bufp->fastmap_accurate = 1; /* It will be when we're done. */ 03212 bufp->can_be_null = 0; 03213 03214 while (1) 03215 { 03216 if (p == pend || *p == succeed) 03217 { 03218 /* We have reached the (effective) end of pattern. */ 03219 if (!FAIL_STACK_EMPTY ()) 03220 { 03221 bufp->can_be_null |= path_can_be_null; 03222 03223 /* Reset for next path. */ 03224 path_can_be_null = true; 03225 03226 p = fail_stack.stack[--fail_stack.avail].pointer; 03227 03228 continue; 03229 } 03230 else 03231 break; 03232 } 03233 03234 /* We should never be about to go beyond the end of the pattern. */ 03235 assert (p < pend); 03236 03237 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 03238 { 03239 03240 /* I guess the idea here is to simply not bother with a fastmap 03241 if a backreference is used, since it's too hard to figure out 03242 the fastmap for the corresponding group. Setting 03243 `can_be_null' stops `re_search_2' from using the fastmap, so 03244 that is all we do. */ 03245 case duplicate: 03246 bufp->can_be_null = 1; 03247 goto done; 03248 03249 03250 /* Following are the cases which match a character. These end 03251 with `break'. */ 03252 03253 case exactn: 03254 fastmap[p[1]] = 1; 03255 break; 03256 03257 03258 case charset: 03259 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 03260 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) 03261 fastmap[j] = 1; 03262 break; 03263 03264 03265 case charset_not: 03266 /* Chars beyond end of map must be allowed. */ 03267 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) 03268 fastmap[j] = 1; 03269 03270 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 03271 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) 03272 fastmap[j] = 1; 03273 break; 03274 03275 03276 case wordchar: 03277 for (j = 0; j < (1 << BYTEWIDTH); j++) 03278 if (SYNTAX (j) == Sword) 03279 fastmap[j] = 1; 03280 break; 03281 03282 03283 case notwordchar: 03284 for (j = 0; j < (1 << BYTEWIDTH); j++) 03285 if (SYNTAX (j) != Sword) 03286 fastmap[j] = 1; 03287 break; 03288 03289 03290 case anychar: 03291 { 03292 int fastmap_newline = fastmap['\n']; 03293 03294 /* `.' matches anything ... */ 03295 for (j = 0; j < (1 << BYTEWIDTH); j++) 03296 fastmap[j] = 1; 03297 03298 /* ... except perhaps newline. */ 03299 if (!(bufp->syntax & RE_DOT_NEWLINE)) 03300 fastmap['\n'] = fastmap_newline; 03301 03302 /* Return if we have already set `can_be_null'; if we have, 03303 then the fastmap is irrelevant. Something's wrong here. */ 03304 else if (bufp->can_be_null) 03305 goto done; 03306 03307 /* Otherwise, have to check alternative paths. */ 03308 break; 03309 } 03310 03311 #ifdef emacs 03312 case syntaxspec: 03313 k = *p++; 03314 for (j = 0; j < (1 << BYTEWIDTH); j++) 03315 if (SYNTAX (j) == (enum syntaxcode) k) 03316 fastmap[j] = 1; 03317 break; 03318 03319 03320 case notsyntaxspec: 03321 k = *p++; 03322 for (j = 0; j < (1 << BYTEWIDTH); j++) 03323 if (SYNTAX (j) != (enum syntaxcode) k) 03324 fastmap[j] = 1; 03325 break; 03326 03327 03328 /* All cases after this match the empty string. These end with 03329 `continue'. */ 03330 03331 03332 case before_dot: 03333 case at_dot: 03334 case after_dot: 03335 continue; 03336 #endif /* emacs */ 03337 03338 03339 case no_op: 03340 case begline: 03341 case endline: 03342 case begbuf: 03343 case endbuf: 03344 case wordbound: 03345 case notwordbound: 03346 case wordbeg: 03347 case wordend: 03348 case push_dummy_failure: 03349 continue; 03350 03351 03352 case jump_n: 03353 case pop_failure_jump: 03354 case maybe_pop_jump: 03355 case jump: 03356 case jump_past_alt: 03357 case dummy_failure_jump: 03358 EXTRACT_NUMBER_AND_INCR (j, p); 03359 p += j; 03360 if (j > 0) 03361 continue; 03362 03363 /* Jump backward implies we just went through the body of a 03364 loop and matched nothing. Opcode jumped to should be 03365 `on_failure_jump' or `succeed_n'. Just treat it like an 03366 ordinary jump. For a * loop, it has pushed its failure 03367 point already; if so, discard that as redundant. */ 03368 if ((re_opcode_t) *p != on_failure_jump 03369 && (re_opcode_t) *p != succeed_n) 03370 continue; 03371 03372 p++; 03373 EXTRACT_NUMBER_AND_INCR (j, p); 03374 p += j; 03375 03376 /* If what's on the stack is where we are now, pop it. */ 03377 if (!FAIL_STACK_EMPTY () 03378 && fail_stack.stack[fail_stack.avail - 1].pointer == p) 03379 fail_stack.avail--; 03380 03381 continue; 03382 03383 03384 case on_failure_jump: 03385 case on_failure_keep_string_jump: 03386 handle_on_failure_jump: 03387 EXTRACT_NUMBER_AND_INCR (j, p); 03388 03389 /* For some patterns, e.g., `(a?)?', `p+j' here points to the 03390 end of the pattern. We don't want to push such a point, 03391 since when we restore it above, entering the switch will 03392 increment `p' past the end of the pattern. We don't need 03393 to push such a point since we obviously won't find any more 03394 fastmap entries beyond `pend'. Such a pattern can match 03395 the null string, though. */ 03396 if (p + j < pend) 03397 { 03398 if (!PUSH_PATTERN_OP (p + j, fail_stack)) 03399 { 03400 RESET_FAIL_STACK (); 03401 return -2; 03402 } 03403 } 03404 else 03405 bufp->can_be_null = 1; 03406 03407 if (succeed_n_p) 03408 { 03409 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ 03410 succeed_n_p = false; 03411 } 03412 03413 continue; 03414 03415 03416 case succeed_n: 03417 /* Get to the number of times to succeed. */ 03418 p += 2; 03419 03420 /* Increment p past the n for when k != 0. */ 03421 EXTRACT_NUMBER_AND_INCR (k, p); 03422 if (k == 0) 03423 { 03424 p -= 4; 03425 succeed_n_p = true; /* Spaghetti code alert. */ 03426 goto handle_on_failure_jump; 03427 } 03428 continue; 03429 03430 03431 case set_number_at: 03432 p += 4; 03433 continue; 03434 03435 03436 case start_memory: 03437 case stop_memory: 03438 p += 2; 03439 continue; 03440 03441 03442 default: 03443 abort (); /* We have listed all the cases. */ 03444 } /* switch *p++ */ 03445 03446 /* Getting here means we have found the possible starting 03447 characters for one path of the pattern -- and that the empty 03448 string does not match. We need not follow this path further. 03449 Instead, look at the next alternative (remembered on the 03450 stack), or quit if no more. The test at the top of the loop 03451 does these things. */ 03452 path_can_be_null = false; 03453 p = pend; 03454 } /* while p */ 03455 03456 /* Set `can_be_null' for the last path (also the first path, if the 03457 pattern is empty). */ 03458 bufp->can_be_null |= path_can_be_null; 03459 03460 done: 03461 RESET_FAIL_STACK (); 03462 return 0; 03463 } /* re_compile_fastmap */ 03464 #ifdef _LIBC 03465 weak_alias (__re_compile_fastmap, re_compile_fastmap) 03466 #endif 03467 03468 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and 03469 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use 03470 this memory for recording register information. STARTS and ENDS 03471 must be allocated using the malloc library routine, and must each 03472 be at least NUM_REGS * sizeof (regoff_t) bytes long. 03473 03474 If NUM_REGS == 0, then subsequent matches should allocate their own 03475 register data. 03476 03477 Unless this function is called, the first search or match using 03478 PATTERN_BUFFER will allocate its own register data, without 03479 freeing the old data. */ 03480 03481 void 03482 re_set_registers (bufp, regs, num_regs, starts, ends) 03483 struct re_pattern_buffer *bufp; 03484 struct re_registers *regs; 03485 unsigned num_regs; 03486 regoff_t *starts, *ends; 03487 { 03488 if (num_regs) 03489 { 03490 bufp->regs_allocated = REGS_REALLOCATE; 03491 regs->num_regs = num_regs; 03492 regs->start = starts; 03493 regs->end = ends; 03494 } 03495 else 03496 { 03497 bufp->regs_allocated = REGS_UNALLOCATED; 03498 regs->num_regs = 0; 03499 regs->start = regs->end = (regoff_t *) 0; 03500 } 03501 } 03502 #ifdef _LIBC 03503 weak_alias (__re_set_registers, re_set_registers) 03504 #endif 03505 03506 /* Searching routines. */ 03507 03508 /* Like re_search_2, below, but only one string is specified, and 03509 doesn't let you say where to stop matching. */ 03510 03511 int 03512 re_search (bufp, string, size, startpos, range, regs) 03513 struct re_pattern_buffer *bufp; 03514 const char *string; 03515 int size, startpos, range; 03516 struct re_registers *regs; 03517 { 03518 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 03519 regs, size); 03520 } 03521 #ifdef _LIBC 03522 weak_alias (__re_search, re_search) 03523 #endif 03524 03525 03526 /* Using the compiled pattern in BUFP->buffer, first tries to match the 03527 virtual concatenation of STRING1 and STRING2, starting first at index 03528 STARTPOS, then at STARTPOS + 1, and so on. 03529 03530 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. 03531 03532 RANGE is how far to scan while trying to match. RANGE = 0 means try 03533 only at STARTPOS; in general, the last start tried is STARTPOS + 03534 RANGE. 03535 03536 In REGS, return the indices of the virtual concatenation of STRING1 03537 and STRING2 that matched the entire BUFP->buffer and its contained 03538 subexpressions. 03539 03540 Do not consider matching one past the index STOP in the virtual 03541 concatenation of STRING1 and STRING2. 03542 03543 We return either the position in the strings at which the match was 03544 found, -1 if no match, or -2 if error (such as failure 03545 stack overflow). */ 03546 03547 int 03548 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) 03549 struct re_pattern_buffer *bufp; 03550 const char *string1, *string2; 03551 int size1, size2; 03552 int startpos; 03553 int range; 03554 struct re_registers *regs; 03555 int stop; 03556 { 03557 int val; 03558 register char *fastmap = bufp->fastmap; 03559 register RE_TRANSLATE_TYPE translate = bufp->translate; 03560 int total_size = size1 + size2; 03561 int endpos = startpos + range; 03562 03563 /* Check for out-of-range STARTPOS. */ 03564 if (startpos < 0 || startpos > total_size) 03565 return -1; 03566 03567 /* Fix up RANGE if it might eventually take us outside 03568 the virtual concatenation of STRING1 and STRING2. 03569 Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */ 03570 if (endpos < 0) 03571 range = 0 - startpos; 03572 else if (endpos > total_size) 03573 range = total_size - startpos; 03574 03575 /* If the search isn't to be a backwards one, don't waste time in a 03576 search for a pattern that must be anchored. */ 03577 if (bufp->used > 0 && range > 0 03578 && ((re_opcode_t) bufp->buffer[0] == begbuf 03579 /* `begline' is like `begbuf' if it cannot match at newlines. */ 03580 || ((re_opcode_t) bufp->buffer[0] == begline 03581 && !bufp->newline_anchor))) 03582 { 03583 if (startpos > 0) 03584 return -1; 03585 else 03586 range = 1; 03587 } 03588 03589 #ifdef emacs 03590 /* In a forward search for something that starts with \=. 03591 don't keep searching past point. */ 03592 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) 03593 { 03594 range = PT - startpos; 03595 if (range <= 0) 03596 return -1; 03597 } 03598 #endif /* emacs */ 03599 03600 /* Update the fastmap now if not correct already. */ 03601 if (fastmap && !bufp->fastmap_accurate) 03602 if (re_compile_fastmap (bufp) == -2) 03603 return -2; 03604 03605 /* Loop through the string, looking for a place to start matching. */ 03606 for (;;) 03607 { 03608 /* If a fastmap is supplied, skip quickly over characters that 03609 cannot be the start of a match. If the pattern can match the 03610 null string, however, we don't need to skip characters; we want 03611 the first null string. */ 03612 if (fastmap && startpos < total_size && !bufp->can_be_null) 03613 { 03614 if (range > 0) /* Searching forwards. */ 03615 { 03616 register const char *d; 03617 register int lim = 0; 03618 int irange = range; 03619 03620 if (startpos < size1 && startpos + range >= size1) 03621 lim = range - (size1 - startpos); 03622 03623 d = (startpos >= size1 ? string2 - size1 : string1) + startpos; 03624 03625 /* Written out as an if-else to avoid testing `translate' 03626 inside the loop. */ 03627 if (translate) 03628 while (range > lim 03629 && !fastmap[(unsigned char) 03630 translate[(unsigned char) *d++]]) 03631 range--; 03632 else 03633 while (range > lim && !fastmap[(unsigned char) *d++]) 03634 range--; 03635 03636 startpos += irange - range; 03637 } 03638 else /* Searching backwards. */ 03639 { 03640 register char c = (size1 == 0 || startpos >= size1 03641 ? string2[startpos - size1] 03642 : string1[startpos]); 03643 03644 if (!fastmap[(unsigned char) TRANSLATE (c)]) 03645 goto advance; 03646 } 03647 } 03648 03649 /* If can't match the null string, and that's all we have left, fail. */ 03650 if (range >= 0 && startpos == total_size && fastmap 03651 && !bufp->can_be_null) 03652 return -1; 03653 03654 val = re_match_2_internal (bufp, string1, size1, string2, size2, 03655 startpos, regs, stop); 03656 #ifndef REGEX_MALLOC 03657 # ifdef C_ALLOCA 03658 alloca (0); 03659 # endif 03660 #endif 03661 03662 if (val >= 0) 03663 return startpos; 03664 03665 if (val == -2) 03666 return -2; 03667 03668 advance: 03669 if (!range) 03670 break; 03671 else if (range > 0) 03672 { 03673 range--; 03674 startpos++; 03675 } 03676 else 03677 { 03678 range++; 03679 startpos--; 03680 } 03681 } 03682 return -1; 03683 } /* re_search_2 */ 03684 #ifdef _LIBC 03685 weak_alias (__re_search_2, re_search_2) 03686 #endif 03687 03688 /* This converts PTR, a pointer into one of the search strings `string1' 03689 and `string2' into an offset from the beginning of that string. */ 03690 #define POINTER_TO_OFFSET(ptr) \ 03691 (FIRST_STRING_P (ptr) \ 03692 ? ((regoff_t) ((ptr) - string1)) \ 03693 : ((regoff_t) ((ptr) - string2 + size1))) 03694 03695 /* Macros for dealing with the split strings in re_match_2. */ 03696 03697 #define MATCHING_IN_FIRST_STRING (dend == end_match_1) 03698 03699 /* Call before fetching a character with *d. This switches over to 03700 string2 if necessary. */ 03701 #define PREFETCH() \ 03702 while (d == dend) \ 03703 { \ 03704 /* End of string2 => fail. */ \ 03705 if (dend == end_match_2) \ 03706 goto fail; \ 03707 /* End of string1 => advance to string2. */ \ 03708 d = string2; \ 03709 dend = end_match_2; \ 03710 } 03711 03712 03713 /* Test if at very beginning or at very end of the virtual concatenation 03714 of `string1' and `string2'. If only one string, it's `string2'. */ 03715 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) 03716 #define AT_STRINGS_END(d) ((d) == end2) 03717 03718 03719 /* Test if D points to a character which is word-constituent. We have 03720 two special cases to check for: if past the end of string1, look at 03721 the first character in string2; and if before the beginning of 03722 string2, look at the last character in string1. */ 03723 #define WORDCHAR_P(d) \ 03724 (SYNTAX ((d) == end1 ? *string2 \ 03725 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ 03726 == Sword) 03727 03728 /* Disabled due to a compiler bug -- see comment at case wordbound */ 03729 #if 0 03730 /* Test if the character before D and the one at D differ with respect 03731 to being word-constituent. */ 03732 #define AT_WORD_BOUNDARY(d) \ 03733 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ 03734 || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) 03735 #endif 03736 03737 /* Free everything we malloc. */ 03738 #ifdef MATCH_MAY_ALLOCATE 03739 # define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL 03740 # define FREE_VARIABLES() \ 03741 do { \ 03742 REGEX_FREE_STACK (fail_stack.stack); \ 03743 FREE_VAR (regstart); \ 03744 FREE_VAR (regend); \ 03745 FREE_VAR (old_regstart); \ 03746 FREE_VAR (old_regend); \ 03747 FREE_VAR (best_regstart); \ 03748 FREE_VAR (best_regend); \ 03749 FREE_VAR (reg_info); \ 03750 FREE_VAR (reg_dummy); \ 03751 FREE_VAR (reg_info_dummy); \ 03752 } while (0) 03753 #else 03754 # define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ 03755 #endif /* not MATCH_MAY_ALLOCATE */ 03756 03757 /* These values must meet several constraints. They must not be valid 03758 register values; since we have a limit of 255 registers (because 03759 we use only one byte in the pattern for the register number), we can 03760 use numbers larger than 255. They must differ by 1, because of 03761 NUM_FAILURE_ITEMS above. And the value for the lowest register must 03762 be larger than the value for the highest register, so we do not try 03763 to actually save any registers when none are active. */ 03764 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) 03765 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) 03766 03767 /* Matching routines. */ 03768 03769 #ifndef emacs /* Emacs never uses this. */ 03770 /* re_match is like re_match_2 except it takes only a single string. */ 03771 03772 int 03773 re_match (bufp, string, size, pos, regs) 03774 struct re_pattern_buffer *bufp; 03775 const char *string; 03776 int size, pos; 03777 struct re_registers *regs; 03778 { 03779 int result = re_match_2_internal (bufp, NULL, 0, string, size, 03780 pos, regs, size); 03781 # ifndef REGEX_MALLOC 03782 # ifdef C_ALLOCA 03783 alloca (0); 03784 # endif 03785 # endif 03786 return result; 03787 } 03788 # ifdef _LIBC 03789 weak_alias (__re_match, re_match) 03790 # endif 03791 #endif /* not emacs */ 03792 03793 static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p, 03794 unsigned char *end, 03795 register_info_type *reg_info)); 03796 static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p, 03797 unsigned char *end, 03798 register_info_type *reg_info)); 03799 static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p, 03800 unsigned char *end, 03801 register_info_type *reg_info)); 03802 static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2, 03803 int len, char *translate)); 03804 03805 /* re_match_2 matches the compiled pattern in BUFP against the 03806 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 03807 and SIZE2, respectively). We start matching at POS, and stop 03808 matching at STOP. 03809 03810 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we 03811 store offsets for the substring each group matched in REGS. See the 03812 documentation for exactly how many groups we fill. 03813 03814 We return -1 if no match, -2 if an internal error (such as the 03815 failure stack overflowing). Otherwise, we return the length of the 03816 matched substring. */ 03817 03818 int 03819 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) 03820 struct re_pattern_buffer *bufp; 03821 const char *string1, *string2; 03822 int size1, size2; 03823 int pos; 03824 struct re_registers *regs; 03825 int stop; 03826 { 03827 int result = re_match_2_internal (bufp, string1, size1, string2, size2, 03828 pos, regs, stop); 03829 #ifndef REGEX_MALLOC 03830 # ifdef C_ALLOCA 03831 alloca (0); 03832 # endif 03833 #endif 03834 return result; 03835 } 03836 #ifdef _LIBC 03837 weak_alias (__re_match_2, re_match_2) 03838 #endif 03839 03840 /* This is a separate function so that we can force an alloca cleanup 03841 afterwards. */ 03842 static int 03843 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) 03844 struct re_pattern_buffer *bufp; 03845 const char *string1, *string2; 03846 int size1, size2; 03847 int pos; 03848 struct re_registers *regs; 03849 int stop; 03850 { 03851 /* General temporaries. */ 03852 int mcnt; 03853 unsigned char *p1; 03854 03855 /* Just past the end of the corresponding string. */ 03856 const char *end1, *end2; 03857 03858 /* Pointers into string1 and string2, just past the last characters in 03859 each to consider matching. */ 03860 const char *end_match_1, *end_match_2; 03861 03862 /* Where we are in the data, and the end of the current string. */ 03863 const char *d, *dend; 03864 03865 /* Where we are in the pattern, and the end of the pattern. */ 03866 unsigned char *p = bufp->buffer; 03867 register unsigned char *pend = p + bufp->used; 03868 03869 /* Mark the opcode just after a start_memory, so we can test for an 03870 empty subpattern when we get to the stop_memory. */ 03871 unsigned char *just_past_start_mem = 0; 03872 03873 /* We use this to map every character in the string. */ 03874 RE_TRANSLATE_TYPE translate = bufp->translate; 03875 03876 /* Failure point stack. Each place that can handle a failure further 03877 down the line pushes a failure point on this stack. It consists of 03878 restart, regend, and reg_info for all registers corresponding to 03879 the subexpressions we're currently inside, plus the number of such 03880 registers, and, finally, two char *'s. The first char * is where 03881 to resume scanning the pattern; the second one is where to resume 03882 scanning the strings. If the latter is zero, the failure point is 03883 a ``dummy''; if a failure happens and the failure point is a dummy, 03884 it gets discarded and the next next one is tried. */ 03885 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 03886 fail_stack_type fail_stack; 03887 #endif 03888 #ifdef DEBUG 03889 static unsigned failure_id = 0; 03890 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 03891 #endif 03892 03893 #ifdef REL_ALLOC 03894 /* This holds the pointer to the failure stack, when 03895 it is allocated relocatably. */ 03896 fail_stack_elt_t *failure_stack_ptr; 03897 #endif 03898 03899 /* We fill all the registers internally, independent of what we 03900 return, for use in backreferences. The number here includes 03901 an element for register zero. */ 03902 size_t num_regs = bufp->re_nsub + 1; 03903 03904 /* The currently active registers. */ 03905 active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; 03906 active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; 03907 03908 /* Information on the contents of registers. These are pointers into 03909 the input strings; they record just what was matched (on this 03910 attempt) by a subexpression part of the pattern, that is, the 03911 regnum-th regstart pointer points to where in the pattern we began 03912 matching and the regnum-th regend points to right after where we 03913 stopped matching the regnum-th subexpression. (The zeroth register 03914 keeps track of what the whole pattern matches.) */ 03915 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 03916 const char **regstart, **regend; 03917 #endif 03918 03919 /* If a group that's operated upon by a repetition operator fails to 03920 match anything, then the register for its start will need to be 03921 restored because it will have been set to wherever in the string we 03922 are when we last see its open-group operator. Similarly for a 03923 register's end. */ 03924 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 03925 const char **old_regstart, **old_regend; 03926 #endif 03927 03928 /* The is_active field of reg_info helps us keep track of which (possibly 03929 nested) subexpressions we are currently in. The matched_something 03930 field of reg_info[reg_num] helps us tell whether or not we have 03931 matched any of the pattern so far this time through the reg_num-th 03932 subexpression. These two fields get reset each time through any 03933 loop their register is in. */ 03934 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 03935 register_info_type *reg_info; 03936 #endif 03937 03938 /* The following record the register info as found in the above 03939 variables when we find a match better than any we've seen before. 03940 This happens as we backtrack through the failure points, which in 03941 turn happens only if we have not yet matched the entire string. */ 03942 unsigned best_regs_set = false; 03943 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 03944 const char **best_regstart, **best_regend; 03945 #endif 03946 03947 /* Logically, this is `best_regend[0]'. But we don't want to have to 03948 allocate space for that if we're not allocating space for anything 03949 else (see below). Also, we never need info about register 0 for 03950 any of the other register vectors, and it seems rather a kludge to 03951 treat `best_regend' differently than the rest. So we keep track of 03952 the end of the best match so far in a separate variable. We 03953 initialize this to NULL so that when we backtrack the first time 03954 and need to test it, it's not garbage. */ 03955 const char *match_end = NULL; 03956 03957 /* This helps SET_REGS_MATCHED avoid doing redundant work. */ 03958 int set_regs_matched_done = 0; 03959 03960 /* Used when we pop values we don't care about. */ 03961 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 03962 const char **reg_dummy; 03963 register_info_type *reg_info_dummy; 03964 #endif 03965 03966 #ifdef DEBUG 03967 /* Counts the total number of registers pushed. */ 03968 unsigned num_regs_pushed = 0; 03969 #endif 03970 03971 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); 03972 03973 INIT_FAIL_STACK (); 03974 03975 #ifdef MATCH_MAY_ALLOCATE 03976 /* Do not bother to initialize all the register variables if there are 03977 no groups in the pattern, as it takes a fair amount of time. If 03978 there are groups, we include space for register 0 (the whole 03979 pattern), even though we never use it, since it simplifies the 03980 array indexing. We should fix this. */ 03981 if (bufp->re_nsub) 03982 { 03983 regstart = REGEX_TALLOC (num_regs, const char *); 03984 regend = REGEX_TALLOC (num_regs, const char *); 03985 old_regstart = REGEX_TALLOC (num_regs, const char *); 03986 old_regend = REGEX_TALLOC (num_regs, const char *); 03987 best_regstart = REGEX_TALLOC (num_regs, const char *); 03988 best_regend = REGEX_TALLOC (num_regs, const char *); 03989 reg_info = REGEX_TALLOC (num_regs, register_info_type); 03990 reg_dummy = REGEX_TALLOC (num_regs, const char *); 03991 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); 03992 03993 if (!(regstart && regend && old_regstart && old_regend && reg_info 03994 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 03995 { 03996 FREE_VARIABLES (); 03997 return -2; 03998 } 03999 } 04000 else 04001 { 04002 /* We must initialize all our variables to NULL, so that 04003 `FREE_VARIABLES' doesn't try to free them. */ 04004 regstart = regend = old_regstart = old_regend = best_regstart 04005 = best_regend = reg_dummy = NULL; 04006 reg_info = reg_info_dummy = (register_info_type *) NULL; 04007 } 04008 #endif /* MATCH_MAY_ALLOCATE */ 04009 04010 /* The starting position is bogus. */ 04011 if (pos < 0 || pos > size1 + size2) 04012 { 04013 FREE_VARIABLES (); 04014 return -1; 04015 } 04016 04017 /* Initialize subexpression text positions to -1 to mark ones that no 04018 start_memory/stop_memory has been seen for. Also initialize the 04019 register information struct. */ 04020 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 04021 { 04022 regstart[mcnt] = regend[mcnt] 04023 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; 04024 04025 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; 04026 IS_ACTIVE (reg_info[mcnt]) = 0; 04027 MATCHED_SOMETHING (reg_info[mcnt]) = 0; 04028 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; 04029 } 04030 04031 /* We move `string1' into `string2' if the latter's empty -- but not if 04032 `string1' is null. */ 04033 if (size2 == 0 && string1 != NULL) 04034 { 04035 string2 = string1; 04036 size2 = size1; 04037 string1 = 0; 04038 size1 = 0; 04039 } 04040 end1 = string1 + size1; 04041 end2 = string2 + size2; 04042 04043 /* Compute where to stop matching, within the two strings. */ 04044 if (stop <= size1) 04045 { 04046 end_match_1 = string1 + stop; 04047 end_match_2 = string2; 04048 } 04049 else 04050 { 04051 end_match_1 = end1; 04052 end_match_2 = string2 + stop - size1; 04053 } 04054 04055 /* `p' scans through the pattern as `d' scans through the data. 04056 `dend' is the end of the input string that `d' points within. `d' 04057 is advanced into the following input string whenever necessary, but 04058 this happens before fetching; therefore, at the beginning of the 04059 loop, `d' can be pointing at the end of a string, but it cannot 04060 equal `string2'. */ 04061 if (size1 > 0 && pos <= size1) 04062 { 04063 d = string1 + pos; 04064 dend = end_match_1; 04065 } 04066 else 04067 { 04068 d = string2 + pos - size1; 04069 dend = end_match_2; 04070 } 04071 04072 DEBUG_PRINT1 ("The compiled pattern is:\n"); 04073 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); 04074 DEBUG_PRINT1 ("The string to match is: `"); 04075 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); 04076 DEBUG_PRINT1 ("'\n"); 04077 04078 /* This loops over pattern commands. It exits by returning from the 04079 function if the match is complete, or it drops through if the match 04080 fails at this starting point in the input data. */ 04081 for (;;) 04082 { 04083 #ifdef _LIBC 04084 DEBUG_PRINT2 ("\n%p: ", p); 04085 #else 04086 DEBUG_PRINT2 ("\n0x%x: ", p); 04087 #endif 04088 04089 if (p == pend) 04090 { /* End of pattern means we might have succeeded. */ 04091 DEBUG_PRINT1 ("end of pattern ... "); 04092 04093 /* If we haven't matched the entire string, and we want the 04094 longest match, try backtracking. */ 04095 if (d != end_match_2) 04096 { 04097 /* 1 if this match ends in the same string (string1 or string2) 04098 as the best previous match. */ 04099 boolean same_str_p = (FIRST_STRING_P (match_end) 04100 == MATCHING_IN_FIRST_STRING); 04101 /* 1 if this match is the best seen so far. */ 04102 boolean best_match_p; 04103 04104 /* AIX compiler got confused when this was combined 04105 with the previous declaration. */ 04106 if (same_str_p) 04107 best_match_p = d > match_end; 04108 else 04109 best_match_p = !MATCHING_IN_FIRST_STRING; 04110 04111 DEBUG_PRINT1 ("backtracking.\n"); 04112 04113 if (!FAIL_STACK_EMPTY ()) 04114 { /* More failure points to try. */ 04115 04116 /* If exceeds best match so far, save it. */ 04117 if (!best_regs_set || best_match_p) 04118 { 04119 best_regs_set = true; 04120 match_end = d; 04121 04122 DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); 04123 04124 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 04125 { 04126 best_regstart[mcnt] = regstart[mcnt]; 04127 best_regend[mcnt] = regend[mcnt]; 04128 } 04129 } 04130 goto fail; 04131 } 04132 04133 /* If no failure points, don't restore garbage. And if 04134 last match is real best match, don't restore second 04135 best one. */ 04136 else if (best_regs_set && !best_match_p) 04137 { 04138 restore_best_regs: 04139 /* Restore best match. It may happen that `dend == 04140 end_match_1' while the restored d is in string2. 04141 For example, the pattern `x.*y.*z' against the 04142 strings `x-' and `y-z-', if the two strings are 04143 not consecutive in memory. */ 04144 DEBUG_PRINT1 ("Restoring best registers.\n"); 04145 04146 d = match_end; 04147 dend = ((d >= string1 && d <= end1) 04148 ? end_match_1 : end_match_2); 04149 04150 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 04151 { 04152 regstart[mcnt] = best_regstart[mcnt]; 04153 regend[mcnt] = best_regend[mcnt]; 04154 } 04155 } 04156 } /* d != end_match_2 */ 04157 04158 succeed_label: 04159 DEBUG_PRINT1 ("Accepting match.\n"); 04160 04161 /* If caller wants register contents data back, do it. */ 04162 if (regs && !bufp->no_sub) 04163 { 04164 /* Have the register data arrays been allocated? */ 04165 if (bufp->regs_allocated == REGS_UNALLOCATED) 04166 { /* No. So allocate them with malloc. We need one 04167 extra element beyond `num_regs' for the `-1' marker 04168 GNU code uses. */ 04169 regs->num_regs = MAX (RE_NREGS, num_regs + 1); 04170 regs->start = TALLOC (regs->num_regs, regoff_t); 04171 regs->end = TALLOC (regs->num_regs, regoff_t); 04172 if (regs->start == NULL || regs->end == NULL) 04173 { 04174 FREE_VARIABLES (); 04175 return -2; 04176 } 04177 bufp->regs_allocated = REGS_REALLOCATE; 04178 } 04179 else if (bufp->regs_allocated == REGS_REALLOCATE) 04180 { /* Yes. If we need more elements than were already 04181 allocated, reallocate them. If we need fewer, just 04182 leave it alone. */ 04183 if (regs->num_regs < num_regs + 1) 04184 { 04185 regs->num_regs = num_regs + 1; 04186 RETALLOC (regs->start, regs->num_regs, regoff_t); 04187 RETALLOC (regs->end, regs->num_regs, regoff_t); 04188 if (regs->start == NULL || regs->end == NULL) 04189 { 04190 FREE_VARIABLES (); 04191 return -2; 04192 } 04193 } 04194 } 04195 else 04196 { 04197 /* These braces fend off a "empty body in an else-statement" 04198 warning under GCC when assert expands to nothing. */ 04199 assert (bufp->regs_allocated == REGS_FIXED); 04200 } 04201 04202 /* Convert the pointer data in `regstart' and `regend' to 04203 indices. Register zero has to be set differently, 04204 since we haven't kept track of any info for it. */ 04205 if (regs->num_regs > 0) 04206 { 04207 regs->start[0] = pos; 04208 regs->end[0] = (MATCHING_IN_FIRST_STRING 04209 ? ((regoff_t) (d - string1)) 04210 : ((regoff_t) (d - string2 + size1))); 04211 } 04212 04213 /* Go through the first `min (num_regs, regs->num_regs)' 04214 registers, since that is all we initialized. */ 04215 for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs); 04216 mcnt++) 04217 { 04218 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) 04219 regs->start[mcnt] = regs->end[mcnt] = -1; 04220 else 04221 { 04222 regs->start[mcnt] 04223 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); 04224 regs->end[mcnt] 04225 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); 04226 } 04227 } 04228 04229 /* If the regs structure we return has more elements than 04230 were in the pattern, set the extra elements to -1. If 04231 we (re)allocated the registers, this is the case, 04232 because we always allocate enough to have at least one 04233 -1 at the end. */ 04234 for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++) 04235 regs->start[mcnt] = regs->end[mcnt] = -1; 04236 } /* regs && !bufp->no_sub */ 04237 04238 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", 04239 nfailure_points_pushed, nfailure_points_popped, 04240 nfailure_points_pushed - nfailure_points_popped); 04241 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); 04242 04243 mcnt = d - pos - (MATCHING_IN_FIRST_STRING 04244 ? string1 04245 : string2 - size1); 04246 04247 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); 04248 04249 FREE_VARIABLES (); 04250 return mcnt; 04251 } 04252 04253 /* Otherwise match next pattern command. */ 04254 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 04255 { 04256 /* Ignore these. Used to ignore the n of succeed_n's which 04257 currently have n == 0. */ 04258 case no_op: 04259 DEBUG_PRINT1 ("EXECUTING no_op.\n"); 04260 break; 04261 04262 case succeed: 04263 DEBUG_PRINT1 ("EXECUTING succeed.\n"); 04264 goto succeed_label; 04265 04266 /* Match the next n pattern characters exactly. The following 04267 byte in the pattern defines n, and the n bytes after that 04268 are the characters to match. */ 04269 case exactn: 04270 mcnt = *p++; 04271 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); 04272 04273 /* This is written out as an if-else so we don't waste time 04274 testing `translate' inside the loop. */ 04275 if (translate) 04276 { 04277 do 04278 { 04279 PREFETCH (); 04280 if ((unsigned char) translate[(unsigned char) *d++] 04281 != (unsigned char) *p++) 04282 goto fail; 04283 } 04284 while (--mcnt); 04285 } 04286 else 04287 { 04288 do 04289 { 04290 PREFETCH (); 04291 if (*d++ != (char) *p++) goto fail; 04292 } 04293 while (--mcnt); 04294 } 04295 SET_REGS_MATCHED (); 04296 break; 04297 04298 04299 /* Match any character except possibly a newline or a null. */ 04300 case anychar: 04301 DEBUG_PRINT1 ("EXECUTING anychar.\n"); 04302 04303 PREFETCH (); 04304 04305 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') 04306 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) 04307 goto fail; 04308 04309 SET_REGS_MATCHED (); 04310 DEBUG_PRINT2 (" Matched `%d'.\n", *d); 04311 d++; 04312 break; 04313 04314 04315 case charset: 04316 case charset_not: 04317 { 04318 register unsigned char c; 04319 boolean not = (re_opcode_t) *(p - 1) == charset_not; 04320 04321 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); 04322 04323 PREFETCH (); 04324 c = TRANSLATE (*d); /* The character to match. */ 04325 04326 /* Cast to `unsigned' instead of `unsigned char' in case the 04327 bit list is a full 32 bytes long. */ 04328 if (c < (unsigned) (*p * BYTEWIDTH) 04329 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 04330 not = !not; 04331 04332 p += 1 + *p; 04333 04334 if (!not) goto fail; 04335 04336 SET_REGS_MATCHED (); 04337 d++; 04338 break; 04339 } 04340 04341 04342 /* The beginning of a group is represented by start_memory. 04343 The arguments are the register number in the next byte, and the 04344 number of groups inner to this one in the next. The text 04345 matched within the group is recorded (in the internal 04346 registers data structure) under the register number. */ 04347 case start_memory: 04348 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); 04349 04350 /* Find out if this group can match the empty string. */ 04351 p1 = p; /* To send to group_match_null_string_p. */ 04352 04353 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) 04354 REG_MATCH_NULL_STRING_P (reg_info[*p]) 04355 = group_match_null_string_p (&p1, pend, reg_info); 04356 04357 /* Save the position in the string where we were the last time 04358 we were at this open-group operator in case the group is 04359 operated upon by a repetition operator, e.g., with `(a*)*b' 04360 against `ab'; then we want to ignore where we are now in 04361 the string in case this attempt to match fails. */ 04362 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 04363 ? REG_UNSET (regstart[*p]) ? d : regstart[*p] 04364 : regstart[*p]; 04365 DEBUG_PRINT2 (" old_regstart: %d\n", 04366 POINTER_TO_OFFSET (old_regstart[*p])); 04367 04368 regstart[*p] = d; 04369 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); 04370 04371 IS_ACTIVE (reg_info[*p]) = 1; 04372 MATCHED_SOMETHING (reg_info[*p]) = 0; 04373 04374 /* Clear this whenever we change the register activity status. */ 04375 set_regs_matched_done = 0; 04376 04377 /* This is the new highest active register. */ 04378 highest_active_reg = *p; 04379 04380 /* If nothing was active before, this is the new lowest active 04381 register. */ 04382 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 04383 lowest_active_reg = *p; 04384 04385 /* Move past the register number and inner group count. */ 04386 p += 2; 04387 just_past_start_mem = p; 04388 04389 break; 04390 04391 04392 /* The stop_memory opcode represents the end of a group. Its 04393 arguments are the same as start_memory's: the register 04394 number, and the number of inner groups. */ 04395 case stop_memory: 04396 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); 04397 04398 /* We need to save the string position the last time we were at 04399 this close-group operator in case the group is operated 04400 upon by a repetition operator, e.g., with `((a*)*(b*)*)*' 04401 against `aba'; then we want to ignore where we are now in 04402 the string in case this attempt to match fails. */ 04403 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 04404 ? REG_UNSET (regend[*p]) ? d : regend[*p] 04405 : regend[*p]; 04406 DEBUG_PRINT2 (" old_regend: %d\n", 04407 POINTER_TO_OFFSET (old_regend[*p])); 04408 04409 regend[*p] = d; 04410 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); 04411 04412 /* This register isn't active anymore. */ 04413 IS_ACTIVE (reg_info[*p]) = 0; 04414 04415 /* Clear this whenever we change the register activity status. */ 04416 set_regs_matched_done = 0; 04417 04418 /* If this was the only register active, nothing is active 04419 anymore. */ 04420 if (lowest_active_reg == highest_active_reg) 04421 { 04422 lowest_active_reg = NO_LOWEST_ACTIVE_REG; 04423 highest_active_reg = NO_HIGHEST_ACTIVE_REG; 04424 } 04425 else 04426 { /* We must scan for the new highest active register, since 04427 it isn't necessarily one less than now: consider 04428 (a(b)c(d(e)f)g). When group 3 ends, after the f), the 04429 new highest active register is 1. */ 04430 unsigned char r = *p - 1; 04431 while (r > 0 && !IS_ACTIVE (reg_info[r])) 04432 r--; 04433 04434 /* If we end up at register zero, that means that we saved 04435 the registers as the result of an `on_failure_jump', not 04436 a `start_memory', and we jumped to past the innermost 04437 `stop_memory'. For example, in ((.)*) we save 04438 registers 1 and 2 as a result of the *, but when we pop 04439 back to the second ), we are at the stop_memory 1. 04440 Thus, nothing is active. */ 04441 if (r == 0) 04442 { 04443 lowest_active_reg = NO_LOWEST_ACTIVE_REG; 04444 highest_active_reg = NO_HIGHEST_ACTIVE_REG; 04445 } 04446 else 04447 highest_active_reg = r; 04448 } 04449 04450 /* If just failed to match something this time around with a 04451 group that's operated on by a repetition operator, try to 04452 force exit from the ``loop'', and restore the register 04453 information for this group that we had before trying this 04454 last match. */ 04455 if ((!MATCHED_SOMETHING (reg_info[*p]) 04456 || just_past_start_mem == p - 1) 04457 && (p + 2) < pend) 04458 { 04459 boolean is_a_jump_n = false; 04460 04461 p1 = p + 2; 04462 mcnt = 0; 04463 switch ((re_opcode_t) *p1++) 04464 { 04465 case jump_n: 04466 is_a_jump_n = true; 04467 case pop_failure_jump: 04468 case maybe_pop_jump: 04469 case jump: 04470 case dummy_failure_jump: 04471 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 04472 if (is_a_jump_n) 04473 p1 += 2; 04474 break; 04475 04476 default: 04477 /* do nothing */ ; 04478 } 04479 p1 += mcnt; 04480 04481 /* If the next operation is a jump backwards in the pattern 04482 to an on_failure_jump right before the start_memory 04483 corresponding to this stop_memory, exit from the loop 04484 by forcing a failure after pushing on the stack the 04485 on_failure_jump's jump in the pattern, and d. */ 04486 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump 04487 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) 04488 { 04489 /* If this group ever matched anything, then restore 04490 what its registers were before trying this last 04491 failed match, e.g., with `(a*)*b' against `ab' for 04492 regstart[1], and, e.g., with `((a*)*(b*)*)*' 04493 against `aba' for regend[3]. 04494 04495 Also restore the registers for inner groups for, 04496 e.g., `((a*)(b*))*' against `aba' (register 3 would 04497 otherwise get trashed). */ 04498 04499 if (EVER_MATCHED_SOMETHING (reg_info[*p])) 04500 { 04501 unsigned r; 04502 04503 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; 04504 04505 /* Restore this and inner groups' (if any) registers. */ 04506 for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1); 04507 r++) 04508 { 04509 regstart[r] = old_regstart[r]; 04510 04511 /* xx why this test? */ 04512 if (old_regend[r] >= regstart[r]) 04513 regend[r] = old_regend[r]; 04514 } 04515 } 04516 p1++; 04517 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 04518 PUSH_FAILURE_POINT (p1 + mcnt, d, -2); 04519 04520 goto fail; 04521 } 04522 } 04523 04524 /* Move past the register number and the inner group count. */ 04525 p += 2; 04526 break; 04527 04528 04529 /* <digit> has been turned into a `duplicate' command which is 04530 followed by the numeric value of <digit> as the register number. */ 04531 case duplicate: 04532 { 04533 register const char *d2, *dend2; 04534 int regno = *p++; /* Get which register to match against. */ 04535 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 04536 04537 /* Can't back reference a group which we've never matched. */ 04538 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) 04539 goto fail; 04540 04541 /* Where in input to try to start matching. */ 04542 d2 = regstart[regno]; 04543 04544 /* Where to stop matching; if both the place to start and 04545 the place to stop matching are in the same string, then 04546 set to the place to stop, otherwise, for now have to use 04547 the end of the first string. */ 04548 04549 dend2 = ((FIRST_STRING_P (regstart[regno]) 04550 == FIRST_STRING_P (regend[regno])) 04551 ? regend[regno] : end_match_1); 04552 for (;;) 04553 { 04554 /* If necessary, advance to next segment in register 04555 contents. */ 04556 while (d2 == dend2) 04557 { 04558 if (dend2 == end_match_2) break; 04559 if (dend2 == regend[regno]) break; 04560 04561 /* End of string1 => advance to string2. */ 04562 d2 = string2; 04563 dend2 = regend[regno]; 04564 } 04565 /* At end of register contents => success */ 04566 if (d2 == dend2) break; 04567 04568 /* If necessary, advance to next segment in data. */ 04569 PREFETCH (); 04570 04571 /* How many characters left in this segment to match. */ 04572 mcnt = dend - d; 04573 04574 /* Want how many consecutive characters we can match in 04575 one shot, so, if necessary, adjust the count. */ 04576 if (mcnt > dend2 - d2) 04577 mcnt = dend2 - d2; 04578 04579 /* Compare that many; failure if mismatch, else move 04580 past them. */ 04581 if (translate 04582 ? bcmp_translate (d, d2, mcnt, translate) 04583 : memcmp (d, d2, mcnt)) 04584 goto fail; 04585 d += mcnt, d2 += mcnt; 04586 04587 /* Do this because we've match some characters. */ 04588 SET_REGS_MATCHED (); 04589 } 04590 } 04591 break; 04592 04593 04594 /* begline matches the empty string at the beginning of the string 04595 (unless `not_bol' is set in `bufp'), and, if 04596 `newline_anchor' is set, after newlines. */ 04597 case begline: 04598 DEBUG_PRINT1 ("EXECUTING begline.\n"); 04599 04600 if (AT_STRINGS_BEG (d)) 04601 { 04602 if (!bufp->not_bol) break; 04603 } 04604 else if (d[-1] == '\n' && bufp->newline_anchor) 04605 { 04606 break; 04607 } 04608 /* In all other cases, we fail. */ 04609 goto fail; 04610 04611 04612 /* endline is the dual of begline. */ 04613 case endline: 04614 DEBUG_PRINT1 ("EXECUTING endline.\n"); 04615 04616 if (AT_STRINGS_END (d)) 04617 { 04618 if (!bufp->not_eol) break; 04619 } 04620 04621 /* We have to ``prefetch'' the next character. */ 04622 else if ((d == end1 ? *string2 : *d) == '\n' 04623 && bufp->newline_anchor) 04624 { 04625 break; 04626 } 04627 goto fail; 04628 04629 04630 /* Match at the very beginning of the data. */ 04631 case begbuf: 04632 DEBUG_PRINT1 ("EXECUTING begbuf.\n"); 04633 if (AT_STRINGS_BEG (d)) 04634 break; 04635 goto fail; 04636 04637 04638 /* Match at the very end of the data. */ 04639 case endbuf: 04640 DEBUG_PRINT1 ("EXECUTING endbuf.\n"); 04641 if (AT_STRINGS_END (d)) 04642 break; 04643 goto fail; 04644 04645 04646 /* on_failure_keep_string_jump is used to optimize `.*\n'. It 04647 pushes NULL as the value for the string on the stack. Then 04648 `pop_failure_point' will keep the current value for the 04649 string, instead of restoring it. To see why, consider 04650 matching `foo\nbar' against `.*\n'. The .* matches the foo; 04651 then the . fails against the \n. But the next thing we want 04652 to do is match the \n against the \n; if we restored the 04653 string value, we would be back at the foo. 04654 04655 Because this is used only in specific cases, we don't need to 04656 check all the things that `on_failure_jump' does, to make 04657 sure the right things get saved on the stack. Hence we don't 04658 share its code. The only reason to push anything on the 04659 stack at all is that otherwise we would have to change 04660 `anychar's code to do something besides goto fail in this 04661 case; that seems worse than this. */ 04662 case on_failure_keep_string_jump: 04663 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); 04664 04665 EXTRACT_NUMBER_AND_INCR (mcnt, p); 04666 #ifdef _LIBC 04667 DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt); 04668 #else 04669 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); 04670 #endif 04671 04672 PUSH_FAILURE_POINT (p + mcnt, NULL, -2); 04673 break; 04674 04675 04676 /* Uses of on_failure_jump: 04677 04678 Each alternative starts with an on_failure_jump that points 04679 to the beginning of the next alternative. Each alternative 04680 except the last ends with a jump that in effect jumps past 04681 the rest of the alternatives. (They really jump to the 04682 ending jump of the following alternative, because tensioning 04683 these jumps is a hassle.) 04684 04685 Repeats start with an on_failure_jump that points past both 04686 the repetition text and either the following jump or 04687 pop_failure_jump back to this on_failure_jump. */ 04688 case on_failure_jump: 04689 on_failure: 04690 DEBUG_PRINT1 ("EXECUTING on_failure_jump"); 04691 04692 EXTRACT_NUMBER_AND_INCR (mcnt, p); 04693 #ifdef _LIBC 04694 DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt); 04695 #else 04696 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); 04697 #endif 04698 04699 /* If this on_failure_jump comes right before a group (i.e., 04700 the original * applied to a group), save the information 04701 for that group and all inner ones, so that if we fail back 04702 to this point, the group's information will be correct. 04703 For example, in \(a*\)*\1, we need the preceding group, 04704 and in \(zz\(a*\)b*\)\2, we need the inner group. */ 04705 04706 /* We can't use `p' to check ahead because we push 04707 a failure point to `p + mcnt' after we do this. */ 04708 p1 = p; 04709 04710 /* We need to skip no_op's before we look for the 04711 start_memory in case this on_failure_jump is happening as 04712 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 04713 against aba. */ 04714 while (p1 < pend && (re_opcode_t) *p1 == no_op) 04715 p1++; 04716 04717 if (p1 < pend && (re_opcode_t) *p1 == start_memory) 04718 { 04719 /* We have a new highest active register now. This will 04720 get reset at the start_memory we are about to get to, 04721 but we will have saved all the registers relevant to 04722 this repetition op, as described above. */ 04723 highest_active_reg = *(p1 + 1) + *(p1 + 2); 04724 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 04725 lowest_active_reg = *(p1 + 1); 04726 } 04727 04728 DEBUG_PRINT1 (":\n"); 04729 PUSH_FAILURE_POINT (p + mcnt, d, -2); 04730 break; 04731 04732 04733 /* A smart repeat ends with `maybe_pop_jump'. 04734 We change it to either `pop_failure_jump' or `jump'. */ 04735 case maybe_pop_jump: 04736 EXTRACT_NUMBER_AND_INCR (mcnt, p); 04737 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); 04738 { 04739 register unsigned char *p2 = p; 04740 04741 /* Compare the beginning of the repeat with what in the 04742 pattern follows its end. If we can establish that there 04743 is nothing that they would both match, i.e., that we 04744 would have to backtrack because of (as in, e.g., `a*a') 04745 then we can change to pop_failure_jump, because we'll 04746 never have to backtrack. 04747 04748 This is not true in the case of alternatives: in 04749 `(a|ab)*' we do need to backtrack to the `ab' alternative 04750 (e.g., if the string was `ab'). But instead of trying to 04751 detect that here, the alternative has put on a dummy 04752 failure point which is what we will end up popping. */ 04753 04754 /* Skip over open/close-group commands. 04755 If what follows this loop is a ...+ construct, 04756 look at what begins its body, since we will have to 04757 match at least one of that. */ 04758 while (1) 04759 { 04760 if (p2 + 2 < pend 04761 && ((re_opcode_t) *p2 == stop_memory 04762 || (re_opcode_t) *p2 == start_memory)) 04763 p2 += 3; 04764 else if (p2 + 6 < pend 04765 && (re_opcode_t) *p2 == dummy_failure_jump) 04766 p2 += 6; 04767 else 04768 break; 04769 } 04770 04771 p1 = p + mcnt; 04772 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding 04773 to the `maybe_finalize_jump' of this case. Examine what 04774 follows. */ 04775 04776 /* If we're at the end of the pattern, we can change. */ 04777 if (p2 == pend) 04778 { 04779 /* Consider what happens when matching ":\(.*\)" 04780 against ":/". I don't really understand this code 04781 yet. */ 04782 p[-3] = (unsigned char) pop_failure_jump; 04783 DEBUG_PRINT1 04784 (" End of pattern: change to `pop_failure_jump'.\n"); 04785 } 04786 04787 else if ((re_opcode_t) *p2 == exactn 04788 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) 04789 { 04790 register unsigned char c 04791 = *p2 == (unsigned char) endline ? '\n' : p2[2]; 04792 04793 if ((re_opcode_t) p1[3] == exactn && p1[5] != c) 04794 { 04795 p[-3] = (unsigned char) pop_failure_jump; 04796 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 04797 c, p1[5]); 04798 } 04799 04800 else if ((re_opcode_t) p1[3] == charset 04801 || (re_opcode_t) p1[3] == charset_not) 04802 { 04803 int not = (re_opcode_t) p1[3] == charset_not; 04804 04805 if (c < (unsigned char) (p1[4] * BYTEWIDTH) 04806 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 04807 not = !not; 04808 04809 /* `not' is equal to 1 if c would match, which means 04810 that we can't change to pop_failure_jump. */ 04811 if (!not) 04812 { 04813 p[-3] = (unsigned char) pop_failure_jump; 04814 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 04815 } 04816 } 04817 } 04818 else if ((re_opcode_t) *p2 == charset) 04819 { 04820 #ifdef DEBUG 04821 register unsigned char c 04822 = *p2 == (unsigned char) endline ? '\n' : p2[2]; 04823 #endif 04824 04825 #if 0 04826 if ((re_opcode_t) p1[3] == exactn 04827 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] 04828 && (p2[2 + p1[5] / BYTEWIDTH] 04829 & (1 << (p1[5] % BYTEWIDTH))))) 04830 #else 04831 if ((re_opcode_t) p1[3] == exactn 04832 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4] 04833 && (p2[2 + p1[4] / BYTEWIDTH] 04834 & (1 << (p1[4] % BYTEWIDTH))))) 04835 #endif 04836 { 04837 p[-3] = (unsigned char) pop_failure_jump; 04838 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 04839 c, p1[5]); 04840 } 04841 04842 else if ((re_opcode_t) p1[3] == charset_not) 04843 { 04844 int idx; 04845 /* We win if the charset_not inside the loop 04846 lists every character listed in the charset after. */ 04847 for (idx = 0; idx < (int) p2[1]; idx++) 04848 if (! (p2[2 + idx] == 0 04849 || (idx < (int) p1[4] 04850 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) 04851 break; 04852 04853 if (idx == p2[1]) 04854 { 04855 p[-3] = (unsigned char) pop_failure_jump; 04856 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 04857 } 04858 } 04859 else if ((re_opcode_t) p1[3] == charset) 04860 { 04861 int idx; 04862 /* We win if the charset inside the loop 04863 has no overlap with the one after the loop. */ 04864 for (idx = 0; 04865 idx < (int) p2[1] && idx < (int) p1[4]; 04866 idx++) 04867 if ((p2[2 + idx] & p1[5 + idx]) != 0) 04868 break; 04869 04870 if (idx == p2[1] || idx == p1[4]) 04871 { 04872 p[-3] = (unsigned char) pop_failure_jump; 04873 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 04874 } 04875 } 04876 } 04877 } 04878 p -= 2; /* Point at relative address again. */ 04879 if ((re_opcode_t) p[-1] != pop_failure_jump) 04880 { 04881 p[-1] = (unsigned char) jump; 04882 DEBUG_PRINT1 (" Match => jump.\n"); 04883 goto unconditional_jump; 04884 } 04885 /* Note fall through. */ 04886 04887 04888 /* The end of a simple repeat has a pop_failure_jump back to 04889 its matching on_failure_jump, where the latter will push a 04890 failure point. The pop_failure_jump takes off failure 04891 points put on by this pop_failure_jump's matching 04892 on_failure_jump; we got through the pattern to here from the 04893 matching on_failure_jump, so didn't fail. */ 04894 case pop_failure_jump: 04895 { 04896 /* We need to pass separate storage for the lowest and 04897 highest registers, even though we don't care about the 04898 actual values. Otherwise, we will restore only one 04899 register from the stack, since lowest will == highest in 04900 `pop_failure_point'. */ 04901 active_reg_t dummy_low_reg, dummy_high_reg; 04902 unsigned char *pdummy; 04903 const char *sdummy; 04904 04905 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); 04906 POP_FAILURE_POINT (sdummy, pdummy, 04907 dummy_low_reg, dummy_high_reg, 04908 reg_dummy, reg_dummy, reg_info_dummy); 04909 } 04910 /* Note fall through. */ 04911 04912 unconditional_jump: 04913 #ifdef _LIBC 04914 DEBUG_PRINT2 ("\n%p: ", p); 04915 #else 04916 DEBUG_PRINT2 ("\n0x%x: ", p); 04917 #endif 04918 /* Note fall through. */ 04919 04920 /* Unconditionally jump (without popping any failure points). */ 04921 case jump: 04922 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ 04923 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); 04924 p += mcnt; /* Do the jump. */ 04925 #ifdef _LIBC 04926 DEBUG_PRINT2 ("(to %p).\n", p); 04927 #else 04928 DEBUG_PRINT2 ("(to 0x%x).\n", p); 04929 #endif 04930 break; 04931 04932 04933 /* We need this opcode so we can detect where alternatives end 04934 in `group_match_null_string_p' et al. */ 04935 case jump_past_alt: 04936 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); 04937 goto unconditional_jump; 04938 04939 04940 /* Normally, the on_failure_jump pushes a failure point, which 04941 then gets popped at pop_failure_jump. We will end up at 04942 pop_failure_jump, also, and with a pattern of, say, `a+', we 04943 are skipping over the on_failure_jump, so we have to push 04944 something meaningless for pop_failure_jump to pop. */ 04945 case dummy_failure_jump: 04946 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); 04947 /* It doesn't matter what we push for the string here. What 04948 the code at `fail' tests is the value for the pattern. */ 04949 PUSH_FAILURE_POINT (NULL, NULL, -2); 04950 goto unconditional_jump; 04951 04952 04953 /* At the end of an alternative, we need to push a dummy failure 04954 point in case we are followed by a `pop_failure_jump', because 04955 we don't want the failure point for the alternative to be 04956 popped. For example, matching `(a|ab)*' against `aab' 04957 requires that we match the `ab' alternative. */ 04958 case push_dummy_failure: 04959 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); 04960 /* See comments just above at `dummy_failure_jump' about the 04961 two zeroes. */ 04962 PUSH_FAILURE_POINT (NULL, NULL, -2); 04963 break; 04964 04965 /* Have to succeed matching what follows at least n times. 04966 After that, handle like `on_failure_jump'. */ 04967 case succeed_n: 04968 EXTRACT_NUMBER (mcnt, p + 2); 04969 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); 04970 04971 assert (mcnt >= 0); 04972 /* Originally, this is how many times we HAVE to succeed. */ 04973 if (mcnt > 0) 04974 { 04975 mcnt--; 04976 p += 2; 04977 STORE_NUMBER_AND_INCR (p, mcnt); 04978 #ifdef _LIBC 04979 DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt); 04980 #else 04981 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - 2, mcnt); 04982 #endif 04983 } 04984 else if (mcnt == 0) 04985 { 04986 #ifdef _LIBC 04987 DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2); 04988 #else 04989 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); 04990 #endif 04991 p[2] = (unsigned char) no_op; 04992 p[3] = (unsigned char) no_op; 04993 goto on_failure; 04994 } 04995 break; 04996 04997 case jump_n: 04998 EXTRACT_NUMBER (mcnt, p + 2); 04999 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); 05000 05001 /* Originally, this is how many times we CAN jump. */ 05002 if (mcnt) 05003 { 05004 mcnt--; 05005 STORE_NUMBER (p + 2, mcnt); 05006 #ifdef _LIBC 05007 DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt); 05008 #else 05009 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + 2, mcnt); 05010 #endif 05011 goto unconditional_jump; 05012 } 05013 /* If don't have to jump any more, skip over the rest of command. */ 05014 else 05015 p += 4; 05016 break; 05017 05018 case set_number_at: 05019 { 05020 DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); 05021 05022 EXTRACT_NUMBER_AND_INCR (mcnt, p); 05023 p1 = p + mcnt; 05024 EXTRACT_NUMBER_AND_INCR (mcnt, p); 05025 #ifdef _LIBC 05026 DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); 05027 #else 05028 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); 05029 #endif 05030 STORE_NUMBER (p1, mcnt); 05031 break; 05032 } 05033 05034 #if 0 05035 /* The DEC Alpha C compiler 3.x generates incorrect code for the 05036 test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of 05037 AT_WORD_BOUNDARY, so this code is disabled. Expanding the 05038 macro and introducing temporary variables works around the bug. */ 05039 05040 case wordbound: 05041 DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 05042 if (AT_WORD_BOUNDARY (d)) 05043 break; 05044 goto fail; 05045 05046 case notwordbound: 05047 DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); 05048 if (AT_WORD_BOUNDARY (d)) 05049 goto fail; 05050 break; 05051 #else 05052 case wordbound: 05053 { 05054 boolean prevchar, thischar; 05055 05056 DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 05057 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 05058 break; 05059 05060 prevchar = WORDCHAR_P (d - 1); 05061 thischar = WORDCHAR_P (d); 05062 if (prevchar != thischar) 05063 break; 05064 goto fail; 05065 } 05066 05067 case notwordbound: 05068 { 05069 boolean prevchar, thischar; 05070 05071 DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); 05072 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 05073 goto fail; 05074 05075 prevchar = WORDCHAR_P (d - 1); 05076 thischar = WORDCHAR_P (d); 05077 if (prevchar != thischar) 05078 goto fail; 05079 break; 05080 } 05081 #endif 05082 05083 case wordbeg: 05084 DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 05085 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) 05086 break; 05087 goto fail; 05088 05089 case wordend: 05090 DEBUG_PRINT1 ("EXECUTING wordend.\n"); 05091 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) 05092 && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) 05093 break; 05094 goto fail; 05095 05096 #ifdef emacs 05097 case before_dot: 05098 DEBUG_PRINT1 ("EXECUTING before_dot.\n"); 05099 if (PTR_CHAR_POS ((unsigned char *) d) >= point) 05100 goto fail; 05101 break; 05102 05103 case at_dot: 05104 DEBUG_PRINT1 ("EXECUTING at_dot.\n"); 05105 if (PTR_CHAR_POS ((unsigned char *) d) != point) 05106 goto fail; 05107 break; 05108 05109 case after_dot: 05110 DEBUG_PRINT1 ("EXECUTING after_dot.\n"); 05111 if (PTR_CHAR_POS ((unsigned char *) d) <= point) 05112 goto fail; 05113 break; 05114 05115 case syntaxspec: 05116 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); 05117 mcnt = *p++; 05118 goto matchsyntax; 05119 05120 case wordchar: 05121 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); 05122 mcnt = (int) Sword; 05123 matchsyntax: 05124 PREFETCH (); 05125 /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ 05126 d++; 05127 if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) 05128 goto fail; 05129 SET_REGS_MATCHED (); 05130 break; 05131 05132 case notsyntaxspec: 05133 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); 05134 mcnt = *p++; 05135 goto matchnotsyntax; 05136 05137 case notwordchar: 05138 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); 05139 mcnt = (int) Sword; 05140 matchnotsyntax: 05141 PREFETCH (); 05142 /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ 05143 d++; 05144 if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) 05145 goto fail; 05146 SET_REGS_MATCHED (); 05147 break; 05148 05149 #else /* not emacs */ 05150 case wordchar: 05151 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); 05152 PREFETCH (); 05153 if (!WORDCHAR_P (d)) 05154 goto fail; 05155 SET_REGS_MATCHED (); 05156 d++; 05157 break; 05158 05159 case notwordchar: 05160 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); 05161 PREFETCH (); 05162 if (WORDCHAR_P (d)) 05163 goto fail; 05164 SET_REGS_MATCHED (); 05165 d++; 05166 break; 05167 #endif /* not emacs */ 05168 05169 default: 05170 abort (); 05171 } 05172 continue; /* Successfully executed one pattern command; keep going. */ 05173 05174 05175 /* We goto here if a matching operation fails. */ 05176 fail: 05177 if (!FAIL_STACK_EMPTY ()) 05178 { /* A restart point is known. Restore to that state. */ 05179 DEBUG_PRINT1 ("\nFAIL:\n"); 05180 POP_FAILURE_POINT (d, p, 05181 lowest_active_reg, highest_active_reg, 05182 regstart, regend, reg_info); 05183 05184 /* If this failure point is a dummy, try the next one. */ 05185 if (!p) 05186 goto fail; 05187 05188 /* If we failed to the end of the pattern, don't examine *p. */ 05189 assert (p <= pend); 05190 if (p < pend) 05191 { 05192 boolean is_a_jump_n = false; 05193 05194 /* If failed to a backwards jump that's part of a repetition 05195 loop, need to pop this failure point and use the next one. */ 05196 switch ((re_opcode_t) *p) 05197 { 05198 case jump_n: 05199 is_a_jump_n = true; 05200 case maybe_pop_jump: 05201 case pop_failure_jump: 05202 case jump: 05203 p1 = p + 1; 05204 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05205 p1 += mcnt; 05206 05207 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) 05208 || (!is_a_jump_n 05209 && (re_opcode_t) *p1 == on_failure_jump)) 05210 goto fail; 05211 break; 05212 default: 05213 /* do nothing */ ; 05214 } 05215 } 05216 05217 if (d >= string1 && d <= end1) 05218 dend = end_match_1; 05219 } 05220 else 05221 break; /* Matching at this starting point really fails. */ 05222 } /* for (;;) */ 05223 05224 if (best_regs_set) 05225 goto restore_best_regs; 05226 05227 FREE_VARIABLES (); 05228 05229 return -1; /* Failure to match. */ 05230 } /* re_match_2 */ 05231 05232 /* Subroutine definitions for re_match_2. */ 05233 05234 05235 /* We are passed P pointing to a register number after a start_memory. 05236 05237 Return true if the pattern up to the corresponding stop_memory can 05238 match the empty string, and false otherwise. 05239 05240 If we find the matching stop_memory, sets P to point to one past its number. 05241 Otherwise, sets P to an undefined byte less than or equal to END. 05242 05243 We don't handle duplicates properly (yet). */ 05244 05245 static boolean 05246 group_match_null_string_p (p, end, reg_info) 05247 unsigned char **p, *end; 05248 register_info_type *reg_info; 05249 { 05250 int mcnt; 05251 /* Point to after the args to the start_memory. */ 05252 unsigned char *p1 = *p + 2; 05253 05254 while (p1 < end) 05255 { 05256 /* Skip over opcodes that can match nothing, and return true or 05257 false, as appropriate, when we get to one that can't, or to the 05258 matching stop_memory. */ 05259 05260 switch ((re_opcode_t) *p1) 05261 { 05262 /* Could be either a loop or a series of alternatives. */ 05263 case on_failure_jump: 05264 p1++; 05265 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05266 05267 /* If the next operation is not a jump backwards in the 05268 pattern. */ 05269 05270 if (mcnt >= 0) 05271 { 05272 /* Go through the on_failure_jumps of the alternatives, 05273 seeing if any of the alternatives cannot match nothing. 05274 The last alternative starts with only a jump, 05275 whereas the rest start with on_failure_jump and end 05276 with a jump, e.g., here is the pattern for `a|b|c': 05277 05278 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 05279 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 05280 /exactn/1/c 05281 05282 So, we have to first go through the first (n-1) 05283 alternatives and then deal with the last one separately. */ 05284 05285 05286 /* Deal with the first (n-1) alternatives, which start 05287 with an on_failure_jump (see above) that jumps to right 05288 past a jump_past_alt. */ 05289 05290 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) 05291 { 05292 /* `mcnt' holds how many bytes long the alternative 05293 is, including the ending `jump_past_alt' and 05294 its number. */ 05295 05296 if (!alt_match_null_string_p (p1, p1 + mcnt - 3, 05297 reg_info)) 05298 return false; 05299 05300 /* Move to right after this alternative, including the 05301 jump_past_alt. */ 05302 p1 += mcnt; 05303 05304 /* Break if it's the beginning of an n-th alternative 05305 that doesn't begin with an on_failure_jump. */ 05306 if ((re_opcode_t) *p1 != on_failure_jump) 05307 break; 05308 05309 /* Still have to check that it's not an n-th 05310 alternative that starts with an on_failure_jump. */ 05311 p1++; 05312 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05313 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) 05314 { 05315 /* Get to the beginning of the n-th alternative. */ 05316 p1 -= 3; 05317 break; 05318 } 05319 } 05320 05321 /* Deal with the last alternative: go back and get number 05322 of the `jump_past_alt' just before it. `mcnt' contains 05323 the length of the alternative. */ 05324 EXTRACT_NUMBER (mcnt, p1 - 2); 05325 05326 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) 05327 return false; 05328 05329 p1 += mcnt; /* Get past the n-th alternative. */ 05330 } /* if mcnt > 0 */ 05331 break; 05332 05333 05334 case stop_memory: 05335 assert (p1[1] == **p); 05336 *p = p1 + 2; 05337 return true; 05338 05339 05340 default: 05341 if (!common_op_match_null_string_p (&p1, end, reg_info)) 05342 return false; 05343 } 05344 } /* while p1 < end */ 05345 05346 return false; 05347 } /* group_match_null_string_p */ 05348 05349 05350 /* Similar to group_match_null_string_p, but doesn't deal with alternatives: 05351 It expects P to be the first byte of a single alternative and END one 05352 byte past the last. The alternative can contain groups. */ 05353 05354 static boolean 05355 alt_match_null_string_p (p, end, reg_info) 05356 unsigned char *p, *end; 05357 register_info_type *reg_info; 05358 { 05359 int mcnt; 05360 unsigned char *p1 = p; 05361 05362 while (p1 < end) 05363 { 05364 /* Skip over opcodes that can match nothing, and break when we get 05365 to one that can't. */ 05366 05367 switch ((re_opcode_t) *p1) 05368 { 05369 /* It's a loop. */ 05370 case on_failure_jump: 05371 p1++; 05372 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05373 p1 += mcnt; 05374 break; 05375 05376 default: 05377 if (!common_op_match_null_string_p (&p1, end, reg_info)) 05378 return false; 05379 } 05380 } /* while p1 < end */ 05381 05382 return true; 05383 } /* alt_match_null_string_p */ 05384 05385 05386 /* Deals with the ops common to group_match_null_string_p and 05387 alt_match_null_string_p. 05388 05389 Sets P to one after the op and its arguments, if any. */ 05390 05391 static boolean 05392 common_op_match_null_string_p (p, end, reg_info) 05393 unsigned char **p, *end; 05394 register_info_type *reg_info; 05395 { 05396 int mcnt; 05397 boolean ret; 05398 int reg_no; 05399 unsigned char *p1 = *p; 05400 05401 switch ((re_opcode_t) *p1++) 05402 { 05403 case no_op: 05404 case begline: 05405 case endline: 05406 case begbuf: 05407 case endbuf: 05408 case wordbeg: 05409 case wordend: 05410 case wordbound: 05411 case notwordbound: 05412 #ifdef emacs 05413 case before_dot: 05414 case at_dot: 05415 case after_dot: 05416 #endif 05417 break; 05418 05419 case start_memory: 05420 reg_no = *p1; 05421 assert (reg_no > 0 && reg_no <= MAX_REGNUM); 05422 ret = group_match_null_string_p (&p1, end, reg_info); 05423 05424 /* Have to set this here in case we're checking a group which 05425 contains a group and a back reference to it. */ 05426 05427 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) 05428 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; 05429 05430 if (!ret) 05431 return false; 05432 break; 05433 05434 /* If this is an optimized succeed_n for zero times, make the jump. */ 05435 case jump: 05436 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05437 if (mcnt >= 0) 05438 p1 += mcnt; 05439 else 05440 return false; 05441 break; 05442 05443 case succeed_n: 05444 /* Get to the number of times to succeed. */ 05445 p1 += 2; 05446 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05447 05448 if (mcnt == 0) 05449 { 05450 p1 -= 4; 05451 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05452 p1 += mcnt; 05453 } 05454 else 05455 return false; 05456 break; 05457 05458 case duplicate: 05459 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) 05460 return false; 05461 break; 05462 05463 case set_number_at: 05464 p1 += 4; 05465 05466 default: 05467 /* All other opcodes mean we cannot match the empty string. */ 05468 return false; 05469 } 05470 05471 *p = p1; 05472 return true; 05473 } /* common_op_match_null_string_p */ 05474 05475 05476 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN 05477 bytes; nonzero otherwise. */ 05478 05479 static int 05480 bcmp_translate (s1, s2, len, translate) 05481 const char *s1, *s2; 05482 register int len; 05483 RE_TRANSLATE_TYPE translate; 05484 { 05485 register const unsigned char *p1 = (const unsigned char *) s1; 05486 register const unsigned char *p2 = (const unsigned char *) s2; 05487 while (len) 05488 { 05489 if (translate[*p1++] != translate[*p2++]) return 1; 05490 len--; 05491 } 05492 return 0; 05493 } 05494 05495 /* Entry points for GNU code. */ 05496 05497 /* re_compile_pattern is the GNU regular expression compiler: it 05498 compiles PATTERN (of length SIZE) and puts the result in BUFP. 05499 Returns 0 if the pattern was valid, otherwise an error string. 05500 05501 Assumes the `allocated' (and perhaps `buffer') and `translate' fields 05502 are set in BUFP on entry. 05503 05504 We call regex_compile to do the actual compilation. */ 05505 05506 const char * 05507 re_compile_pattern (pattern, length, bufp) 05508 const char *pattern; 05509 size_t length; 05510 struct re_pattern_buffer *bufp; 05511 { 05512 reg_errcode_t ret; 05513 05514 /* GNU code is written to assume at least RE_NREGS registers will be set 05515 (and at least one extra will be -1). */ 05516 bufp->regs_allocated = REGS_UNALLOCATED; 05517 05518 /* And GNU code determines whether or not to get register information 05519 by passing null for the REGS argument to re_match, etc., not by 05520 setting no_sub. */ 05521 bufp->no_sub = 0; 05522 05523 /* Match anchors at newline. */ 05524 bufp->newline_anchor = 1; 05525 05526 ret = regex_compile (pattern, length, re_syntax_options, bufp); 05527 05528 if (!ret) 05529 return NULL; 05530 return gettext (re_error_msgid[(int) ret]); 05531 } 05532 #ifdef _LIBC 05533 weak_alias (__re_compile_pattern, re_compile_pattern) 05534 #endif 05535 05536 /* Entry points compatible with 4.2 BSD regex library. We don't define 05537 them unless specifically requested. */ 05538 05539 #if defined _REGEX_RE_COMP || defined _LIBC 05540 05541 /* BSD has one and only one pattern buffer. */ 05542 static struct re_pattern_buffer re_comp_buf; 05543 05544 char * 05545 #ifdef _LIBC 05546 /* Make these definitions weak in libc, so POSIX programs can redefine 05547 these names if they don't use our functions, and still use 05548 regcomp/regexec below without link errors. */ 05549 weak_function 05550 #endif 05551 re_comp (s) 05552 const char *s; 05553 { 05554 reg_errcode_t ret; 05555 05556 if (!s) 05557 { 05558 if (!re_comp_buf.buffer) 05559 return gettext ("No previous regular expression"); 05560 return 0; 05561 } 05562 05563 if (!re_comp_buf.buffer) 05564 { 05565 re_comp_buf.buffer = (unsigned char *) malloc (200); 05566 if (re_comp_buf.buffer == NULL) 05567 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); 05568 re_comp_buf.allocated = 200; 05569 05570 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); 05571 if (re_comp_buf.fastmap == NULL) 05572 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); 05573 } 05574 05575 /* Since `re_exec' always passes NULL for the `regs' argument, we 05576 don't need to initialize the pattern buffer fields which affect it. */ 05577 05578 /* Match anchors at newlines. */ 05579 re_comp_buf.newline_anchor = 1; 05580 05581 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 05582 05583 if (!ret) 05584 return NULL; 05585 05586 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ 05587 return (char *) gettext (re_error_msgid[(int) ret]); 05588 } 05589 05590 05591 int 05592 #ifdef _LIBC 05593 weak_function 05594 #endif 05595 re_exec (s) 05596 const char *s; 05597 { 05598 const int len = strlen (s); 05599 return 05600 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); 05601 } 05602 05603 #endif /* _REGEX_RE_COMP */ 05604 05605 /* POSIX.2 functions. Don't define these for Emacs. */ 05606 05607 #ifndef emacs 05608 05609 /* regcomp takes a regular expression as a string and compiles it. 05610 05611 PREG is a regex_t *. We do not expect any fields to be initialized, 05612 since POSIX says we shouldn't. Thus, we set 05613 05614 `buffer' to the compiled pattern; 05615 `used' to the length of the compiled pattern; 05616 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the 05617 REG_EXTENDED bit in CFLAGS is set; otherwise, to 05618 RE_SYNTAX_POSIX_BASIC; 05619 `newline_anchor' to REG_NEWLINE being set in CFLAGS; 05620 `fastmap' and `fastmap_accurate' to zero; 05621 `re_nsub' to the number of subexpressions in PATTERN. 05622 05623 PATTERN is the address of the pattern string. 05624 05625 CFLAGS is a series of bits which affect compilation. 05626 05627 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we 05628 use POSIX basic syntax. 05629 05630 If REG_NEWLINE is set, then . and [^...] don't match newline. 05631 Also, regexec will try a match beginning after every newline. 05632 05633 If REG_ICASE is set, then we considers upper- and lowercase 05634 versions of letters to be equivalent when matching. 05635 05636 If REG_NOSUB is set, then when PREG is passed to regexec, that 05637 routine will report only success or failure, and nothing about the 05638 registers. 05639 05640 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for 05641 the return codes and their meanings.) */ 05642 05643 int 05644 regcomp (preg, pattern, cflags) 05645 regex_t *preg; 05646 const char *pattern; 05647 int cflags; 05648 { 05649 reg_errcode_t ret; 05650 reg_syntax_t syntax 05651 = (cflags & REG_EXTENDED) ? 05652 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; 05653 05654 /* regex_compile will allocate the space for the compiled pattern. */ 05655 preg->buffer = 0; 05656 preg->allocated = 0; 05657 preg->used = 0; 05658 05659 /* Don't bother to use a fastmap when searching. This simplifies the 05660 REG_NEWLINE case: if we used a fastmap, we'd have to put all the 05661 characters after newlines into the fastmap. This way, we just try 05662 every character. */ 05663 preg->fastmap = 0; 05664 05665 if (cflags & REG_ICASE) 05666 { 05667 unsigned i; 05668 05669 preg->translate 05670 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE 05671 * sizeof (*(RE_TRANSLATE_TYPE)0)); 05672 if (preg->translate == NULL) 05673 return (int) REG_ESPACE; 05674 05675 /* Map uppercase characters to corresponding lowercase ones. */ 05676 for (i = 0; i < CHAR_SET_SIZE; i++) 05677 preg->translate[i] = ISUPPER (i) ? tolower (i) : i; 05678 } 05679 else 05680 preg->translate = NULL; 05681 05682 /* If REG_NEWLINE is set, newlines are treated differently. */ 05683 if (cflags & REG_NEWLINE) 05684 { /* REG_NEWLINE implies neither . nor [^...] match newline. */ 05685 syntax &= ~RE_DOT_NEWLINE; 05686 syntax |= RE_HAT_LISTS_NOT_NEWLINE; 05687 /* It also changes the matching behavior. */ 05688 preg->newline_anchor = 1; 05689 } 05690 else 05691 preg->newline_anchor = 0; 05692 05693 preg->no_sub = !!(cflags & REG_NOSUB); 05694 05695 /* POSIX says a null character in the pattern terminates it, so we 05696 can use strlen here in compiling the pattern. */ 05697 ret = regex_compile (pattern, strlen (pattern), syntax, preg); 05698 05699 /* POSIX doesn't distinguish between an unmatched open-group and an 05700 unmatched close-group: both are REG_EPAREN. */ 05701 if (ret == REG_ERPAREN) ret = REG_EPAREN; 05702 05703 return (int) ret; 05704 } 05705 #ifdef _LIBC 05706 weak_alias (__regcomp, regcomp) 05707 #endif 05708 05709 05710 /* regexec searches for a given pattern, specified by PREG, in the 05711 string STRING. 05712 05713 If NMATCH is zero or REG_NOSUB was set in the cflags argument to 05714 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at 05715 least NMATCH elements, and we set them to the offsets of the 05716 corresponding matched substrings. 05717 05718 EFLAGS specifies `execution flags' which affect matching: if 05719 REG_NOTBOL is set, then ^ does not match at the beginning of the 05720 string; if REG_NOTEOL is set, then $ does not match at the end. 05721 05722 We return 0 if we find a match and REG_NOMATCH if not. */ 05723 05724 int 05725 regexec (preg, string, nmatch, pmatch, eflags) 05726 const regex_t *preg; 05727 const char *string; 05728 size_t nmatch; 05729 regmatch_t pmatch[]; 05730 int eflags; 05731 { 05732 int ret; 05733 struct re_registers regs; 05734 regex_t private_preg; 05735 int len = strlen (string); 05736 boolean want_reg_info = !preg->no_sub && nmatch > 0; 05737 05738 private_preg = *preg; 05739 05740 private_preg.not_bol = !!(eflags & REG_NOTBOL); 05741 private_preg.not_eol = !!(eflags & REG_NOTEOL); 05742 05743 /* The user has told us exactly how many registers to return 05744 information about, via `nmatch'. We have to pass that on to the 05745 matching routines. */ 05746 private_preg.regs_allocated = REGS_FIXED; 05747 05748 if (want_reg_info) 05749 { 05750 regs.num_regs = nmatch; 05751 regs.start = TALLOC (nmatch, regoff_t); 05752 regs.end = TALLOC (nmatch, regoff_t); 05753 if (regs.start == NULL || regs.end == NULL) 05754 return (int) REG_NOMATCH; 05755 } 05756 05757 /* Perform the searching operation. */ 05758 ret = re_search (&private_preg, string, len, 05759 /* start: */ 0, /* range: */ len, 05760 want_reg_info ? ®s : (struct re_registers *) 0); 05761 05762 /* Copy the register information to the POSIX structure. */ 05763 if (want_reg_info) 05764 { 05765 if (ret >= 0) 05766 { 05767 unsigned r; 05768 05769 for (r = 0; r < nmatch; r++) 05770 { 05771 pmatch[r].rm_so = regs.start[r]; 05772 pmatch[r].rm_eo = regs.end[r]; 05773 } 05774 } 05775 05776 /* If we needed the temporary register info, free the space now. */ 05777 free (regs.start); 05778 free (regs.end); 05779 } 05780 05781 /* We want zero return to mean success, unlike `re_search'. */ 05782 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; 05783 } 05784 #ifdef _LIBC 05785 weak_alias (__regexec, regexec) 05786 #endif 05787 05788 05789 /* Returns a message corresponding to an error code, ERRCODE, returned 05790 from either regcomp or regexec. We don't use PREG here. */ 05791 05792 size_t 05793 regerror (int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) 05794 { 05795 const char *msg; 05796 size_t msg_size; 05797 05798 if (errcode < 0 05799 || errcode >= (int) (sizeof (re_error_msgid) 05800 / sizeof (re_error_msgid[0]))) 05801 /* Only error codes returned by the rest of the code should be passed 05802 to this routine. If we are given anything else, or if other regex 05803 code generates an invalid error code, then the program has a bug. 05804 Dump core so we can fix it. */ 05805 abort (); 05806 05807 msg = gettext (re_error_msgid[errcode]); 05808 05809 msg_size = strlen (msg) + 1; /* Includes the null. */ 05810 05811 if (errbuf_size != 0) 05812 { 05813 if (msg_size > errbuf_size) 05814 { 05815 #if defined HAVE_MEMPCPY || defined _LIBC 05816 *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; 05817 #else 05818 memcpy (errbuf, msg, errbuf_size - 1); 05819 errbuf[errbuf_size - 1] = 0; 05820 #endif 05821 } 05822 else 05823 memcpy (errbuf, msg, msg_size); 05824 } 05825 05826 return msg_size; 05827 } 05828 #ifdef _LIBC 05829 weak_alias (__regerror, regerror) 05830 #endif 05831 05832 05833 /* Free dynamically allocated space used by PREG. */ 05834 05835 void 05836 regfree (preg) 05837 regex_t *preg; 05838 { 05839 if (preg->buffer != NULL) 05840 free (preg->buffer); 05841 preg->buffer = NULL; 05842 05843 preg->allocated = 0; 05844 preg->used = 0; 05845 05846 if (preg->fastmap != NULL) 05847 free (preg->fastmap); 05848 preg->fastmap = NULL; 05849 preg->fastmap_accurate = 0; 05850 05851 if (preg->translate != NULL) 05852 free (preg->translate); 05853 preg->translate = NULL; 05854 } 05855 #ifdef _LIBC 05856 weak_alias (__regfree, regfree) 05857 #endif 05858 05859 #endif /* not emacs */ Generated on Sat May 26 2012 04:22:00 for ReactOS by
1.7.6.1
|