ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

regex.c
Go to the documentation of this file.
00001 /* This is a modified version of the GNU C Library regular expression
00002    matching library.  It is modified to meet the requirements the NGS
00003    JS interpreter has for its extension functions (re-entrancy, etc.).
00004    All modifications are isolated with `JS' defines.  You can enable
00005    the original features by defining the pre-processor constant JS to
00006    value 0. */
00007 #define JS 1
00008 
00009 /* Extended regular expression matching and search library,
00010    version 0.12.
00011    (Implements POSIX draft P1003.2/D11.2, except for some of the
00012    internationalization features.)
00013    Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
00014 
00015    The GNU C Library is free software; you can redistribute it and/or
00016    modify it under the terms of the GNU Library General Public License as
00017    published by the Free Software Foundation; either version 2 of the
00018    License, or (at your option) any later version.
00019 
00020    The GNU C Library is distributed in the hope that it will be useful,
00021    but WITHOUT ANY WARRANTY; without even the implied warranty of
00022    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00023    Library General Public License for more details.
00024 
00025    You should have received a copy of the GNU Library General Public
00026    License along with the GNU C Library; see the file COPYING.LIB.  If not,
00027    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00028    Boston, MA 02111-1307, USA.  */
00029 
00030 /* AIX requires this to be the first thing in the file. */
00031 #if defined _AIX && !defined REGEX_MALLOC
00032   #pragma alloca
00033 #endif
00034 
00035 #undef  _GNU_SOURCE
00036 #define _GNU_SOURCE
00037 
00038 #if JS
00039 //#include <jsconfig.h>
00040 #else /* not JS */
00041 #ifdef HAVE_CONFIG_H
00042 # include <config.h>
00043 #endif
00044 #endif /* not JS */
00045 
00046 #ifndef PARAMS
00047 # if defined __GNUC__ || (defined __STDC__ && __STDC__)
00048 #  define PARAMS(args) args
00049 # else
00050 #  define PARAMS(args) ()
00051 # endif  /* GCC.  */
00052 #endif  /* Not PARAMS.  */
00053 
00054 #if defined STDC_HEADERS && !defined emacs
00055 # include <stddef.h>
00056 #else
00057 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
00058 # include <sys/types.h>
00059 #endif
00060 
00061 #define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
00062 
00063 /* For platform which support the ISO C amendement 1 functionality we
00064    support user defined character classes.  */
00065 #if defined _LIBC || WIDE_CHAR_SUPPORT
00066 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
00067 # include <wchar.h>
00068 # include <wctype.h>
00069 #endif
00070 
00071 #ifdef _LIBC
00072 /* We have to keep the namespace clean.  */
00073 # define regfree(preg) __regfree (preg)
00074 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
00075 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
00076 # define regerror(errcode, preg, errbuf, errbuf_size) \
00077     __regerror(errcode, preg, errbuf, errbuf_size)
00078 # define re_set_registers(bu, re, nu, st, en) \
00079     __re_set_registers (bu, re, nu, st, en)
00080 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
00081     __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
00082 # define re_match(bufp, string, size, pos, regs) \
00083     __re_match (bufp, string, size, pos, regs)
00084 # define re_search(bufp, string, size, startpos, range, regs) \
00085     __re_search (bufp, string, size, startpos, range, regs)
00086 # define re_compile_pattern(pattern, length, bufp) \
00087     __re_compile_pattern (pattern, length, bufp)
00088 # define re_set_syntax(syntax) __re_set_syntax (syntax)
00089 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
00090     __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
00091 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
00092 
00093 #define btowc __btowc
00094 #endif
00095 
00096 /* This is for other GNU distributions with internationalized messages.  */
00097 #if HAVE_LIBINTL_H || defined _LIBC
00098 # include <libintl.h>
00099 #else
00100 # define gettext(msgid) (msgid)
00101 #endif
00102 
00103 #ifndef gettext_noop
00104 /* This define is so xgettext can find the internationalizable
00105    strings.  */
00106 # define gettext_noop(String) String
00107 #endif
00108 
00109 /* The `emacs' switch turns on certain matching commands
00110    that make sense only in Emacs. */
00111 #ifdef emacs
00112 
00113 # include "lisp.h"
00114 # include "buffer.h"
00115 # include "syntax.h"
00116 
00117 #else  /* not emacs */
00118 
00119 /* If we are not linking with Emacs proper,
00120    we can't use the relocating allocator
00121    even if config.h says that we can.  */
00122 # undef REL_ALLOC
00123 
00124 # if defined STDC_HEADERS || defined _LIBC || defined _WIN32
00125 #  include <stdlib.h>
00126 # else
00127 char *malloc ();
00128 char *realloc ();
00129 # endif
00130 
00131 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
00132    If nothing else has been done, use the method below.  */
00133 # ifdef INHIBIT_STRING_HEADER
00134 #  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
00135 #   if !defined bzero && !defined bcopy
00136 #    undef INHIBIT_STRING_HEADER
00137 #   endif
00138 #  endif
00139 # endif
00140 
00141 /* This is the normal way of making sure we have a bcopy and a bzero.
00142    This is used in most programs--a few other programs avoid this
00143    by defining INHIBIT_STRING_HEADER.  */
00144 # ifndef INHIBIT_STRING_HEADER
00145 #  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
00146 #   include <string.h>
00147 #   ifndef bzero
00148 //#    ifndef _LIBC
00149 //#     define bzero(s, n)  (memset (s, '\0', n), (s))
00150 //#    else
00151 //#     define bzero(s, n)  __bzero (s, n)
00152 //#    endif
00153 #   endif
00154 #  else
00155 #   include <strings.h>
00156 //#   ifndef memcmp
00157 //#    define memcmp(s1, s2, n) bcmp (s1, s2, n)
00158 //#   endif
00159 //#   ifndef memcpy
00160 //#    define memcpy(d, s, n)   (bcopy (s, d, n), (d))
00161 //#   endif
00162 #define bzero(d,l) memset(d,0,l)
00163 #  endif
00164 # endif
00165 
00166 /* Define the syntax stuff for <, >, etc.  */
00167 
00168 /* This must be nonzero for the wordchar and notwordchar pattern
00169    commands in re_match_2.  */
00170 # ifndef Sword
00171 #  define Sword 1
00172 # endif
00173 
00174 # ifdef SWITCH_ENUM_BUG
00175 #  define SWITCH_ENUM_CAST(x) ((int)(x))
00176 # else
00177 #  define SWITCH_ENUM_CAST(x) (x)
00178 # endif
00179 
00180 /* How many characters in the character set.  */
00181 # define CHAR_SET_SIZE 256
00182 
00183 # ifdef SYNTAX_TABLE
00184 
00185 extern char *re_syntax_table;
00186 
00187 # else /* not SYNTAX_TABLE */
00188 
00189 #if JS
00190 static char re_syntax_table[CHAR_SET_SIZE] =
00191 {
00192 /*
00193   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
00194   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 - 0x0f */
00195   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10 - 0x1f */
00196   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 - 0x2f */
00197   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30 - 0x3f */
00198   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 - 0x4f */
00199   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50 - 0x5f */
00200   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 - 0x6f */
00201   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70 - 0x7f */
00202   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 - 0x8f */
00203   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90 - 0x9f */
00204   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0 - 0xaf */
00205   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0 - 0xbf */
00206   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0 - 0xcf */
00207   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0 - 0xdf */
00208   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0 - 0xef */
00209   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xf0 - 0xff */
00210 };
00211 
00212 static void
00213 init_syntax_once ()
00214 {
00215   /* Nothing here. */
00216 }
00217 #else /* not JS */
00218 static char re_syntax_table[CHAR_SET_SIZE];
00219 
00220 static void
00221 init_syntax_once ()
00222 {
00223    register int c;
00224    static int done = 0;
00225 
00226    if (done)
00227      return;
00228 
00229    bzero (re_syntax_table, sizeof re_syntax_table);
00230 
00231    for (c = 'a'; c <= 'z'; c++)
00232      re_syntax_table[c] = Sword;
00233 
00234    for (c = 'A'; c <= 'Z'; c++)
00235      re_syntax_table[c] = Sword;
00236 
00237    for (c = '0'; c <= '9'; c++)
00238      re_syntax_table[c] = Sword;
00239 
00240    re_syntax_table['_'] = Sword;
00241 
00242    done = 1;
00243 }
00244 #endif /* not JS */
00245 
00246 # endif /* not SYNTAX_TABLE */
00247 
00248 # define SYNTAX(c) re_syntax_table[c]
00249 
00250 #endif /* not emacs */
00251 
00252 /* Get the interface, including the syntax bits.  */
00253 #include "regex.h"
00254 
00255 /* isalpha etc. are used for the character classes.  */
00256 #include <ctype.h>
00257 
00258 /* Jim Meyering writes:
00259 
00260    "... Some ctype macros are valid only for character codes that
00261    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
00262    using /bin/cc or gcc but without giving an ansi option).  So, all
00263    ctype uses should be through macros like ISPRINT...  If
00264    STDC_HEADERS is defined, then autoconf has verified that the ctype
00265    macros don't need to be guarded with references to isascii. ...
00266    Defining isascii to 1 should let any compiler worth its salt
00267    eliminate the && through constant folding."
00268    Solaris defines some of these symbols so we must undefine them first.  */
00269 
00270 #undef ISASCII
00271 #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
00272 # define ISASCII(c) 1
00273 #else
00274 # define ISASCII(c) isascii(c)
00275 #endif
00276 
00277 #ifdef isblank
00278 # define ISBLANK(c) (ISASCII (c) && isblank (c))
00279 #else
00280 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
00281 #endif
00282 #ifdef isgraph
00283 # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
00284 #else
00285 # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
00286 #endif
00287 
00288 #undef ISPRINT
00289 #define ISPRINT(c) (ISASCII (c) && isprint (c))
00290 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
00291 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
00292 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
00293 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
00294 #define ISLOWER(c) (ISASCII (c) && islower (c))
00295 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
00296 #define ISSPACE(c) (ISASCII (c) && isspace (c))
00297 #define ISUPPER(c) (ISASCII (c) && isupper (c))
00298 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
00299 
00300 #ifndef NULL
00301 # define NULL (void *)0
00302 #endif
00303 
00304 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
00305    since ours (we hope) works properly with all combinations of
00306    machines, compilers, `char' and `unsigned char' argument types.
00307    (Per Bothner suggested the basic approach.)  */
00308 #undef SIGN_EXTEND_CHAR
00309 #if __STDC__
00310 # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
00311 #else  /* not __STDC__ */
00312 /* As in Harbison and Steele.  */
00313 # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
00314 #endif
00315 
00316 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
00317    use `alloca' instead of `malloc'.  This is because using malloc in
00318    re_search* or re_match* could cause memory leaks when C-g is used in
00319    Emacs; also, malloc is slower and causes storage fragmentation.  On
00320    the other hand, malloc is more portable, and easier to debug.
00321 
00322    Because we sometimes use alloca, some routines have to be macros,
00323    not functions -- `alloca'-allocated space disappears at the end of the
00324    function it is called in.  */
00325 
00326 #ifdef REGEX_MALLOC
00327 
00328 # define REGEX_ALLOCATE malloc
00329 # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
00330 # define REGEX_FREE free
00331 
00332 #else /* not REGEX_MALLOC  */
00333 
00334 /* Emacs already defines alloca, sometimes.  */
00335 # ifndef alloca
00336 
00337 /* Make alloca work the best possible way.  */
00338 #  ifdef __GNUC__
00339 #   define alloca __builtin_alloca
00340 #  else /* not __GNUC__ */
00341 #   if HAVE_ALLOCA_H
00342 #    include <alloca.h>
00343 #   endif /* HAVE_ALLOCA_H */
00344 #  endif /* not __GNUC__ */
00345 
00346 # endif /* not alloca */
00347 
00348 # define REGEX_ALLOCATE alloca
00349 
00350 /* Assumes a `char *destination' variable.  */
00351 # define REGEX_REALLOCATE(source, osize, nsize)             \
00352   (destination = (char *) alloca (nsize),               \
00353    memcpy (destination, source, osize))
00354 
00355 /* No need to do anything to free, after alloca.  */
00356 # define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
00357 
00358 #endif /* not REGEX_MALLOC */
00359 
00360 /* Define how to allocate the failure stack.  */
00361 
00362 #if defined REL_ALLOC && defined REGEX_MALLOC
00363 
00364 # define REGEX_ALLOCATE_STACK(size)             \
00365   r_alloc (&failure_stack_ptr, (size))
00366 # define REGEX_REALLOCATE_STACK(source, osize, nsize)       \
00367   r_re_alloc (&failure_stack_ptr, (nsize))
00368 # define REGEX_FREE_STACK(ptr)                  \
00369   r_alloc_free (&failure_stack_ptr)
00370 
00371 #else /* not using relocating allocator */
00372 
00373 # ifdef REGEX_MALLOC
00374 
00375 #  define REGEX_ALLOCATE_STACK malloc
00376 #  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
00377 #  define REGEX_FREE_STACK free
00378 
00379 # else /* not REGEX_MALLOC */
00380 
00381 #  define REGEX_ALLOCATE_STACK alloca
00382 
00383 #  define REGEX_REALLOCATE_STACK(source, osize, nsize)          \
00384    REGEX_REALLOCATE (source, osize, nsize)
00385 /* No need to explicitly free anything.  */
00386 #  define REGEX_FREE_STACK(arg)
00387 
00388 # endif /* not REGEX_MALLOC */
00389 #endif /* not using relocating allocator */
00390 
00391 
00392 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
00393    `string1' or just past its end.  This works if PTR is NULL, which is
00394    a good thing.  */
00395 #define FIRST_STRING_P(ptr)                     \
00396   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
00397 
00398 /* (Re)Allocate N items of type T using malloc, or fail.  */
00399 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
00400 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
00401 #define RETALLOC_IF(addr, n, t) \
00402   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
00403 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
00404 
00405 #define BYTEWIDTH 8 /* In bits.  */
00406 
00407 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
00408 
00409 #undef MAX
00410 #undef MIN
00411 #define MAX(a, b) ((a) > (b) ? (a) : (b))
00412 #define MIN(a, b) ((a) < (b) ? (a) : (b))
00413 
00414 typedef char boolean;
00415 #define false 0
00416 #define true 1
00417 
00418 static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
00419                     const char *string1, int size1,
00420                     const char *string2, int size2,
00421                     int pos,
00422                     struct re_registers *regs,
00423                     int stop));
00424 
00425 /* These are the command codes that appear in compiled regular
00426    expressions.  Some opcodes are followed by argument bytes.  A
00427    command code can specify any interpretation whatsoever for its
00428    arguments.  Zero bytes may appear in the compiled regular expression.  */
00429 
00430 typedef enum
00431 {
00432   no_op = 0,
00433 
00434   /* Succeed right away--no more backtracking.  */
00435   succeed,
00436 
00437         /* Followed by one byte giving n, then by n literal bytes.  */
00438   exactn,
00439 
00440         /* Matches any (more or less) character.  */
00441   anychar,
00442 
00443         /* Matches any one char belonging to specified set.  First
00444            following byte is number of bitmap bytes.  Then come bytes
00445            for a bitmap saying which chars are in.  Bits in each byte
00446            are ordered low-bit-first.  A character is in the set if its
00447            bit is 1.  A character too large to have a bit in the map is
00448            automatically not in the set.  */
00449   charset,
00450 
00451         /* Same parameters as charset, but match any character that is
00452            not one of those specified.  */
00453   charset_not,
00454 
00455         /* Start remembering the text that is matched, for storing in a
00456            register.  Followed by one byte with the register number, in
00457            the range 0 to one less than the pattern buffer's re_nsub
00458            field.  Then followed by one byte with the number of groups
00459            inner to this one.  (This last has to be part of the
00460            start_memory only because we need it in the on_failure_jump
00461            of re_match_2.)  */
00462   start_memory,
00463 
00464         /* Stop remembering the text that is matched and store it in a
00465            memory register.  Followed by one byte with the register
00466            number, in the range 0 to one less than `re_nsub' in the
00467            pattern buffer, and one byte with the number of inner groups,
00468            just like `start_memory'.  (We need the number of inner
00469            groups here because we don't have any easy way of finding the
00470            corresponding start_memory when we're at a stop_memory.)  */
00471   stop_memory,
00472 
00473         /* Match a duplicate of something remembered. Followed by one
00474            byte containing the register number.  */
00475   duplicate,
00476 
00477         /* Fail unless at beginning of line.  */
00478   begline,
00479 
00480         /* Fail unless at end of line.  */
00481   endline,
00482 
00483         /* Succeeds if at beginning of buffer (if emacs) or at beginning
00484            of string to be matched (if not).  */
00485   begbuf,
00486 
00487         /* Analogously, for end of buffer/string.  */
00488   endbuf,
00489 
00490         /* Followed by two byte relative address to which to jump.  */
00491   jump,
00492 
00493     /* Same as jump, but marks the end of an alternative.  */
00494   jump_past_alt,
00495 
00496         /* Followed by two-byte relative address of place to resume at
00497            in case of failure.  */
00498   on_failure_jump,
00499 
00500         /* Like on_failure_jump, but pushes a placeholder instead of the
00501            current string position when executed.  */
00502   on_failure_keep_string_jump,
00503 
00504         /* Throw away latest failure point and then jump to following
00505            two-byte relative address.  */
00506   pop_failure_jump,
00507 
00508         /* Change to pop_failure_jump if know won't have to backtrack to
00509            match; otherwise change to jump.  This is used to jump
00510            back to the beginning of a repeat.  If what follows this jump
00511            clearly won't match what the repeat does, such that we can be
00512            sure that there is no use backtracking out of repetitions
00513            already matched, then we change it to a pop_failure_jump.
00514            Followed by two-byte address.  */
00515   maybe_pop_jump,
00516 
00517         /* Jump to following two-byte address, and push a dummy failure
00518            point. This failure point will be thrown away if an attempt
00519            is made to use it for a failure.  A `+' construct makes this
00520            before the first repeat.  Also used as an intermediary kind
00521            of jump when compiling an alternative.  */
00522   dummy_failure_jump,
00523 
00524     /* Push a dummy failure point and continue.  Used at the end of
00525        alternatives.  */
00526   push_dummy_failure,
00527 
00528         /* Followed by two-byte relative address and two-byte number n.
00529            After matching N times, jump to the address upon failure.  */
00530   succeed_n,
00531 
00532         /* Followed by two-byte relative address, and two-byte number n.
00533            Jump to the address N times, then fail.  */
00534   jump_n,
00535 
00536         /* Set the following two-byte relative address to the
00537            subsequent two-byte number.  The address *includes* the two
00538            bytes of number.  */
00539   set_number_at,
00540 
00541   wordchar, /* Matches any word-constituent character.  */
00542   notwordchar,  /* Matches any char that is not a word-constituent.  */
00543 
00544   wordbeg,  /* Succeeds if at word beginning.  */
00545   wordend,  /* Succeeds if at word end.  */
00546 
00547   wordbound,    /* Succeeds if at a word boundary.  */
00548   notwordbound  /* Succeeds if not at a word boundary.  */
00549 
00550 #ifdef emacs
00551   ,before_dot,  /* Succeeds if before point.  */
00552   at_dot,   /* Succeeds if at point.  */
00553   after_dot,    /* Succeeds if after point.  */
00554 
00555     /* Matches any character whose syntax is specified.  Followed by
00556            a byte which contains a syntax code, e.g., Sword.  */
00557   syntaxspec,
00558 
00559     /* Matches any character whose syntax is not that specified.  */
00560   notsyntaxspec
00561 #endif /* emacs */
00562 } re_opcode_t;
00563 
00564 /* Common operations on the compiled pattern.  */
00565 
00566 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
00567 
00568 #define STORE_NUMBER(destination, number)               \
00569   do {                                  \
00570     (destination)[0] = (number) & 0377;                 \
00571     (destination)[1] = (number) >> 8;                   \
00572   } while (0)
00573 
00574 /* Same as STORE_NUMBER, except increment DESTINATION to
00575    the byte after where the number is stored.  Therefore, DESTINATION
00576    must be an lvalue.  */
00577 
00578 #define STORE_NUMBER_AND_INCR(destination, number)          \
00579   do {                                  \
00580     STORE_NUMBER (destination, number);                 \
00581     (destination) += 2;                         \
00582   } while (0)
00583 
00584 /* Put into DESTINATION a number stored in two contiguous bytes starting
00585    at SOURCE.  */
00586 
00587 #define EXTRACT_NUMBER(destination, source)             \
00588   do {                                  \
00589     (destination) = *(source) & 0377;                   \
00590     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;       \
00591   } while (0)
00592 
00593 #ifdef DEBUG
00594 static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
00595 static void
00596 extract_number (dest, source)
00597     int *dest;
00598     unsigned char *source;
00599 {
00600   int temp = SIGN_EXTEND_CHAR (*(source + 1));
00601   *dest = *source & 0377;
00602   *dest += temp << 8;
00603 }
00604 
00605 # ifndef EXTRACT_MACROS /* To debug the macros.  */
00606 #  undef EXTRACT_NUMBER
00607 #  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
00608 # endif /* not EXTRACT_MACROS */
00609 
00610 #endif /* DEBUG */
00611 
00612 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
00613    SOURCE must be an lvalue.  */
00614 
00615 #define EXTRACT_NUMBER_AND_INCR(destination, source)            \
00616   do {                                  \
00617     EXTRACT_NUMBER (destination, source);               \
00618     (source) += 2;                          \
00619   } while (0)
00620 
00621 #ifdef DEBUG
00622 static void extract_number_and_incr _RE_ARGS ((int *destination,
00623                            unsigned char **source));
00624 static void
00625 extract_number_and_incr (destination, source)
00626     int *destination;
00627     unsigned char **source;
00628 {
00629   extract_number (destination, *source);
00630   *source += 2;
00631 }
00632 
00633 # ifndef EXTRACT_MACROS
00634 #  undef EXTRACT_NUMBER_AND_INCR
00635 #  define EXTRACT_NUMBER_AND_INCR(dest, src) \
00636   extract_number_and_incr (&dest, &src)
00637 # endif /* not EXTRACT_MACROS */
00638 
00639 #endif /* DEBUG */
00640 
00641 /* If DEBUG is defined, Regex prints many voluminous messages about what
00642    it is doing (if the variable `debug' is nonzero).  If linked with the
00643    main program in `iregex.c', you can enter patterns and strings
00644    interactively.  And if linked with the main program in `main.c' and
00645    the other test files, you can run the already-written tests.  */
00646 
00647 #ifdef DEBUG
00648 
00649 /* We use standard I/O for debugging.  */
00650 # include <stdio.h>
00651 
00652 /* It is useful to test things that ``must'' be true when debugging.  */
00653 # include <assert.h>
00654 
00655 static int debug = 0;
00656 
00657 # define DEBUG_STATEMENT(e) e
00658 # define DEBUG_PRINT1(x) if (debug) printf (x)
00659 # define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
00660 # define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
00661 # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
00662 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)              \
00663   if (debug) print_partial_compiled_pattern (s, e)
00664 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)         \
00665   if (debug) print_double_string (w, s1, sz1, s2, sz2)
00666 
00667 
00668 /* Print the fastmap in human-readable form.  */
00669 
00670 void
00671 print_fastmap (fastmap)
00672     char *fastmap;
00673 {
00674   unsigned was_a_range = 0;
00675   unsigned i = 0;
00676 
00677   while (i < (1 << BYTEWIDTH))
00678     {
00679       if (fastmap[i++])
00680     {
00681       was_a_range = 0;
00682           putchar (i - 1);
00683           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
00684             {
00685               was_a_range = 1;
00686               i++;
00687             }
00688       if (was_a_range)
00689             {
00690               printf ("-");
00691               putchar (i - 1);
00692             }
00693         }
00694     }
00695   putchar ('\n');
00696 }
00697 
00698 
00699 /* Print a compiled pattern string in human-readable form, starting at
00700    the START pointer into it and ending just before the pointer END.  */
00701 
00702 void
00703 print_partial_compiled_pattern (start, end)
00704     unsigned char *start;
00705     unsigned char *end;
00706 {
00707   int mcnt, mcnt2;
00708   unsigned char *p1;
00709   unsigned char *p = start;
00710   unsigned char *pend = end;
00711 
00712   if (start == NULL)
00713     {
00714       printf ("(null)\n");
00715       return;
00716     }
00717 
00718   /* Loop over pattern commands.  */
00719   while (p < pend)
00720     {
00721       printf ("%d:\t", p - start);
00722 
00723       switch ((re_opcode_t) *p++)
00724     {
00725         case no_op:
00726           printf ("/no_op");
00727           break;
00728 
00729     case exactn:
00730       mcnt = *p++;
00731           printf ("/exactn/%d", mcnt);
00732           do
00733         {
00734               putchar ('/');
00735           putchar (*p++);
00736             }
00737           while (--mcnt);
00738           break;
00739 
00740     case start_memory:
00741           mcnt = *p++;
00742           printf ("/start_memory/%d/%d", mcnt, *p++);
00743           break;
00744 
00745     case stop_memory:
00746           mcnt = *p++;
00747       printf ("/stop_memory/%d/%d", mcnt, *p++);
00748           break;
00749 
00750     case duplicate:
00751       printf ("/duplicate/%d", *p++);
00752       break;
00753 
00754     case anychar:
00755       printf ("/anychar");
00756       break;
00757 
00758     case charset:
00759         case charset_not:
00760           {
00761             register int c, last = -100;
00762         register int in_range = 0;
00763 
00764         printf ("/charset [%s",
00765                 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
00766 
00767             assert (p + *p < pend);
00768 
00769             for (c = 0; c < 256; c++)
00770           if (c / 8 < *p
00771           && (p[1 + (c/8)] & (1 << (c % 8))))
00772         {
00773           /* Are we starting a range?  */
00774           if (last + 1 == c && ! in_range)
00775             {
00776               putchar ('-');
00777               in_range = 1;
00778             }
00779           /* Have we broken a range?  */
00780           else if (last + 1 != c && in_range)
00781               {
00782               putchar (last);
00783               in_range = 0;
00784             }
00785 
00786           if (! in_range)
00787             putchar (c);
00788 
00789           last = c;
00790               }
00791 
00792         if (in_range)
00793           putchar (last);
00794 
00795         putchar (']');
00796 
00797         p += 1 + *p;
00798       }
00799       break;
00800 
00801     case begline:
00802       printf ("/begline");
00803           break;
00804 
00805     case endline:
00806           printf ("/endline");
00807           break;
00808 
00809     case on_failure_jump:
00810           extract_number_and_incr (&mcnt, &p);
00811       printf ("/on_failure_jump to %d", p + mcnt - start);
00812           break;
00813 
00814     case on_failure_keep_string_jump:
00815           extract_number_and_incr (&mcnt, &p);
00816       printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
00817           break;
00818 
00819     case dummy_failure_jump:
00820           extract_number_and_incr (&mcnt, &p);
00821       printf ("/dummy_failure_jump to %d", p + mcnt - start);
00822           break;
00823 
00824     case push_dummy_failure:
00825           printf ("/push_dummy_failure");
00826           break;
00827 
00828         case maybe_pop_jump:
00829           extract_number_and_incr (&mcnt, &p);
00830       printf ("/maybe_pop_jump to %d", p + mcnt - start);
00831       break;
00832 
00833         case pop_failure_jump:
00834       extract_number_and_incr (&mcnt, &p);
00835       printf ("/pop_failure_jump to %d", p + mcnt - start);
00836       break;
00837 
00838         case jump_past_alt:
00839       extract_number_and_incr (&mcnt, &p);
00840       printf ("/jump_past_alt to %d", p + mcnt - start);
00841       break;
00842 
00843         case jump:
00844       extract_number_and_incr (&mcnt, &p);
00845       printf ("/jump to %d", p + mcnt - start);
00846       break;
00847 
00848         case succeed_n:
00849           extract_number_and_incr (&mcnt, &p);
00850       p1 = p + mcnt;
00851           extract_number_and_incr (&mcnt2, &p);
00852       printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
00853           break;
00854 
00855         case jump_n:
00856           extract_number_and_incr (&mcnt, &p);
00857       p1 = p + mcnt;
00858           extract_number_and_incr (&mcnt2, &p);
00859       printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
00860           break;
00861 
00862         case set_number_at:
00863           extract_number_and_incr (&mcnt, &p);
00864       p1 = p + mcnt;
00865           extract_number_and_incr (&mcnt2, &p);
00866       printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
00867           break;
00868 
00869         case wordbound:
00870       printf ("/wordbound");
00871       break;
00872 
00873     case notwordbound:
00874       printf ("/notwordbound");
00875           break;
00876 
00877     case wordbeg:
00878       printf ("/wordbeg");
00879       break;
00880 
00881     case wordend:
00882       printf ("/wordend");
00883 
00884 # ifdef emacs
00885     case before_dot:
00886       printf ("/before_dot");
00887           break;
00888 
00889     case at_dot:
00890       printf ("/at_dot");
00891           break;
00892 
00893     case after_dot:
00894       printf ("/after_dot");
00895           break;
00896 
00897     case syntaxspec:
00898           printf ("/syntaxspec");
00899       mcnt = *p++;
00900       printf ("/%d", mcnt);
00901           break;
00902 
00903     case notsyntaxspec:
00904           printf ("/notsyntaxspec");
00905       mcnt = *p++;
00906       printf ("/%d", mcnt);
00907       break;
00908 # endif /* emacs */
00909 
00910     case wordchar:
00911       printf ("/wordchar");
00912           break;
00913 
00914     case notwordchar:
00915       printf ("/notwordchar");
00916           break;
00917 
00918     case begbuf:
00919       printf ("/begbuf");
00920           break;
00921 
00922     case endbuf:
00923       printf ("/endbuf");
00924           break;
00925 
00926         default:
00927           printf ("?%d", *(p-1));
00928     }
00929 
00930       putchar ('\n');
00931     }
00932 
00933   printf ("%d:\tend of pattern.\n", p - start);
00934 }
00935 
00936 
00937 void
00938 print_compiled_pattern (bufp)
00939     struct re_pattern_buffer *bufp;
00940 {
00941   unsigned char *buffer = bufp->buffer;
00942 
00943   print_partial_compiled_pattern (buffer, buffer + bufp->used);
00944   printf ("%ld bytes used/%ld bytes allocated.\n",
00945       bufp->used, bufp->allocated);
00946 
00947   if (bufp->fastmap_accurate && bufp->fastmap)
00948     {
00949       printf ("fastmap: ");
00950       print_fastmap (bufp->fastmap);
00951     }
00952 
00953   printf ("re_nsub: %d\t", bufp->re_nsub);
00954   printf ("regs_alloc: %d\t", bufp->regs_allocated);
00955   printf ("can_be_null: %d\t", bufp->can_be_null);
00956   printf ("newline_anchor: %d\n", bufp->newline_anchor);
00957   printf ("no_sub: %d\t", bufp->no_sub);
00958   printf ("not_bol: %d\t", bufp->not_bol);
00959   printf ("not_eol: %d\t", bufp->not_eol);
00960   printf ("syntax: %lx\n", bufp->syntax);
00961   /* Perhaps we should print the translate table?  */
00962 }
00963 
00964 
00965 void
00966 print_double_string (where, string1, size1, string2, size2)
00967     const char *where;
00968     const char *string1;
00969     const char *string2;
00970     int size1;
00971     int size2;
00972 {
00973   int this_char;
00974 
00975   if (where == NULL)
00976     printf ("(null)");
00977   else
00978     {
00979       if (FIRST_STRING_P (where))
00980         {
00981           for (this_char = where - string1; this_char < size1; this_char++)
00982             putchar (string1[this_char]);
00983 
00984           where = string2;
00985         }
00986 
00987       for (this_char = where - string2; this_char < size2; this_char++)
00988         putchar (string2[this_char]);
00989     }
00990 }
00991 
00992 void
00993 printchar (c)
00994      int c;
00995 {
00996   putc (c, stderr);
00997 }
00998 
00999 #else /* not DEBUG */
01000 
01001 # undef assert
01002 # define assert(e)
01003 
01004 # define DEBUG_STATEMENT(e)
01005 # define DEBUG_PRINT1(x)
01006 # define DEBUG_PRINT2(x1, x2)
01007 # define DEBUG_PRINT3(x1, x2, x3)
01008 # define DEBUG_PRINT4(x1, x2, x3, x4)
01009 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
01010 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
01011 
01012 #endif /* not DEBUG */
01013 
01014 #if JS
01015 reg_syntax_t re_syntax_options = RE_SYNTAX_GNU_AWK;
01016 #else /* not JS */
01017 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
01018    also be assigned to arbitrarily: each pattern buffer stores its own
01019    syntax, so it can be changed between regex compilations.  */
01020 /* This has no initializer because initialized variables in Emacs
01021    become read-only after dumping.  */
01022 reg_syntax_t re_syntax_options;
01023 #endif /* not JS */
01024 
01025 
01026 /* Specify the precise syntax of regexps for compilation.  This provides
01027    for compatibility for various utilities which historically have
01028    different, incompatible syntaxes.
01029 
01030    The argument SYNTAX is a bit mask comprised of the various bits
01031    defined in regex.h.  We return the old syntax.  */
01032 
01033 reg_syntax_t
01034 re_set_syntax (syntax)
01035     reg_syntax_t syntax;
01036 {
01037   reg_syntax_t ret = re_syntax_options;
01038 
01039   re_syntax_options = syntax;
01040 #ifdef DEBUG
01041   if (syntax & RE_DEBUG)
01042     debug = 1;
01043   else if (debug) /* was on but now is not */
01044     debug = 0;
01045 #endif /* DEBUG */
01046   return ret;
01047 }
01048 #ifdef _LIBC
01049 weak_alias (__re_set_syntax, re_set_syntax)
01050 #endif
01051 
01052 /* This table gives an error message for each of the error codes listed
01053    in regex.h.  Obviously the order here has to be same as there.
01054    POSIX doesn't require that we do anything for REG_NOERROR,
01055    but why not be nice?  */
01056 
01057 static const char *re_error_msgid[] =
01058   {
01059     gettext_noop ("Success"),   /* REG_NOERROR */
01060     gettext_noop ("No match"),  /* REG_NOMATCH */
01061     gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
01062     gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
01063     gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
01064     gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
01065     gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
01066     gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
01067     gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
01068     gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
01069     gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
01070     gettext_noop ("Invalid range end"), /* REG_ERANGE */
01071     gettext_noop ("Memory exhausted"), /* REG_ESPACE */
01072     gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
01073     gettext_noop ("Premature end of regular expression"), /* REG_EEND */
01074     gettext_noop ("Regular expression too big"), /* REG_ESIZE */
01075     gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
01076   };
01077 
01078 /* Avoiding alloca during matching, to placate r_alloc.  */
01079 
01080 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
01081    searching and matching functions should not call alloca.  On some
01082    systems, alloca is implemented in terms of malloc, and if we're
01083    using the relocating allocator routines, then malloc could cause a
01084    relocation, which might (if the strings being searched are in the
01085    ralloc heap) shift the data out from underneath the regexp
01086    routines.
01087 
01088    Here's another reason to avoid allocation: Emacs
01089    processes input from X in a signal handler; processing X input may
01090    call malloc; if input arrives while a matching routine is calling
01091    malloc, then we're scrod.  But Emacs can't just block input while
01092    calling matching routines; then we don't notice interrupts when
01093    they come in.  So, Emacs blocks input around all regexp calls
01094    except the matching calls, which it leaves unprotected, in the
01095    faith that they will not malloc.  */
01096 
01097 /* Normally, this is fine.  */
01098 #define MATCH_MAY_ALLOCATE
01099 
01100 /* When using GNU C, we are not REALLY using the C alloca, no matter
01101    what config.h may say.  So don't take precautions for it.  */
01102 #ifdef __GNUC__
01103 # undef C_ALLOCA
01104 #endif
01105 
01106 /* The match routines may not allocate if (1) they would do it with malloc
01107    and (2) it's not safe for them to use malloc.
01108    Note that if REL_ALLOC is defined, matching would not use malloc for the
01109    failure stack, but we would still use it for the register vectors;
01110    so REL_ALLOC should not affect this.  */
01111 #if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
01112 # undef MATCH_MAY_ALLOCATE
01113 #endif
01114 
01115 
01116 /* Failure stack declarations and macros; both re_compile_fastmap and
01117    re_match_2 use a failure stack.  These have to be macros because of
01118    REGEX_ALLOCATE_STACK.  */
01119 
01120 
01121 /* Number of failure points for which to initially allocate space
01122    when matching.  If this number is exceeded, we allocate more
01123    space, so it is not a hard limit.  */
01124 #ifndef INIT_FAILURE_ALLOC
01125 # define INIT_FAILURE_ALLOC 5
01126 #endif
01127 
01128 /* Roughly the maximum number of failure points on the stack.  Would be
01129    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
01130    This is a variable only so users of regex can assign to it; we never
01131    change it ourselves.  */
01132 
01133 #ifdef INT_IS_16BIT
01134 
01135 # if defined MATCH_MAY_ALLOCATE
01136 /* 4400 was enough to cause a crash on Alpha OSF/1,
01137    whose default stack limit is 2mb.  */
01138 long int re_max_failures = 4000;
01139 # else
01140 long int re_max_failures = 2000;
01141 # endif
01142 
01143 union fail_stack_elt
01144 {
01145   unsigned char *pointer;
01146   long int integer;
01147 };
01148 
01149 typedef union fail_stack_elt fail_stack_elt_t;
01150 
01151 typedef struct
01152 {
01153   fail_stack_elt_t *stack;
01154   unsigned long int size;
01155   unsigned long int avail;      /* Offset of next open position.  */
01156 } fail_stack_type;
01157 
01158 #else /* not INT_IS_16BIT */
01159 
01160 # if defined MATCH_MAY_ALLOCATE
01161 /* 4400 was enough to cause a crash on Alpha OSF/1,
01162    whose default stack limit is 2mb.  */
01163 int re_max_failures = 20000;
01164 # else
01165 int re_max_failures = 2000;
01166 # endif
01167 
01168 union fail_stack_elt
01169 {
01170   unsigned char *pointer;
01171   int integer;
01172 };
01173 
01174 typedef union fail_stack_elt fail_stack_elt_t;
01175 
01176 typedef struct
01177 {
01178   fail_stack_elt_t *stack;
01179   unsigned size;
01180   unsigned avail;           /* Offset of next open position.  */
01181 } fail_stack_type;
01182 
01183 #endif /* INT_IS_16BIT */
01184 
01185 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
01186 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
01187 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
01188 
01189 
01190 /* Define macros to initialize and free the failure stack.
01191    Do `return -2' if the alloc fails.  */
01192 
01193 #ifdef MATCH_MAY_ALLOCATE
01194 # define INIT_FAIL_STACK()                      \
01195   do {                                  \
01196     fail_stack.stack = (fail_stack_elt_t *)             \
01197       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
01198                                     \
01199     if (fail_stack.stack == NULL)                   \
01200       return -2;                            \
01201                                     \
01202     fail_stack.size = INIT_FAILURE_ALLOC;               \
01203     fail_stack.avail = 0;                       \
01204   } while (0)
01205 
01206 # define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
01207 #else
01208 # define INIT_FAIL_STACK()                      \
01209   do {                                  \
01210     fail_stack.avail = 0;                       \
01211   } while (0)
01212 
01213 # define RESET_FAIL_STACK()
01214 #endif
01215 
01216 
01217 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
01218 
01219    Return 1 if succeeds, and 0 if either ran out of memory
01220    allocating space for it or it was already too large.
01221 
01222    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
01223 
01224 #define DOUBLE_FAIL_STACK(fail_stack)                   \
01225   ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
01226    ? 0                                  \
01227    : ((fail_stack).stack = (fail_stack_elt_t *)             \
01228         REGEX_REALLOCATE_STACK ((fail_stack).stack,             \
01229           (fail_stack).size * sizeof (fail_stack_elt_t),        \
01230           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),    \
01231                                     \
01232       (fail_stack).stack == NULL                    \
01233       ? 0                               \
01234       : ((fail_stack).size <<= 1,                   \
01235          1)))
01236 
01237 
01238 /* Push pointer POINTER on FAIL_STACK.
01239    Return 1 if was able to do so and 0 if ran out of memory allocating
01240    space to do so.  */
01241 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                \
01242   ((FAIL_STACK_FULL ()                          \
01243     && !DOUBLE_FAIL_STACK (FAIL_STACK))                 \
01244    ? 0                                  \
01245    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,   \
01246       1))
01247 
01248 /* Push a pointer value onto the failure stack.
01249    Assumes the variable `fail_stack'.  Probably should only
01250    be called from within `PUSH_FAILURE_POINT'.  */
01251 #define PUSH_FAILURE_POINTER(item)                  \
01252   fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
01253 
01254 /* This pushes an integer-valued item onto the failure stack.
01255    Assumes the variable `fail_stack'.  Probably should only
01256    be called from within `PUSH_FAILURE_POINT'.  */
01257 #define PUSH_FAILURE_INT(item)                  \
01258   fail_stack.stack[fail_stack.avail++].integer = (item)
01259 
01260 /* Push a fail_stack_elt_t value onto the failure stack.
01261    Assumes the variable `fail_stack'.  Probably should only
01262    be called from within `PUSH_FAILURE_POINT'.  */
01263 #define PUSH_FAILURE_ELT(item)                  \
01264   fail_stack.stack[fail_stack.avail++] =  (item)
01265 
01266 /* These three POP... operations complement the three PUSH... operations.
01267    All assume that `fail_stack' is nonempty.  */
01268 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
01269 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
01270 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
01271 
01272 /* Used to omit pushing failure point id's when we're not debugging.  */
01273 #ifdef DEBUG
01274 # define DEBUG_PUSH PUSH_FAILURE_INT
01275 # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
01276 #else
01277 # define DEBUG_PUSH(item)
01278 # define DEBUG_POP(item_addr)
01279 #endif
01280 
01281 
01282 /* Push the information about the state we will need
01283    if we ever fail back to it.
01284 
01285    Requires variables fail_stack, regstart, regend, reg_info, and
01286    num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
01287    be declared.
01288 
01289    Does `return FAILURE_CODE' if runs out of memory.  */
01290 
01291 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
01292   do {                                  \
01293     char *destination;                          \
01294     /* Must be int, so when we don't save any registers, the arithmetic \
01295        of 0 + -1 isn't done as unsigned.  */                \
01296     /* Can't be int, since there is not a shred of a guarantee that int \
01297        is wide enough to hold a value of something to which pointer can \
01298        be assigned */                           \
01299     active_reg_t this_reg;                      \
01300                                         \
01301     DEBUG_STATEMENT (failure_id++);                 \
01302     DEBUG_STATEMENT (nfailure_points_pushed++);             \
01303     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);       \
01304     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
01305     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
01306                                     \
01307     DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);      \
01308     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);   \
01309                                     \
01310     /* Ensure we have enough space allocated for what we will push.  */ \
01311     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)           \
01312       {                                 \
01313         if (!DOUBLE_FAIL_STACK (fail_stack))                \
01314           return failure_code;                      \
01315                                     \
01316         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",      \
01317                (fail_stack).size);              \
01318         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
01319       }                                 \
01320                                     \
01321     /* Push the info, starting with the registers.  */          \
01322     DEBUG_PRINT1 ("\n");                        \
01323                                     \
01324     if (1)                              \
01325       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
01326        this_reg++)                          \
01327     {                               \
01328       DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);      \
01329       DEBUG_STATEMENT (num_regs_pushed++);              \
01330                                     \
01331       DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);     \
01332       PUSH_FAILURE_POINTER (regstart[this_reg]);            \
01333                                     \
01334       DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);     \
01335       PUSH_FAILURE_POINTER (regend[this_reg]);          \
01336                                     \
01337       DEBUG_PRINT2 ("    info: %p\n      ",             \
01338             reg_info[this_reg].word.pointer);       \
01339       DEBUG_PRINT2 (" match_null=%d",               \
01340             REG_MATCH_NULL_STRING_P (reg_info[this_reg]));  \
01341       DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));  \
01342       DEBUG_PRINT2 (" matched_something=%d",            \
01343             MATCHED_SOMETHING (reg_info[this_reg]));    \
01344       DEBUG_PRINT2 (" ever_matched=%d",             \
01345             EVER_MATCHED_SOMETHING (reg_info[this_reg]));   \
01346       DEBUG_PRINT1 ("\n");                      \
01347       PUSH_FAILURE_ELT (reg_info[this_reg].word);           \
01348     }                               \
01349                                     \
01350     DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
01351     PUSH_FAILURE_INT (lowest_active_reg);               \
01352                                     \
01353     DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
01354     PUSH_FAILURE_INT (highest_active_reg);              \
01355                                     \
01356     DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);        \
01357     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);       \
01358     PUSH_FAILURE_POINTER (pattern_place);               \
01359                                     \
01360     DEBUG_PRINT2 ("  Pushing string %p: `", string_place);      \
01361     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
01362                  size2);                \
01363     DEBUG_PRINT1 ("'\n");                       \
01364     PUSH_FAILURE_POINTER (string_place);                \
01365                                     \
01366     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);        \
01367     DEBUG_PUSH (failure_id);                        \
01368   } while (0)
01369 
01370 /* This is the number of items that are pushed and popped on the stack
01371    for each register.  */
01372 #define NUM_REG_ITEMS  3
01373 
01374 /* Individual items aside from the registers.  */
01375 #ifdef DEBUG
01376 # define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
01377 #else
01378 # define NUM_NONREG_ITEMS 4
01379 #endif
01380 
01381 /* We push at most this many items on the stack.  */
01382 /* We used to use (num_regs - 1), which is the number of registers
01383    this regexp will save; but that was changed to 5
01384    to avoid stack overflow for a regexp with lots of parens.  */
01385 #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
01386 
01387 /* We actually push this many items.  */
01388 #define NUM_FAILURE_ITEMS               \
01389   (((0                          \
01390      ? 0 : highest_active_reg - lowest_active_reg + 1)  \
01391     * NUM_REG_ITEMS)                    \
01392    + NUM_NONREG_ITEMS)
01393 
01394 /* How many items can still be added to the stack without overflowing it.  */
01395 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
01396 
01397 
01398 /* Pops what PUSH_FAIL_STACK pushes.
01399 
01400    We restore into the parameters, all of which should be lvalues:
01401      STR -- the saved data position.
01402      PAT -- the saved pattern position.
01403      LOW_REG, HIGH_REG -- the highest and lowest active registers.
01404      REGSTART, REGEND -- arrays of string positions.
01405      REG_INFO -- array of information about each subexpression.
01406 
01407    Also assumes the variables `fail_stack' and (if debugging), `bufp',
01408    `pend', `string1', `size1', `string2', and `size2'.  */
01409 
01410 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
01411 {                                   \
01412   DEBUG_STATEMENT (unsigned failure_id;)                \
01413   active_reg_t this_reg;                        \
01414   const unsigned char *string_temp;                 \
01415                                     \
01416   assert (!FAIL_STACK_EMPTY ());                    \
01417                                     \
01418   /* Remove failure points and point to how many regs pushed.  */   \
01419   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                \
01420   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
01421   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size); \
01422                                     \
01423   assert (fail_stack.avail >= NUM_NONREG_ITEMS);            \
01424                                     \
01425   DEBUG_POP (&failure_id);                      \
01426   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);      \
01427                                     \
01428   /* If the saved string location is NULL, it came from an      \
01429      on_failure_keep_string_jump opcode, and we want to throw away the  \
01430      saved NULL, thus retaining our current position in the string.  */ \
01431   string_temp = POP_FAILURE_POINTER ();                 \
01432   if (string_temp != NULL)                      \
01433     str = (const char *) string_temp;                   \
01434                                     \
01435   DEBUG_PRINT2 ("  Popping string %p: `", str);             \
01436   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);  \
01437   DEBUG_PRINT1 ("'\n");                         \
01438                                     \
01439   pat = (unsigned char *) POP_FAILURE_POINTER ();           \
01440   DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);            \
01441   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);           \
01442                                     \
01443   /* Restore register info.  */                     \
01444   high_reg = (active_reg_t) POP_FAILURE_INT ();             \
01445   DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);      \
01446                                     \
01447   low_reg = (active_reg_t) POP_FAILURE_INT ();              \
01448   DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);       \
01449                                     \
01450   if (1)                                \
01451     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)      \
01452       {                                 \
01453     DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);      \
01454                                     \
01455     reg_info[this_reg].word = POP_FAILURE_ELT ();           \
01456     DEBUG_PRINT2 ("      info: %p\n",               \
01457               reg_info[this_reg].word.pointer);         \
01458                                     \
01459     regend[this_reg] = (const char *) POP_FAILURE_POINTER ();   \
01460     DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);     \
01461                                     \
01462     regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
01463     DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);     \
01464       }                                 \
01465   else                                  \
01466     {                                   \
01467       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
01468     {                               \
01469       reg_info[this_reg].word.integer = 0;              \
01470       regend[this_reg] = 0;                     \
01471       regstart[this_reg] = 0;                   \
01472     }                               \
01473       highest_active_reg = high_reg;                    \
01474     }                                   \
01475                                     \
01476   set_regs_matched_done = 0;                        \
01477   DEBUG_STATEMENT (nfailure_points_popped++);               \
01478 } /* POP_FAILURE_POINT */
01479 
01480 
01481 
01482 /* Structure for per-register (a.k.a. per-group) information.
01483    Other register information, such as the
01484    starting and ending positions (which are addresses), and the list of
01485    inner groups (which is a bits list) are maintained in separate
01486    variables.
01487 
01488    We are making a (strictly speaking) nonportable assumption here: that
01489    the compiler will pack our bit fields into something that fits into
01490    the type of `word', i.e., is something that fits into one item on the
01491    failure stack.  */
01492 
01493 
01494 /* Declarations and macros for re_match_2.  */
01495 
01496 typedef union
01497 {
01498   fail_stack_elt_t word;
01499   struct
01500   {
01501       /* This field is one if this group can match the empty string,
01502          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
01503 #define MATCH_NULL_UNSET_VALUE 3
01504     unsigned match_null_string_p : 2;
01505     unsigned is_active : 1;
01506     unsigned matched_something : 1;
01507     unsigned ever_matched_something : 1;
01508   } bits;
01509 } register_info_type;
01510 
01511 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
01512 #define IS_ACTIVE(R)  ((R).bits.is_active)
01513 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
01514 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
01515 
01516 
01517 /* Call this when have matched a real character; it sets `matched' flags
01518    for the subexpressions which we are currently inside.  Also records
01519    that those subexprs have matched.  */
01520 #define SET_REGS_MATCHED()                      \
01521   do                                    \
01522     {                                   \
01523       if (!set_regs_matched_done)                   \
01524     {                               \
01525       active_reg_t r;                       \
01526       set_regs_matched_done = 1;                    \
01527       for (r = lowest_active_reg; r <= highest_active_reg; r++) \
01528         {                               \
01529           MATCHED_SOMETHING (reg_info[r])               \
01530         = EVER_MATCHED_SOMETHING (reg_info[r])          \
01531         = 1;                            \
01532         }                               \
01533     }                               \
01534     }                                   \
01535   while (0)
01536 
01537 /* Registers are set to a sentinel when they haven't yet matched.  */
01538 static char reg_unset_dummy;
01539 #define REG_UNSET_VALUE (&reg_unset_dummy)
01540 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
01541 
01542 /* Subroutine declarations and macros for regex_compile.  */
01543 
01544 static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
01545                           reg_syntax_t syntax,
01546                           struct re_pattern_buffer *bufp));
01547 static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
01548 static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
01549                  int arg1, int arg2));
01550 static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
01551                   int arg, unsigned char *end));
01552 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
01553                   int arg1, int arg2, unsigned char *end));
01554 static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
01555                        reg_syntax_t syntax));
01556 static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
01557                        reg_syntax_t syntax));
01558 static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
01559                           const char *pend,
01560                           char *translate,
01561                           reg_syntax_t syntax,
01562                           unsigned char *b));
01563 
01564 /* Fetch the next character in the uncompiled pattern---translating it
01565    if necessary.  Also cast from a signed character in the constant
01566    string passed to us by the user to an unsigned char that we can use
01567    as an array index (in, e.g., `translate').  */
01568 #ifndef PATFETCH
01569 # define PATFETCH(c)                            \
01570   do {if (p == pend) return REG_EEND;                   \
01571     c = (unsigned char) *p++;                       \
01572     if (translate) c = (unsigned char) translate[c];            \
01573   } while (0)
01574 #endif
01575 
01576 /* Fetch the next character in the uncompiled pattern, with no
01577    translation.  */
01578 #define PATFETCH_RAW(c)                         \
01579   do {if (p == pend) return REG_EEND;                   \
01580     c = (unsigned char) *p++;                       \
01581   } while (0)
01582 
01583 /* Go backwards one character in the pattern.  */
01584 #define PATUNFETCH p--
01585 
01586 
01587 /* If `translate' is non-null, return translate[D], else just D.  We
01588    cast the subscript to translate because some data is declared as
01589    `char *', to avoid warnings when a string constant is passed.  But
01590    when we use a character as a subscript we must make it unsigned.  */
01591 #ifndef TRANSLATE
01592 # define TRANSLATE(d) \
01593   (translate ? (char) translate[(unsigned char) (d)] : (d))
01594 #endif
01595 
01596 
01597 /* Macros for outputting the compiled pattern into `buffer'.  */
01598 
01599 /* If the buffer isn't allocated when it comes in, use this.  */
01600 #define INIT_BUF_SIZE  32
01601 
01602 /* Make sure we have at least N more bytes of space in buffer.  */
01603 #define GET_BUFFER_SPACE(n)                     \
01604     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)  \
01605       EXTEND_BUFFER ()
01606 
01607 /* Make sure we have one more byte of buffer space and then add C to it.  */
01608 #define BUF_PUSH(c)                         \
01609   do {                                  \
01610     GET_BUFFER_SPACE (1);                       \
01611     *b++ = (unsigned char) (c);                     \
01612   } while (0)
01613 
01614 
01615 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
01616 #define BUF_PUSH_2(c1, c2)                      \
01617   do {                                  \
01618     GET_BUFFER_SPACE (2);                       \
01619     *b++ = (unsigned char) (c1);                    \
01620     *b++ = (unsigned char) (c2);                    \
01621   } while (0)
01622 
01623 
01624 /* As with BUF_PUSH_2, except for three bytes.  */
01625 #define BUF_PUSH_3(c1, c2, c3)                      \
01626   do {                                  \
01627     GET_BUFFER_SPACE (3);                       \
01628     *b++ = (unsigned char) (c1);                    \
01629     *b++ = (unsigned char) (c2);                    \
01630     *b++ = (unsigned char) (c3);                    \
01631   } while (0)
01632 
01633 
01634 /* Store a jump with opcode OP at LOC to location TO.  We store a
01635    relative address offset by the three bytes the jump itself occupies.  */
01636 #define STORE_JUMP(op, loc, to) \
01637   store_op1 (op, loc, (int) ((to) - (loc) - 3))
01638 
01639 /* Likewise, for a two-argument jump.  */
01640 #define STORE_JUMP2(op, loc, to, arg) \
01641   store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
01642 
01643 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
01644 #define INSERT_JUMP(op, loc, to) \
01645   insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
01646 
01647 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
01648 #define INSERT_JUMP2(op, loc, to, arg) \
01649   insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
01650 
01651 
01652 /* This is not an arbitrary limit: the arguments which represent offsets
01653    into the pattern are two bytes long.  So if 2^16 bytes turns out to
01654    be too small, many things would have to change.  */
01655 /* Any other compiler which, like MSC, has allocation limit below 2^16
01656    bytes will have to use approach similar to what was done below for
01657    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
01658    reallocating to 0 bytes.  Such thing is not going to work too well.
01659    You have been warned!!  */
01660 #if defined _MSC_VER  && !defined WIN32
01661 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
01662    The REALLOC define eliminates a flurry of conversion warnings,
01663    but is not required. */
01664 # define MAX_BUF_SIZE  65500L
01665 # define REALLOC(p,s) realloc ((p), (size_t) (s))
01666 #else
01667 # define MAX_BUF_SIZE (1L << 16)
01668 # define REALLOC(p,s) realloc ((p), (s))
01669 #endif
01670 
01671 /* Extend the buffer by twice its current size via realloc and
01672    reset the pointers that pointed into the old block to point to the
01673    correct places in the new one.  If extending the buffer results in it
01674    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
01675 #define EXTEND_BUFFER()                         \
01676   do {                                  \
01677     unsigned char *old_buffer = bufp->buffer;               \
01678     if (bufp->allocated == MAX_BUF_SIZE)                \
01679       return REG_ESIZE;                         \
01680     bufp->allocated <<= 1;                      \
01681     if (bufp->allocated > MAX_BUF_SIZE)                 \
01682       bufp->allocated = MAX_BUF_SIZE;                   \
01683     bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
01684     if (bufp->buffer == NULL)                       \
01685       return REG_ESPACE;                        \
01686     /* If the buffer moved, move all the pointers into it.  */      \
01687     if (old_buffer != bufp->buffer)                 \
01688       {                                 \
01689         b = (b - old_buffer) + bufp->buffer;                \
01690         begalt = (begalt - old_buffer) + bufp->buffer;          \
01691         if (fixup_alt_jump)                     \
01692           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
01693         if (laststart)                          \
01694           laststart = (laststart - old_buffer) + bufp->buffer;      \
01695         if (pending_exact)                      \
01696           pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
01697       }                                 \
01698   } while (0)
01699 
01700 
01701 /* Since we have one byte reserved for the register number argument to
01702    {start,stop}_memory, the maximum number of groups we can report
01703    things about is what fits in that byte.  */
01704 #define MAX_REGNUM 255
01705 
01706 /* But patterns can have more than `MAX_REGNUM' registers.  We just
01707    ignore the excess.  */
01708 typedef unsigned regnum_t;
01709 
01710 
01711 /* Macros for the compile stack.  */
01712 
01713 /* Since offsets can go either forwards or backwards, this type needs to
01714    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
01715 /* int may be not enough when sizeof(int) == 2.  */
01716 typedef long pattern_offset_t;
01717 
01718 typedef struct
01719 {
01720   pattern_offset_t begalt_offset;
01721   pattern_offset_t fixup_alt_jump;
01722   pattern_offset_t inner_group_offset;
01723   pattern_offset_t laststart_offset;
01724   regnum_t regnum;
01725 } compile_stack_elt_t;
01726 
01727 
01728 typedef struct
01729 {
01730   compile_stack_elt_t *stack;
01731   unsigned size;
01732   unsigned avail;           /* Offset of next open position.  */
01733 } compile_stack_type;
01734 
01735 
01736 #define INIT_COMPILE_STACK_SIZE 32
01737 
01738 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
01739 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
01740 
01741 /* The next available element.  */
01742 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
01743 
01744 
01745 /* Set the bit for character C in a list.  */
01746 #define SET_LIST_BIT(c)                               \
01747   (b[((unsigned char) (c)) / BYTEWIDTH]               \
01748    |= 1 << (((unsigned char) c) % BYTEWIDTH))
01749 
01750 
01751 /* Get the next unsigned number in the uncompiled pattern.  */
01752 #define GET_UNSIGNED_NUMBER(num)                    \
01753   { if (p != pend)                          \
01754      {                                  \
01755        PATFETCH (c);                            \
01756        while (ISDIGIT (c))                      \
01757          {                              \
01758            if (num < 0)                         \
01759               num = 0;                          \
01760            num = num * 10 + c - '0';                    \
01761            if (p == pend)                       \
01762               break;                            \
01763            PATFETCH (c);                        \
01764          }                              \
01765        }                                \
01766     }
01767 
01768 #if defined _LIBC || WIDE_CHAR_SUPPORT
01769 /* The GNU C library provides support for user-defined character classes
01770    and the functions from ISO C amendement 1.  */
01771 # ifdef CHARCLASS_NAME_MAX
01772 #  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
01773 # else
01774 /* This shouldn't happen but some implementation might still have this
01775    problem.  Use a reasonable default value.  */
01776 #  define CHAR_CLASS_MAX_LENGTH 256
01777 # endif
01778 
01779 # ifdef _LIBC
01780 #  define IS_CHAR_CLASS(string) __wctype (string)
01781 # else
01782 #  define IS_CHAR_CLASS(string) wctype (string)
01783 # endif
01784 #else
01785 # define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
01786 
01787 # define IS_CHAR_CLASS(string)                      \
01788    (STREQ (string, "alpha") || STREQ (string, "upper")          \
01789     || STREQ (string, "lower") || STREQ (string, "digit")       \
01790     || STREQ (string, "alnum") || STREQ (string, "xdigit")      \
01791     || STREQ (string, "space") || STREQ (string, "print")       \
01792     || STREQ (string, "punct") || STREQ (string, "graph")       \
01793     || STREQ (string, "cntrl") || STREQ (string, "blank"))
01794 #endif
01795 
01796 #ifndef MATCH_MAY_ALLOCATE
01797 
01798 /* If we cannot allocate large objects within re_match_2_internal,
01799    we make the fail stack and register vectors global.
01800    The fail stack, we grow to the maximum size when a regexp
01801    is compiled.
01802    The register vectors, we adjust in size each time we
01803    compile a regexp, according to the number of registers it needs.  */
01804 
01805 static fail_stack_type fail_stack;
01806 
01807 /* Size with which the following vectors are currently allocated.
01808    That is so we can make them bigger as needed,
01809    but never make them smaller.  */
01810 static int regs_allocated_size;
01811 
01812 static const char **     regstart, **     regend;
01813 static const char ** old_regstart, ** old_regend;
01814 static const char **best_regstart, **best_regend;
01815 static register_info_type *reg_info;
01816 static const char **reg_dummy;
01817 static register_info_type *reg_info_dummy;
01818 
01819 /* Make the register vectors big enough for NUM_REGS registers,
01820    but don't make them smaller.  */
01821 
01822 static
01823 regex_grow_registers (num_regs)
01824      int num_regs;
01825 {
01826   if (num_regs > regs_allocated_size)
01827     {
01828       RETALLOC_IF (regstart,     num_regs, const char *);
01829       RETALLOC_IF (regend,   num_regs, const char *);
01830       RETALLOC_IF (old_regstart, num_regs, const char *);
01831       RETALLOC_IF (old_regend,   num_regs, const char *);
01832       RETALLOC_IF (best_regstart, num_regs, const char *);
01833       RETALLOC_IF (best_regend,  num_regs, const char *);
01834       RETALLOC_IF (reg_info,     num_regs, register_info_type);
01835       RETALLOC_IF (reg_dummy,    num_regs, const char *);
01836       RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
01837 
01838       regs_allocated_size = num_regs;
01839     }
01840 }
01841 
01842 #endif /* not MATCH_MAY_ALLOCATE */
01843 
01844 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
01845                          compile_stack,
01846                          regnum_t regnum));
01847 
01848 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
01849    Returns one of error codes defined in `regex.h', or zero for success.
01850 
01851    Assumes the `allocated' (and perhaps `buffer') and `translate'
01852    fields are set in BUFP on entry.
01853 
01854    If it succeeds, results are put in BUFP (if it returns an error, the
01855    contents of BUFP are undefined):
01856      `buffer' is the compiled pattern;
01857      `syntax' is set to SYNTAX;
01858      `used' is set to the length of the compiled pattern;
01859      `fastmap_accurate' is zero;
01860      `re_nsub' is the number of subexpressions in PATTERN;
01861      `not_bol' and `not_eol' are zero;
01862 
01863    The `fastmap' and `newline_anchor' fields are neither
01864    examined nor set.  */
01865 
01866 /* Return, freeing storage we allocated.  */
01867 #define FREE_STACK_RETURN(value)        \
01868   return (free (compile_stack.stack), value)
01869 
01870 static reg_errcode_t
01871 regex_compile (pattern, size, syntax, bufp)
01872      const char *pattern;
01873      size_t size;
01874      reg_syntax_t syntax;
01875      struct re_pattern_buffer *bufp;
01876 {
01877   /* We fetch characters from PATTERN here.  Even though PATTERN is
01878      `char *' (i.e., signed), we declare these variables as unsigned, so
01879      they can be reliably used as array indices.  */
01880   register unsigned char c, c1;
01881 
01882   /* A random temporary spot in PATTERN.  */
01883   const char *p1;
01884 
01885   /* Points to the end of the buffer, where we should append.  */
01886   register unsigned char *b;
01887 
01888   /* Keeps track of unclosed groups.  */
01889   compile_stack_type compile_stack;
01890 
01891   /* Points to the current (ending) position in the pattern.  */
01892   const char *p = pattern;
01893   const char *pend = pattern + size;
01894 
01895   /* How to translate the characters in the pattern.  */
01896   RE_TRANSLATE_TYPE translate = bufp->translate;
01897 
01898   /* Address of the count-byte of the most recently inserted `exactn'
01899      command.  This makes it possible to tell if a new exact-match
01900      character can be added to that command or if the character requires
01901      a new `exactn' command.  */
01902   unsigned char *pending_exact = 0;
01903 
01904   /* Address of start of the most recently finished expression.
01905      This tells, e.g., postfix * where to find the start of its
01906      operand.  Reset at the beginning of groups and alternatives.  */
01907   unsigned char *laststart = 0;
01908 
01909   /* Address of beginning of regexp, or inside of last group.  */
01910   unsigned char *begalt;
01911 
01912   /* Place in the uncompiled pattern (i.e., the {) to
01913      which to go back if the interval is invalid.  */
01914   const char *beg_interval;
01915 
01916   /* Address of the place where a forward jump should go to the end of
01917      the containing expression.  Each alternative of an `or' -- except the
01918      last -- ends with a forward jump of this sort.  */
01919   unsigned char *fixup_alt_jump = 0;
01920 
01921   /* Counts open-groups as they are encountered.  Remembered for the
01922      matching close-group on the compile stack, so the same register
01923      number is put in the stop_memory as the start_memory.  */
01924   regnum_t regnum = 0;
01925 
01926 #ifdef DEBUG
01927   DEBUG_PRINT1 ("\nCompiling pattern: ");
01928   if (debug)
01929     {
01930       unsigned debug_count;
01931 
01932       for (debug_count = 0; debug_count < size; debug_count++)
01933         putchar (pattern[debug_count]);
01934       putchar ('\n');
01935     }
01936 #endif /* DEBUG */
01937 
01938   /* Initialize the compile stack.  */
01939   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
01940   if (compile_stack.stack == NULL)
01941     return REG_ESPACE;
01942 
01943   compile_stack.size = INIT_COMPILE_STACK_SIZE;
01944   compile_stack.avail = 0;
01945 
01946   /* Initialize the pattern buffer.  */
01947   bufp->syntax = syntax;
01948   bufp->fastmap_accurate = 0;
01949   bufp->not_bol = bufp->not_eol = 0;
01950 
01951   /* Set `used' to zero, so that if we return an error, the pattern
01952      printer (for debugging) will think there's no pattern.  We reset it
01953      at the end.  */
01954   bufp->used = 0;
01955 
01956   /* Always count groups, whether or not bufp->no_sub is set.  */
01957   bufp->re_nsub = 0;
01958 
01959 #if !defined emacs && !defined SYNTAX_TABLE
01960   /* Initialize the syntax table.  */
01961    init_syntax_once ();
01962 #endif
01963 
01964   if (bufp->allocated == 0)
01965     {
01966       if (bufp->buffer)
01967     { /* If zero allocated, but buffer is non-null, try to realloc
01968              enough space.  This loses if buffer's address is bogus, but
01969              that is the user's responsibility.  */
01970           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
01971         }
01972       else
01973         { /* Caller did not allocate a buffer.  Do it for them.  */
01974           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
01975         }
01976       if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
01977 
01978       bufp->allocated = INIT_BUF_SIZE;
01979     }
01980 
01981   begalt = b = bufp->buffer;
01982 
01983   /* Loop through the uncompiled pattern until we're at the end.  */
01984   while (p != pend)
01985     {
01986       PATFETCH (c);
01987 
01988       switch (c)
01989         {
01990         case '^':
01991           {
01992             if (   /* If at start of pattern, it's an operator.  */
01993                    p == pattern + 1
01994                    /* If context independent, it's an operator.  */
01995                 || syntax & RE_CONTEXT_INDEP_ANCHORS
01996                    /* Otherwise, depends on what's come before.  */
01997                 || at_begline_loc_p (pattern, p, syntax))
01998               BUF_PUSH (begline);
01999             else
02000               goto normal_char;
02001           }
02002           break;
02003 
02004 
02005         case '$':
02006           {
02007             if (   /* If at end of pattern, it's an operator.  */
02008                    p == pend
02009                    /* If context independent, it's an operator.  */
02010                 || syntax & RE_CONTEXT_INDEP_ANCHORS
02011                    /* Otherwise, depends on what's next.  */
02012                 || at_endline_loc_p (p, pend, syntax))
02013                BUF_PUSH (endline);
02014              else
02015                goto normal_char;
02016            }
02017            break;
02018 
02019 
02020     case '+':
02021         case '?':
02022           if ((syntax & RE_BK_PLUS_QM)
02023               || (syntax & RE_LIMITED_OPS))
02024             goto normal_char;
02025         handle_plus:
02026         case '*':
02027           /* If there is no previous pattern... */
02028           if (!laststart)
02029             {
02030               if (syntax & RE_CONTEXT_INVALID_OPS)
02031                 FREE_STACK_RETURN (REG_BADRPT);
02032               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
02033                 goto normal_char;
02034             }
02035 
02036           {
02037             /* Are we optimizing this jump?  */
02038             boolean keep_string_p = false;
02039 
02040             /* 1 means zero (many) matches is allowed.  */
02041             char zero_times_ok = 0, many_times_ok = 0;
02042 
02043             /* If there is a sequence of repetition chars, collapse it
02044                down to just one (the right one).  We can't combine
02045                interval operators with these because of, e.g., `a{2}*',
02046                which should only match an even number of `a's.  */
02047 
02048             for (;;)
02049               {
02050                 zero_times_ok |= c != '+';
02051                 many_times_ok |= c != '?';
02052 
02053                 if (p == pend)
02054                   break;
02055 
02056                 PATFETCH (c);
02057 
02058                 if (c == '*'
02059                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
02060                   ;
02061 
02062                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
02063                   {
02064                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02065 
02066                     PATFETCH (c1);
02067                     if (!(c1 == '+' || c1 == '?'))
02068                       {
02069                         PATUNFETCH;
02070                         PATUNFETCH;
02071                         break;
02072                       }
02073 
02074                     c = c1;
02075                   }
02076                 else
02077                   {
02078                     PATUNFETCH;
02079                     break;
02080                   }
02081 
02082                 /* If we get here, we found another repeat character.  */
02083                }
02084 
02085             /* Star, etc. applied to an empty pattern is equivalent
02086                to an empty pattern.  */
02087             if (!laststart)
02088               break;
02089 
02090             /* Now we know whether or not zero matches is allowed
02091                and also whether or not two or more matches is allowed.  */
02092             if (many_times_ok)
02093               { /* More than one repetition is allowed, so put in at the
02094                    end a backward relative jump from `b' to before the next
02095                    jump we're going to put in below (which jumps from
02096                    laststart to after this jump).
02097 
02098                    But if we are at the `*' in the exact sequence `.*\n',
02099                    insert an unconditional jump backwards to the .,
02100                    instead of the beginning of the loop.  This way we only
02101                    push a failure point once, instead of every time
02102                    through the loop.  */
02103                 assert (p - 1 > pattern);
02104 
02105                 /* Allocate the space for the jump.  */
02106                 GET_BUFFER_SPACE (3);
02107 
02108                 /* We know we are not at the first character of the pattern,
02109                    because laststart was nonzero.  And we've already
02110                    incremented `p', by the way, to be the character after
02111                    the `*'.  Do we have to do something analogous here
02112                    for null bytes, because of RE_DOT_NOT_NULL?  */
02113                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
02114             && zero_times_ok
02115                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
02116                     && !(syntax & RE_DOT_NEWLINE))
02117                   { /* We have .*\n.  */
02118                     STORE_JUMP (jump, b, laststart);
02119                     keep_string_p = true;
02120                   }
02121                 else
02122                   /* Anything else.  */
02123                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
02124 
02125                 /* We've added more stuff to the buffer.  */
02126                 b += 3;
02127               }
02128 
02129             /* On failure, jump from laststart to b + 3, which will be the
02130                end of the buffer after this jump is inserted.  */
02131             GET_BUFFER_SPACE (3);
02132             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
02133                                        : on_failure_jump,
02134                          laststart, b + 3);
02135             pending_exact = 0;
02136             b += 3;
02137 
02138             if (!zero_times_ok)
02139               {
02140                 /* At least one repetition is required, so insert a
02141                    `dummy_failure_jump' before the initial
02142                    `on_failure_jump' instruction of the loop. This
02143                    effects a skip over that instruction the first time
02144                    we hit that loop.  */
02145                 GET_BUFFER_SPACE (3);
02146                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
02147                 b += 3;
02148               }
02149             }
02150       break;
02151 
02152 
02153     case '.':
02154           laststart = b;
02155           BUF_PUSH (anychar);
02156           break;
02157 
02158 
02159         case '[':
02160           {
02161             boolean had_char_class = false;
02162 
02163             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02164 
02165             /* Ensure that we have enough space to push a charset: the
02166                opcode, the length count, and the bitset; 34 bytes in all.  */
02167         GET_BUFFER_SPACE (34);
02168 
02169             laststart = b;
02170 
02171             /* We test `*p == '^' twice, instead of using an if
02172                statement, so we only need one BUF_PUSH.  */
02173             BUF_PUSH (*p == '^' ? charset_not : charset);
02174             if (*p == '^')
02175               p++;
02176 
02177             /* Remember the first position in the bracket expression.  */
02178             p1 = p;
02179 
02180             /* Push the number of bytes in the bitmap.  */
02181             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
02182 
02183             /* Clear the whole map.  */
02184             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
02185 
02186             /* charset_not matches newline according to a syntax bit.  */
02187             if ((re_opcode_t) b[-2] == charset_not
02188                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
02189               SET_LIST_BIT ('\n');
02190 
02191             /* Read in characters and ranges, setting map bits.  */
02192             for (;;)
02193               {
02194                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02195 
02196                 PATFETCH (c);
02197 
02198                 /* \ might escape characters inside [...] and [^...].  */
02199                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
02200                   {
02201                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02202 
02203                     PATFETCH (c1);
02204                     SET_LIST_BIT (c1);
02205                     continue;
02206                   }
02207 
02208                 /* Could be the end of the bracket expression.  If it's
02209                    not (i.e., when the bracket expression is `[]' so
02210                    far), the ']' character bit gets set way below.  */
02211                 if (c == ']' && p != p1 + 1)
02212                   break;
02213 
02214                 /* Look ahead to see if it's a range when the last thing
02215                    was a character class.  */
02216                 if (had_char_class && c == '-' && *p != ']')
02217                   FREE_STACK_RETURN (REG_ERANGE);
02218 
02219                 /* Look ahead to see if it's a range when the last thing
02220                    was a character: if this is a hyphen not at the
02221                    beginning or the end of a list, then it's the range
02222                    operator.  */
02223                 if (c == '-'
02224                     && !(p - 2 >= pattern && p[-2] == '[')
02225                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
02226                     && *p != ']')
02227                   {
02228                     reg_errcode_t ret
02229                       = compile_range (&p, pend, translate, syntax, b);
02230                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02231                   }
02232 
02233                 else if (p[0] == '-' && p[1] != ']')
02234                   { /* This handles ranges made up of characters only.  */
02235                     reg_errcode_t ret;
02236 
02237             /* Move past the `-'.  */
02238                     PATFETCH (c1);
02239 
02240                     ret = compile_range (&p, pend, translate, syntax, b);
02241                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02242                   }
02243 
02244                 /* See if we're at the beginning of a possible character
02245                    class.  */
02246 
02247                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
02248                   { /* Leave room for the null.  */
02249                     char str[CHAR_CLASS_MAX_LENGTH + 1];
02250 
02251                     PATFETCH (c);
02252                     c1 = 0;
02253 
02254                     /* If pattern is `[[:'.  */
02255                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02256 
02257                     for (;;)
02258                       {
02259                         PATFETCH (c);
02260                         if ((c == ':' && *p == ']') || p == pend
02261                             || c1 == CHAR_CLASS_MAX_LENGTH)
02262                           break;
02263                         str[c1++] = c;
02264                       }
02265                     str[c1] = '\0';
02266 
02267                     /* If isn't a word bracketed by `[:' and `:]':
02268                        undo the ending character, the letters, and leave
02269                        the leading `:' and `[' (but set bits for them).  */
02270                     if (c == ':' && *p == ']')
02271                       {
02272 #if defined _LIBC || WIDE_CHAR_SUPPORT
02273                         boolean is_lower = STREQ (str, "lower");
02274                         boolean is_upper = STREQ (str, "upper");
02275             wctype_t wt;
02276                         int ch;
02277 
02278             wt = IS_CHAR_CLASS (str);
02279             if (wt == 0)
02280               FREE_STACK_RETURN (REG_ECTYPE);
02281 
02282                         /* Throw away the ] at the end of the character
02283                            class.  */
02284                         PATFETCH (c);
02285 
02286                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02287 
02288                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
02289               {
02290 # ifdef _LIBC
02291                 if (__iswctype (__btowc (ch), wt))
02292                   SET_LIST_BIT (ch);
02293 # else
02294                 if (iswctype (btowc (ch), wt))
02295                   SET_LIST_BIT (ch);
02296 # endif
02297 
02298                 if (translate && (is_upper || is_lower)
02299                 && (ISUPPER (ch) || ISLOWER (ch)))
02300                   SET_LIST_BIT (ch);
02301               }
02302 
02303                         had_char_class = true;
02304 #else
02305                         int ch;
02306                         boolean is_alnum = STREQ (str, "alnum");
02307                         boolean is_alpha = STREQ (str, "alpha");
02308                         boolean is_blank = STREQ (str, "blank");
02309                         boolean is_cntrl = STREQ (str, "cntrl");
02310                         boolean is_digit = STREQ (str, "digit");
02311                         boolean is_graph = STREQ (str, "graph");
02312                         boolean is_lower = STREQ (str, "lower");
02313                         boolean is_print = STREQ (str, "print");
02314                         boolean is_punct = STREQ (str, "punct");
02315                         boolean is_space = STREQ (str, "space");
02316                         boolean is_upper = STREQ (str, "upper");
02317                         boolean is_xdigit = STREQ (str, "xdigit");
02318 
02319                         if (!IS_CHAR_CLASS (str))
02320               FREE_STACK_RETURN (REG_ECTYPE);
02321 
02322                         /* Throw away the ] at the end of the character
02323                            class.  */
02324                         PATFETCH (c);
02325 
02326                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02327 
02328                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
02329                           {
02330                 /* This was split into 3 if's to
02331                    avoid an arbitrary limit in some compiler.  */
02332                             if (   (is_alnum  && ISALNUM (ch))
02333                                 || (is_alpha  && ISALPHA (ch))
02334                                 || (is_blank  && ISBLANK (ch))
02335                                 || (is_cntrl  && ISCNTRL (ch)))
02336                   SET_LIST_BIT (ch);
02337                 if (   (is_digit  && ISDIGIT (ch))
02338                                 || (is_graph  && ISGRAPH (ch))
02339                                 || (is_lower  && ISLOWER (ch))
02340                                 || (is_print  && ISPRINT (ch)))
02341                   SET_LIST_BIT (ch);
02342                 if (   (is_punct  && ISPUNCT (ch))
02343                                 || (is_space  && ISSPACE (ch))
02344                                 || (is_upper  && ISUPPER (ch))
02345                                 || (is_xdigit && ISXDIGIT (ch)))
02346                   SET_LIST_BIT (ch);
02347                 if (   translate && (is_upper || is_lower)
02348                 && (ISUPPER (ch) || ISLOWER (ch)))
02349                   SET_LIST_BIT (ch);
02350                           }
02351                         had_char_class = true;
02352 #endif  /* libc || wctype.h */
02353                       }
02354                     else
02355                       {
02356                         c1++;
02357                         while (c1--)
02358                           PATUNFETCH;
02359                         SET_LIST_BIT ('[');
02360                         SET_LIST_BIT (':');
02361                         had_char_class = false;
02362                       }
02363                   }
02364                 else
02365                   {
02366                     had_char_class = false;
02367                     SET_LIST_BIT (c);
02368                   }
02369               }
02370 
02371             /* Discard any (non)matching list bytes that are all 0 at the
02372                end of the map.  Decrease the map-length byte too.  */
02373             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
02374               b[-1]--;
02375             b += b[-1];
02376           }
02377           break;
02378 
02379 
02380     case '(':
02381           if (syntax & RE_NO_BK_PARENS)
02382             goto handle_open;
02383           else
02384             goto normal_char;
02385 
02386 
02387         case ')':
02388           if (syntax & RE_NO_BK_PARENS)
02389             goto handle_close;
02390           else
02391             goto normal_char;
02392 
02393 
02394         case '\n':
02395           if (syntax & RE_NEWLINE_ALT)
02396             goto handle_alt;
02397           else
02398             goto normal_char;
02399 
02400 
02401     case '|':
02402           if (syntax & RE_NO_BK_VBAR)
02403             goto handle_alt;
02404           else
02405             goto normal_char;
02406 
02407 
02408         case '{':
02409            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
02410              goto handle_interval;
02411            else
02412              goto normal_char;
02413 
02414 
02415         case '\\':
02416           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02417 
02418           /* Do not translate the character after the \, so that we can
02419              distinguish, e.g., \B from \b, even if we normally would
02420              translate, e.g., B to b.  */
02421           PATFETCH_RAW (c);
02422 
02423           switch (c)
02424             {
02425             case '(':
02426               if (syntax & RE_NO_BK_PARENS)
02427                 goto normal_backslash;
02428 
02429             handle_open:
02430               bufp->re_nsub++;
02431               regnum++;
02432 
02433               if (COMPILE_STACK_FULL)
02434                 {
02435                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
02436                             compile_stack_elt_t);
02437                   if (compile_stack.stack == NULL) return REG_ESPACE;
02438 
02439                   compile_stack.size <<= 1;
02440                 }
02441 
02442               /* These are the values to restore when we hit end of this
02443                  group.  They are all relative offsets, so that if the
02444                  whole pattern moves because of realloc, they will still
02445                  be valid.  */
02446               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
02447               COMPILE_STACK_TOP.fixup_alt_jump
02448                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
02449               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
02450               COMPILE_STACK_TOP.regnum = regnum;
02451 
02452               /* We will eventually replace the 0 with the number of
02453                  groups inner to this one.  But do not push a
02454                  start_memory for groups beyond the last one we can
02455                  represent in the compiled pattern.  */
02456               if (regnum <= MAX_REGNUM)
02457                 {
02458                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
02459                   BUF_PUSH_3 (start_memory, regnum, 0);
02460                 }
02461 
02462               compile_stack.avail++;
02463 
02464               fixup_alt_jump = 0;
02465               laststart = 0;
02466               begalt = b;
02467           /* If we've reached MAX_REGNUM groups, then this open
02468          won't actually generate any code, so we'll have to
02469          clear pending_exact explicitly.  */
02470           pending_exact = 0;
02471               break;
02472 
02473 
02474             case ')':
02475               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
02476 
02477               if (COMPILE_STACK_EMPTY)
02478         {
02479           if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02480             goto normal_backslash;
02481           else
02482             FREE_STACK_RETURN (REG_ERPAREN);
02483         }
02484 
02485             handle_close:
02486               if (fixup_alt_jump)
02487                 { /* Push a dummy failure point at the end of the
02488                      alternative for a possible future
02489                      `pop_failure_jump' to pop.  See comments at
02490                      `push_dummy_failure' in `re_match_2'.  */
02491                   BUF_PUSH (push_dummy_failure);
02492 
02493                   /* We allocated space for this jump when we assigned
02494                      to `fixup_alt_jump', in the `handle_alt' case below.  */
02495                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
02496                 }
02497 
02498               /* See similar code for backslashed left paren above.  */
02499               if (COMPILE_STACK_EMPTY)
02500         {
02501           if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02502             goto normal_char;
02503           else
02504             FREE_STACK_RETURN (REG_ERPAREN);
02505         }
02506 
02507               /* Since we just checked for an empty stack above, this
02508                  ``can't happen''.  */
02509               assert (compile_stack.avail != 0);
02510               {
02511                 /* We don't just want to restore into `regnum', because
02512                    later groups should continue to be numbered higher,
02513                    as in `(ab)c(de)' -- the second group is #2.  */
02514                 regnum_t this_group_regnum;
02515 
02516                 compile_stack.avail--;
02517                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
02518                 fixup_alt_jump
02519                   = COMPILE_STACK_TOP.fixup_alt_jump
02520                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
02521                     : 0;
02522                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
02523                 this_group_regnum = COMPILE_STACK_TOP.regnum;
02524         /* If we've reached MAX_REGNUM groups, then this open
02525            won't actually generate any code, so we'll have to
02526            clear pending_exact explicitly.  */
02527         pending_exact = 0;
02528 
02529                 /* We're at the end of the group, so now we know how many
02530                    groups were inside this one.  */
02531                 if (this_group_regnum <= MAX_REGNUM)
02532                   {
02533                     unsigned char *inner_group_loc
02534                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
02535 
02536                     *inner_group_loc = regnum - this_group_regnum;
02537                     BUF_PUSH_3 (stop_memory, this_group_regnum,
02538                                 regnum - this_group_regnum);
02539                   }
02540               }
02541               break;
02542 
02543 
02544             case '|':                   /* `\|'.  */
02545               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
02546                 goto normal_backslash;
02547             handle_alt:
02548               if (syntax & RE_LIMITED_OPS)
02549                 goto normal_char;
02550 
02551               /* Insert before the previous alternative a jump which
02552                  jumps to this alternative if the former fails.  */
02553               GET_BUFFER_SPACE (3);
02554               INSERT_JUMP (on_failure_jump, begalt, b + 6);
02555               pending_exact = 0;
02556               b += 3;
02557 
02558               /* The alternative before this one has a jump after it
02559                  which gets executed if it gets matched.  Adjust that
02560                  jump so it will jump to this alternative's analogous
02561                  jump (put in below, which in turn will jump to the next
02562                  (if any) alternative's such jump, etc.).  The last such
02563                  jump jumps to the correct final destination.  A picture:
02564                           _____ _____
02565                           |   | |   |
02566                           |   v |   v
02567                          a | b   | c
02568 
02569                  If we are at `b', then fixup_alt_jump right now points to a
02570                  three-byte space after `a'.  We'll put in the jump, set
02571                  fixup_alt_jump to right after `b', and leave behind three
02572                  bytes which we'll fill in when we get to after `c'.  */
02573 
02574               if (fixup_alt_jump)
02575                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02576 
02577               /* Mark and leave space for a jump after this alternative,
02578                  to be filled in later either by next alternative or
02579                  when know we're at the end of a series of alternatives.  */
02580               fixup_alt_jump = b;
02581               GET_BUFFER_SPACE (3);
02582               b += 3;
02583 
02584               laststart = 0;
02585               begalt = b;
02586               break;
02587 
02588 
02589             case '{':
02590               /* If \{ is a literal.  */
02591               if (!(syntax & RE_INTERVALS)
02592                      /* If we're at `\{' and it's not the open-interval
02593                         operator.  */
02594                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
02595                   || (p - 2 == pattern  &&  p == pend))
02596                 goto normal_backslash;
02597 
02598             handle_interval:
02599               {
02600                 /* If got here, then the syntax allows intervals.  */
02601 
02602                 /* At least (most) this many matches must be made.  */
02603                 int lower_bound = -1, upper_bound = -1;
02604 
02605                 beg_interval = p - 1;
02606 
02607                 if (p == pend)
02608                   {
02609                     if (syntax & RE_NO_BK_BRACES)
02610                       goto unfetch_interval;
02611                     else
02612                       FREE_STACK_RETURN (REG_EBRACE);
02613                   }
02614 
02615                 GET_UNSIGNED_NUMBER (lower_bound);
02616 
02617                 if (c == ',')
02618                   {
02619                     GET_UNSIGNED_NUMBER (upper_bound);
02620                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
02621                   }
02622                 else
02623                   /* Interval such as `{1}' => match exactly once. */
02624                   upper_bound = lower_bound;
02625 
02626                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
02627                     || lower_bound > upper_bound)
02628                   {
02629                     if (syntax & RE_NO_BK_BRACES)
02630                       goto unfetch_interval;
02631                     else
02632                       FREE_STACK_RETURN (REG_BADBR);
02633                   }
02634 
02635                 if (!(syntax & RE_NO_BK_BRACES))
02636                   {
02637                     if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
02638 
02639                     PATFETCH (c);
02640                   }
02641 
02642                 if (c != '}')
02643                   {
02644                     if (syntax & RE_NO_BK_BRACES)
02645                       goto unfetch_interval;
02646                     else
02647                       FREE_STACK_RETURN (REG_BADBR);
02648                   }
02649 
02650                 /* We just parsed a valid interval.  */
02651 
02652                 /* If it's invalid to have no preceding re.  */
02653                 if (!laststart)
02654                   {
02655                     if (syntax & RE_CONTEXT_INVALID_OPS)
02656                       FREE_STACK_RETURN (REG_BADRPT);
02657                     else if (syntax & RE_CONTEXT_INDEP_OPS)
02658                       laststart = b;
02659                     else
02660                       goto unfetch_interval;
02661                   }
02662 
02663                 /* If the upper bound is zero, don't want to succeed at
02664                    all; jump from `laststart' to `b + 3', which will be
02665                    the end of the buffer after we insert the jump.  */
02666                  if (upper_bound == 0)
02667                    {
02668                      GET_BUFFER_SPACE (3);
02669                      INSERT_JUMP (jump, laststart, b + 3);
02670                      b += 3;
02671                    }
02672 
02673                  /* Otherwise, we have a nontrivial interval.  When
02674                     we're all done, the pattern will look like:
02675                       set_number_at <jump count> <upper bound>
02676                       set_number_at <succeed_n count> <lower bound>
02677                       succeed_n <after jump addr> <succeed_n count>
02678                       <body of loop>
02679                       jump_n <succeed_n addr> <jump count>
02680                     (The upper bound and `jump_n' are omitted if
02681                     `upper_bound' is 1, though.)  */
02682                  else
02683                    { /* If the upper bound is > 1, we need to insert
02684                         more at the end of the loop.  */
02685                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
02686 
02687                      GET_BUFFER_SPACE (nbytes);
02688 
02689                      /* Initialize lower bound of the `succeed_n', even
02690                         though it will be set during matching by its
02691                         attendant `set_number_at' (inserted next),
02692                         because `re_compile_fastmap' needs to know.
02693                         Jump to the `jump_n' we might insert below.  */
02694                      INSERT_JUMP2 (succeed_n, laststart,
02695                                    b + 5 + (upper_bound > 1) * 5,
02696                                    lower_bound);
02697                      b += 5;
02698 
02699                      /* Code to initialize the lower bound.  Insert
02700                         before the `succeed_n'.  The `5' is the last two
02701                         bytes of this `set_number_at', plus 3 bytes of
02702                         the following `succeed_n'.  */
02703                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
02704                      b += 5;
02705 
02706                      if (upper_bound > 1)
02707                        { /* More than one repetition is allowed, so
02708                             append a backward jump to the `succeed_n'
02709                             that starts this interval.
02710 
02711                             When we've reached this during matching,
02712                             we'll have matched the interval once, so
02713                             jump back only `upper_bound - 1' times.  */
02714                          STORE_JUMP2 (jump_n, b, laststart + 5,
02715                                       upper_bound - 1);
02716                          b += 5;
02717 
02718                          /* The location we want to set is the second
02719                             parameter of the `jump_n'; that is `b-2' as
02720                             an absolute address.  `laststart' will be
02721                             the `set_number_at' we're about to insert;
02722                             `laststart+3' the number to set, the source
02723                             for the relative address.  But we are
02724                             inserting into the middle of the pattern --
02725                             so everything is getting moved up by 5.
02726                             Conclusion: (b - 2) - (laststart + 3) + 5,
02727                             i.e., b - laststart.
02728 
02729                             We insert this at the beginning of the loop
02730                             so that if we fail during matching, we'll
02731                             reinitialize the bounds.  */
02732                          insert_op2 (set_number_at, laststart, b - laststart,
02733                                      upper_bound - 1, b);
02734                          b += 5;
02735                        }
02736                    }
02737                 pending_exact = 0;
02738                 beg_interval = NULL;
02739               }
02740               break;
02741 
02742             unfetch_interval:
02743               /* If an invalid interval, match the characters as literals.  */
02744                assert (beg_interval);
02745                p = beg_interval;
02746                beg_interval = NULL;
02747 
02748                /* normal_char and normal_backslash need `c'.  */
02749                PATFETCH (c);
02750 
02751                if (!(syntax & RE_NO_BK_BRACES))
02752                  {
02753                    if (p > pattern  &&  p[-1] == '\\')
02754                      goto normal_backslash;
02755                  }
02756                goto normal_char;
02757 
02758 #ifdef emacs
02759             /* There is no way to specify the before_dot and after_dot
02760                operators.  rms says this is ok.  --karl  */
02761             case '=':
02762               BUF_PUSH (at_dot);
02763               break;
02764 
02765             case 's':
02766               laststart = b;
02767               PATFETCH (c);
02768               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
02769               break;
02770 
02771             case 'S':
02772               laststart = b;
02773               PATFETCH (c);
02774               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
02775               break;
02776 #endif /* emacs */
02777 
02778 
02779             case 'w':
02780           if (syntax & RE_NO_GNU_OPS)
02781         goto normal_char;
02782               laststart = b;
02783               BUF_PUSH (wordchar);
02784               break;
02785 
02786 
02787             case 'W':
02788           if (syntax & RE_NO_GNU_OPS)
02789         goto normal_char;
02790               laststart = b;
02791               BUF_PUSH (notwordchar);
02792               break;
02793 
02794 
02795             case '<':
02796           if (syntax & RE_NO_GNU_OPS)
02797         goto normal_char;
02798               BUF_PUSH (wordbeg);
02799               break;
02800 
02801             case '>':
02802           if (syntax & RE_NO_GNU_OPS)
02803         goto normal_char;
02804               BUF_PUSH (wordend);
02805               break;
02806 
02807             case 'b':
02808           if (syntax & RE_NO_GNU_OPS)
02809         goto normal_char;
02810               BUF_PUSH (wordbound);
02811               break;
02812 
02813             case 'B':
02814           if (syntax & RE_NO_GNU_OPS)
02815         goto normal_char;
02816               BUF_PUSH (notwordbound);
02817               break;
02818 
02819             case '`':
02820           if (syntax & RE_NO_GNU_OPS)
02821         goto normal_char;
02822               BUF_PUSH (begbuf);
02823               break;
02824 
02825             case '\'':
02826           if (syntax & RE_NO_GNU_OPS)
02827         goto normal_char;
02828               BUF_PUSH (endbuf);
02829               break;
02830 
02831             case '1': case '2': case '3': case '4': case '5':
02832             case '6': case '7': case '8': case '9':
02833               if (syntax & RE_NO_BK_REFS)
02834                 goto normal_char;
02835 
02836               c1 = c - '0';
02837 
02838               if (c1 > regnum)
02839                 FREE_STACK_RETURN (REG_ESUBREG);
02840 
02841               /* Can't back reference to a subexpression if inside of it.  */
02842               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
02843                 goto normal_char;
02844 
02845               laststart = b;
02846               BUF_PUSH_2 (duplicate, c1);
02847               break;
02848 
02849 
02850             case '+':
02851             case '?':
02852               if (syntax & RE_BK_PLUS_QM)
02853                 goto handle_plus;
02854               else
02855                 goto normal_backslash;
02856 
02857             default:
02858             normal_backslash:
02859               /* You might think it would be useful for \ to mean
02860                  not to translate; but if we don't translate it
02861                  it will never match anything.  */
02862               c = TRANSLATE (c);
02863               goto normal_char;
02864             }
02865           break;
02866 
02867 
02868     default:
02869         /* Expects the character in `c'.  */
02870     normal_char:
02871           /* If no exactn currently being built.  */
02872           if (!pending_exact
02873 
02874               /* If last exactn not at current position.  */
02875               || pending_exact + *pending_exact + 1 != b
02876 
02877               /* We have only one byte following the exactn for the count.  */
02878           || *pending_exact == (1 << BYTEWIDTH) - 1
02879 
02880               /* If followed by a repetition operator.  */
02881               || *p == '*' || *p == '^'
02882           || ((syntax & RE_BK_PLUS_QM)
02883           ? *p == '\\' && (p[1] == '+' || p[1] == '?')
02884           : (*p == '+' || *p == '?'))
02885           || ((syntax & RE_INTERVALS)
02886                   && ((syntax & RE_NO_BK_BRACES)
02887               ? *p == '{'
02888                       : (p[0] == '\\' && p[1] == '{'))))
02889         {
02890           /* Start building a new exactn.  */
02891 
02892               laststart = b;
02893 
02894           BUF_PUSH_2 (exactn, 0);
02895           pending_exact = b - 1;
02896             }
02897 
02898       BUF_PUSH (c);
02899           (*pending_exact)++;
02900       break;
02901         } /* switch (c) */
02902     } /* while p != pend */
02903 
02904 
02905   /* Through the pattern now.  */
02906 
02907   if (fixup_alt_jump)
02908     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02909 
02910   if (!COMPILE_STACK_EMPTY)
02911     FREE_STACK_RETURN (REG_EPAREN);
02912 
02913   /* If we don't want backtracking, force success
02914      the first time we reach the end of the compiled pattern.  */
02915   if (syntax & RE_NO_POSIX_BACKTRACKING)
02916     BUF_PUSH (succeed);
02917 
02918   free (compile_stack.stack);
02919 
02920   /* We have succeeded; set the length of the buffer.  */
02921   bufp->used = b - bufp->buffer;
02922 
02923 #ifdef DEBUG
02924   if (debug)
02925     {
02926       DEBUG_PRINT1 ("\nCompiled pattern: \n");
02927       print_compiled_pattern (bufp);
02928     }
02929 #endif /* DEBUG */
02930 
02931 #ifndef MATCH_MAY_ALLOCATE
02932   /* Initialize the failure stack to the largest possible stack.  This
02933      isn't necessary unless we're trying to avoid calling alloca in
02934      the search and match routines.  */
02935   {
02936     int num_regs = bufp->re_nsub + 1;
02937 
02938     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
02939        is strictly greater than re_max_failures, the largest possible stack
02940        is 2 * re_max_failures failure points.  */
02941     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
02942       {
02943     fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
02944 
02945 # ifdef emacs
02946     if (! fail_stack.stack)
02947       fail_stack.stack
02948         = (fail_stack_elt_t *) xmalloc (fail_stack.size
02949                         * sizeof (fail_stack_elt_t));
02950     else
02951       fail_stack.stack
02952         = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
02953                          (fail_stack.size
02954                           * sizeof (fail_stack_elt_t)));
02955 # else /* not emacs */
02956     if (! fail_stack.stack)
02957       fail_stack.stack
02958         = (fail_stack_elt_t *) malloc (fail_stack.size
02959                        * sizeof (fail_stack_elt_t));
02960     else
02961       fail_stack.stack
02962         = (fail_stack_elt_t *) realloc (fail_stack.stack,
02963                         (fail_stack.size
02964                          * sizeof (fail_stack_elt_t)));
02965 # endif /* not emacs */
02966       }
02967 
02968     regex_grow_registers (num_regs);
02969   }
02970 #endif /* not MATCH_MAY_ALLOCATE */
02971 
02972   return REG_NOERROR;
02973 } /* regex_compile */
02974 
02975 /* Subroutines for `regex_compile'.  */
02976 
02977 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
02978 
02979 static void
02980 store_op1 (op, loc, arg)
02981     re_opcode_t op;
02982     unsigned char *loc;
02983     int arg;
02984 {
02985   *loc = (unsigned char) op;
02986   STORE_NUMBER (loc + 1, arg);
02987 }
02988 
02989 
02990 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
02991 
02992 static void
02993 store_op2 (op, loc, arg1, arg2)
02994     re_opcode_t op;
02995     unsigned char *loc;
02996     int arg1, arg2;
02997 {
02998   *loc = (unsigned char) op;
02999   STORE_NUMBER (loc + 1, arg1);
03000   STORE_NUMBER (loc + 3, arg2);
03001 }
03002 
03003 
03004 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
03005    for OP followed by two-byte integer parameter ARG.  */
03006 
03007 static void
03008 insert_op1 (op, loc, arg, end)
03009     re_opcode_t op;
03010     unsigned char *loc;
03011     int arg;
03012     unsigned char *end;
03013 {
03014   register unsigned char *pfrom = end;
03015   register unsigned char *pto = end + 3;
03016 
03017   while (pfrom != loc)
03018     *--pto = *--pfrom;
03019 
03020   store_op1 (op, loc, arg);
03021 }
03022 
03023 
03024 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
03025 
03026 static void
03027 insert_op2 (op, loc, arg1, arg2, end)
03028     re_opcode_t op;
03029     unsigned char *loc;
03030     int arg1, arg2;
03031     unsigned char *end;
03032 {
03033   register unsigned char *pfrom = end;
03034   register unsigned char *pto = end + 5;
03035 
03036   while (pfrom != loc)
03037     *--pto = *--pfrom;
03038 
03039   store_op2 (op, loc, arg1, arg2);
03040 }
03041 
03042 
03043 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
03044    after an alternative or a begin-subexpression.  We assume there is at
03045    least one character before the ^.  */
03046 
03047 static boolean
03048 at_begline_loc_p (pattern, p, syntax)
03049     const char *pattern, *p;
03050     reg_syntax_t syntax;
03051 {
03052   const char *prev = p - 2;
03053   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
03054 
03055   return
03056        /* After a subexpression?  */
03057        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
03058        /* After an alternative?  */
03059     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
03060 }
03061 
03062 
03063 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
03064    at least one character after the $, i.e., `P < PEND'.  */
03065 
03066 static boolean
03067 at_endline_loc_p (p, pend, syntax)
03068     const char *p, *pend;
03069     reg_syntax_t syntax;
03070 {
03071   const char *next = p;
03072   boolean next_backslash = *next == '\\';
03073   const char *next_next = p + 1 < pend ? p + 1 : 0;
03074 
03075   return
03076        /* Before a subexpression?  */
03077        (syntax & RE_NO_BK_PARENS ? *next == ')'
03078         : next_backslash && next_next && *next_next == ')')
03079        /* Before an alternative?  */
03080     || (syntax & RE_NO_BK_VBAR ? *next == '|'
03081         : next_backslash && next_next && *next_next == '|');
03082 }
03083 
03084 
03085 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
03086    false if it's not.  */
03087 
03088 static boolean
03089 group_in_compile_stack (compile_stack, regnum)
03090     compile_stack_type compile_stack;
03091     regnum_t regnum;
03092 {
03093   int this_element;
03094 
03095   for (this_element = compile_stack.avail - 1;
03096        this_element >= 0;
03097        this_element--)
03098     if (compile_stack.stack[this_element].regnum == regnum)
03099       return true;
03100 
03101   return false;
03102 }
03103 
03104 
03105 /* Read the ending character of a range (in a bracket expression) from the
03106    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
03107    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
03108    Then we set the translation of all bits between the starting and
03109    ending characters (inclusive) in the compiled pattern B.
03110 
03111    Return an error code.
03112 
03113    We use these short variable names so we can use the same macros as
03114    `regex_compile' itself.  */
03115 
03116 static reg_errcode_t
03117 compile_range (p_ptr, pend, translate, syntax, b)
03118     const char **p_ptr, *pend;
03119     RE_TRANSLATE_TYPE translate;
03120     reg_syntax_t syntax;
03121     unsigned char *b;
03122 {
03123   unsigned this_char;
03124 
03125   const char *p = *p_ptr;
03126   unsigned int range_start, range_end;
03127 
03128   if (p == pend)
03129     return REG_ERANGE;
03130 
03131   /* Even though the pattern is a signed `char *', we need to fetch
03132      with unsigned char *'s; if the high bit of the pattern character
03133      is set, the range endpoints will be negative if we fetch using a
03134      signed char *.
03135 
03136      We also want to fetch the endpoints without translating them; the
03137      appropriate translation is done in the bit-setting loop below.  */
03138   /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
03139   range_start = ((const unsigned char *) p)[-2];
03140   range_end   = ((const unsigned char *) p)[0];
03141 
03142   /* Have to increment the pointer into the pattern string, so the
03143      caller isn't still at the ending character.  */
03144   (*p_ptr)++;
03145 
03146   /* If the start is after the end, the range is empty.  */
03147   if (range_start > range_end)
03148     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
03149 
03150   /* Here we see why `this_char' has to be larger than an `unsigned
03151      char' -- the range is inclusive, so if `range_end' == 0xff
03152      (assuming 8-bit characters), we would otherwise go into an infinite
03153      loop, since all characters <= 0xff.  */
03154   for (this_char = range_start; this_char <= range_end; this_char++)
03155     {
03156       SET_LIST_BIT (TRANSLATE (this_char));
03157     }
03158 
03159   return REG_NOERROR;
03160 }
03161 
03162 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
03163    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
03164    characters can start a string that matches the pattern.  This fastmap
03165    is used by re_search to skip quickly over impossible starting points.
03166 
03167    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
03168    area as BUFP->fastmap.
03169 
03170    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
03171    the pattern buffer.
03172 
03173    Returns 0 if we succeed, -2 if an internal error.   */
03174 
03175 int
03176 re_compile_fastmap (bufp)
03177      struct re_pattern_buffer *bufp;
03178 {
03179   int j, k;
03180 #ifdef MATCH_MAY_ALLOCATE
03181   fail_stack_type fail_stack;
03182 #endif
03183 #ifndef REGEX_MALLOC
03184   char *destination;
03185 #endif
03186 
03187   register char *fastmap = bufp->fastmap;
03188   unsigned char *pattern = bufp->buffer;
03189   unsigned char *p = pattern;
03190   register unsigned char *pend = pattern + bufp->used;
03191 
03192 #ifdef REL_ALLOC
03193   /* This holds the pointer to the failure stack, when
03194      it is allocated relocatably.  */
03195   fail_stack_elt_t *failure_stack_ptr;
03196 #endif
03197 
03198   /* Assume that each path through the pattern can be null until
03199      proven otherwise.  We set this false at the bottom of switch
03200      statement, to which we get only if a particular path doesn't
03201      match the empty string.  */
03202   boolean path_can_be_null = true;
03203 
03204   /* We aren't doing a `succeed_n' to begin with.  */
03205   boolean succeed_n_p = false;
03206 
03207   assert (fastmap != NULL && p != NULL);
03208 
03209   INIT_FAIL_STACK ();
03210   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
03211   bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
03212   bufp->can_be_null = 0;
03213 
03214   while (1)
03215     {
03216       if (p == pend || *p == succeed)
03217     {
03218       /* We have reached the (effective) end of pattern.  */
03219       if (!FAIL_STACK_EMPTY ())
03220         {
03221           bufp->can_be_null |= path_can_be_null;
03222 
03223           /* Reset for next path.  */
03224           path_can_be_null = true;
03225 
03226           p = fail_stack.stack[--fail_stack.avail].pointer;
03227 
03228           continue;
03229         }
03230       else
03231         break;
03232     }
03233 
03234       /* We should never be about to go beyond the end of the pattern.  */
03235       assert (p < pend);
03236 
03237       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
03238     {
03239 
03240         /* I guess the idea here is to simply not bother with a fastmap
03241            if a backreference is used, since it's too hard to figure out
03242            the fastmap for the corresponding group.  Setting
03243            `can_be_null' stops `re_search_2' from using the fastmap, so
03244            that is all we do.  */
03245     case duplicate:
03246       bufp->can_be_null = 1;
03247           goto done;
03248 
03249 
03250       /* Following are the cases which match a character.  These end
03251          with `break'.  */
03252 
03253     case exactn:
03254           fastmap[p[1]] = 1;
03255       break;
03256 
03257 
03258         case charset:
03259           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
03260         if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
03261               fastmap[j] = 1;
03262       break;
03263 
03264 
03265     case charset_not:
03266       /* Chars beyond end of map must be allowed.  */
03267       for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
03268             fastmap[j] = 1;
03269 
03270       for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
03271         if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
03272               fastmap[j] = 1;
03273           break;
03274 
03275 
03276     case wordchar:
03277       for (j = 0; j < (1 << BYTEWIDTH); j++)
03278         if (SYNTAX (j) == Sword)
03279           fastmap[j] = 1;
03280       break;
03281 
03282 
03283     case notwordchar:
03284       for (j = 0; j < (1 << BYTEWIDTH); j++)
03285         if (SYNTAX (j) != Sword)
03286           fastmap[j] = 1;
03287       break;
03288 
03289 
03290         case anychar:
03291       {
03292         int fastmap_newline = fastmap['\n'];
03293 
03294         /* `.' matches anything ...  */
03295         for (j = 0; j < (1 << BYTEWIDTH); j++)
03296           fastmap[j] = 1;
03297 
03298         /* ... except perhaps newline.  */
03299         if (!(bufp->syntax & RE_DOT_NEWLINE))
03300           fastmap['\n'] = fastmap_newline;
03301 
03302         /* Return if we have already set `can_be_null'; if we have,
03303            then the fastmap is irrelevant.  Something's wrong here.  */
03304         else if (bufp->can_be_null)
03305           goto done;
03306 
03307         /* Otherwise, have to check alternative paths.  */
03308         break;
03309       }
03310 
03311 #ifdef emacs
03312         case syntaxspec:
03313       k = *p++;
03314       for (j = 0; j < (1 << BYTEWIDTH); j++)
03315         if (SYNTAX (j) == (enum syntaxcode) k)
03316           fastmap[j] = 1;
03317       break;
03318 
03319 
03320     case notsyntaxspec:
03321       k = *p++;
03322       for (j = 0; j < (1 << BYTEWIDTH); j++)
03323         if (SYNTAX (j) != (enum syntaxcode) k)
03324           fastmap[j] = 1;
03325       break;
03326 
03327 
03328       /* All cases after this match the empty string.  These end with
03329          `continue'.  */
03330 
03331 
03332     case before_dot:
03333     case at_dot:
03334     case after_dot:
03335           continue;
03336 #endif /* emacs */
03337 
03338 
03339         case no_op:
03340         case begline:
03341         case endline:
03342     case begbuf:
03343     case endbuf:
03344     case wordbound:
03345     case notwordbound:
03346     case wordbeg:
03347     case wordend:
03348         case push_dummy_failure:
03349           continue;
03350 
03351 
03352     case jump_n:
03353         case pop_failure_jump:
03354     case maybe_pop_jump:
03355     case jump:
03356         case jump_past_alt:
03357     case dummy_failure_jump:
03358           EXTRACT_NUMBER_AND_INCR (j, p);
03359       p += j;
03360       if (j > 0)
03361         continue;
03362 
03363           /* Jump backward implies we just went through the body of a
03364              loop and matched nothing.  Opcode jumped to should be
03365              `on_failure_jump' or `succeed_n'.  Just treat it like an
03366              ordinary jump.  For a * loop, it has pushed its failure
03367              point already; if so, discard that as redundant.  */
03368           if ((re_opcode_t) *p != on_failure_jump
03369           && (re_opcode_t) *p != succeed_n)
03370         continue;
03371 
03372           p++;
03373           EXTRACT_NUMBER_AND_INCR (j, p);
03374           p += j;
03375 
03376           /* If what's on the stack is where we are now, pop it.  */
03377           if (!FAIL_STACK_EMPTY ()
03378           && fail_stack.stack[fail_stack.avail - 1].pointer == p)
03379             fail_stack.avail--;
03380 
03381           continue;
03382 
03383 
03384         case on_failure_jump:
03385         case on_failure_keep_string_jump:
03386     handle_on_failure_jump:
03387           EXTRACT_NUMBER_AND_INCR (j, p);
03388 
03389           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
03390              end of the pattern.  We don't want to push such a point,
03391              since when we restore it above, entering the switch will
03392              increment `p' past the end of the pattern.  We don't need
03393              to push such a point since we obviously won't find any more
03394              fastmap entries beyond `pend'.  Such a pattern can match
03395              the null string, though.  */
03396           if (p + j < pend)
03397             {
03398               if (!PUSH_PATTERN_OP (p + j, fail_stack))
03399         {
03400           RESET_FAIL_STACK ();
03401           return -2;
03402         }
03403             }
03404           else
03405             bufp->can_be_null = 1;
03406 
03407           if (succeed_n_p)
03408             {
03409               EXTRACT_NUMBER_AND_INCR (k, p);   /* Skip the n.  */
03410               succeed_n_p = false;
03411         }
03412 
03413           continue;
03414 
03415 
03416     case succeed_n:
03417           /* Get to the number of times to succeed.  */
03418           p += 2;
03419 
03420           /* Increment p past the n for when k != 0.  */
03421           EXTRACT_NUMBER_AND_INCR (k, p);
03422           if (k == 0)
03423         {
03424               p -= 4;
03425           succeed_n_p = true;  /* Spaghetti code alert.  */
03426               goto handle_on_failure_jump;
03427             }
03428           continue;
03429 
03430 
03431     case set_number_at:
03432           p += 4;
03433           continue;
03434 
03435 
03436     case start_memory:
03437         case stop_memory:
03438       p += 2;
03439       continue;
03440 
03441 
03442     default:
03443           abort (); /* We have listed all the cases.  */
03444         } /* switch *p++ */
03445 
03446       /* Getting here means we have found the possible starting
03447          characters for one path of the pattern -- and that the empty
03448          string does not match.  We need not follow this path further.
03449          Instead, look at the next alternative (remembered on the
03450          stack), or quit if no more.  The test at the top of the loop
03451          does these things.  */
03452       path_can_be_null = false;
03453       p = pend;
03454     } /* while p */
03455 
03456   /* Set `can_be_null' for the last path (also the first path, if the
03457      pattern is empty).  */
03458   bufp->can_be_null |= path_can_be_null;
03459 
03460  done:
03461   RESET_FAIL_STACK ();
03462   return 0;
03463 } /* re_compile_fastmap */
03464 #ifdef _LIBC
03465 weak_alias (__re_compile_fastmap, re_compile_fastmap)
03466 #endif
03467 
03468 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
03469    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
03470    this memory for recording register information.  STARTS and ENDS
03471    must be allocated using the malloc library routine, and must each
03472    be at least NUM_REGS * sizeof (regoff_t) bytes long.
03473 
03474    If NUM_REGS == 0, then subsequent matches should allocate their own
03475    register data.
03476 
03477    Unless this function is called, the first search or match using
03478    PATTERN_BUFFER will allocate its own register data, without
03479    freeing the old data.  */
03480 
03481 void
03482 re_set_registers (bufp, regs, num_regs, starts, ends)
03483     struct re_pattern_buffer *bufp;
03484     struct re_registers *regs;
03485     unsigned num_regs;
03486     regoff_t *starts, *ends;
03487 {
03488   if (num_regs)
03489     {
03490       bufp->regs_allocated = REGS_REALLOCATE;
03491       regs->num_regs = num_regs;
03492       regs->start = starts;
03493       regs->end = ends;
03494     }
03495   else
03496     {
03497       bufp->regs_allocated = REGS_UNALLOCATED;
03498       regs->num_regs = 0;
03499       regs->start = regs->end = (regoff_t *) 0;
03500     }
03501 }
03502 #ifdef _LIBC
03503 weak_alias (__re_set_registers, re_set_registers)
03504 #endif
03505 
03506 /* Searching routines.  */
03507 
03508 /* Like re_search_2, below, but only one string is specified, and
03509    doesn't let you say where to stop matching. */
03510 
03511 int
03512 re_search (bufp, string, size, startpos, range, regs)
03513      struct re_pattern_buffer *bufp;
03514      const char *string;
03515      int size, startpos, range;
03516      struct re_registers *regs;
03517 {
03518   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
03519               regs, size);
03520 }
03521 #ifdef _LIBC
03522 weak_alias (__re_search, re_search)
03523 #endif
03524 
03525 
03526 /* Using the compiled pattern in BUFP->buffer, first tries to match the
03527    virtual concatenation of STRING1 and STRING2, starting first at index
03528    STARTPOS, then at STARTPOS + 1, and so on.
03529 
03530    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
03531 
03532    RANGE is how far to scan while trying to match.  RANGE = 0 means try
03533    only at STARTPOS; in general, the last start tried is STARTPOS +
03534    RANGE.
03535 
03536    In REGS, return the indices of the virtual concatenation of STRING1
03537    and STRING2 that matched the entire BUFP->buffer and its contained
03538    subexpressions.
03539 
03540    Do not consider matching one past the index STOP in the virtual
03541    concatenation of STRING1 and STRING2.
03542 
03543    We return either the position in the strings at which the match was
03544    found, -1 if no match, or -2 if error (such as failure
03545    stack overflow).  */
03546 
03547 int
03548 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
03549      struct re_pattern_buffer *bufp;
03550      const char *string1, *string2;
03551      int size1, size2;
03552      int startpos;
03553      int range;
03554      struct re_registers *regs;
03555      int stop;
03556 {
03557   int val;
03558   register char *fastmap = bufp->fastmap;
03559   register RE_TRANSLATE_TYPE translate = bufp->translate;
03560   int total_size = size1 + size2;
03561   int endpos = startpos + range;
03562 
03563   /* Check for out-of-range STARTPOS.  */
03564   if (startpos < 0 || startpos > total_size)
03565     return -1;
03566 
03567   /* Fix up RANGE if it might eventually take us outside
03568      the virtual concatenation of STRING1 and STRING2.
03569      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
03570   if (endpos < 0)
03571     range = 0 - startpos;
03572   else if (endpos > total_size)
03573     range = total_size - startpos;
03574 
03575   /* If the search isn't to be a backwards one, don't waste time in a
03576      search for a pattern that must be anchored.  */
03577   if (bufp->used > 0 && range > 0
03578       && ((re_opcode_t) bufp->buffer[0] == begbuf
03579       /* `begline' is like `begbuf' if it cannot match at newlines.  */
03580       || ((re_opcode_t) bufp->buffer[0] == begline
03581           && !bufp->newline_anchor)))
03582     {
03583       if (startpos > 0)
03584     return -1;
03585       else
03586     range = 1;
03587     }
03588 
03589 #ifdef emacs
03590   /* In a forward search for something that starts with \=.
03591      don't keep searching past point.  */
03592   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
03593     {
03594       range = PT - startpos;
03595       if (range <= 0)
03596     return -1;
03597     }
03598 #endif /* emacs */
03599 
03600   /* Update the fastmap now if not correct already.  */
03601   if (fastmap && !bufp->fastmap_accurate)
03602     if (re_compile_fastmap (bufp) == -2)
03603       return -2;
03604 
03605   /* Loop through the string, looking for a place to start matching.  */
03606   for (;;)
03607     {
03608       /* If a fastmap is supplied, skip quickly over characters that
03609          cannot be the start of a match.  If the pattern can match the
03610          null string, however, we don't need to skip characters; we want
03611          the first null string.  */
03612       if (fastmap && startpos < total_size && !bufp->can_be_null)
03613     {
03614       if (range > 0)    /* Searching forwards.  */
03615         {
03616           register const char *d;
03617           register int lim = 0;
03618           int irange = range;
03619 
03620               if (startpos < size1 && startpos + range >= size1)
03621                 lim = range - (size1 - startpos);
03622 
03623           d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
03624 
03625               /* Written out as an if-else to avoid testing `translate'
03626                  inside the loop.  */
03627           if (translate)
03628                 while (range > lim
03629                        && !fastmap[(unsigned char)
03630                    translate[(unsigned char) *d++]])
03631                   range--;
03632           else
03633                 while (range > lim && !fastmap[(unsigned char) *d++])
03634                   range--;
03635 
03636           startpos += irange - range;
03637         }
03638       else              /* Searching backwards.  */
03639         {
03640           register char c = (size1 == 0 || startpos >= size1
03641                                  ? string2[startpos - size1]
03642                                  : string1[startpos]);
03643 
03644           if (!fastmap[(unsigned char) TRANSLATE (c)])
03645         goto advance;
03646         }
03647     }
03648 
03649       /* If can't match the null string, and that's all we have left, fail.  */
03650       if (range >= 0 && startpos == total_size && fastmap
03651           && !bufp->can_be_null)
03652     return -1;
03653 
03654       val = re_match_2_internal (bufp, string1, size1, string2, size2,
03655                  startpos, regs, stop);
03656 #ifndef REGEX_MALLOC
03657 # ifdef C_ALLOCA
03658       alloca (0);
03659 # endif
03660 #endif
03661 
03662       if (val >= 0)
03663     return startpos;
03664 
03665       if (val == -2)
03666     return -2;
03667 
03668     advance:
03669       if (!range)
03670         break;
03671       else if (range > 0)
03672         {
03673           range--;
03674           startpos++;
03675         }
03676       else
03677         {
03678           range++;
03679           startpos--;
03680         }
03681     }
03682   return -1;
03683 } /* re_search_2 */
03684 #ifdef _LIBC
03685 weak_alias (__re_search_2, re_search_2)
03686 #endif
03687 
03688 /* This converts PTR, a pointer into one of the search strings `string1'
03689    and `string2' into an offset from the beginning of that string.  */
03690 #define POINTER_TO_OFFSET(ptr)          \
03691   (FIRST_STRING_P (ptr)             \
03692    ? ((regoff_t) ((ptr) - string1))     \
03693    : ((regoff_t) ((ptr) - string2 + size1)))
03694 
03695 /* Macros for dealing with the split strings in re_match_2.  */
03696 
03697 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
03698 
03699 /* Call before fetching a character with *d.  This switches over to
03700    string2 if necessary.  */
03701 #define PREFETCH()                          \
03702   while (d == dend)                             \
03703     {                                   \
03704       /* End of string2 => fail.  */                    \
03705       if (dend == end_match_2)                      \
03706         goto fail;                          \
03707       /* End of string1 => advance to string2.  */          \
03708       d = string2;                              \
03709       dend = end_match_2;                       \
03710     }
03711 
03712 
03713 /* Test if at very beginning or at very end of the virtual concatenation
03714    of `string1' and `string2'.  If only one string, it's `string2'.  */
03715 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
03716 #define AT_STRINGS_END(d) ((d) == end2)
03717 
03718 
03719 /* Test if D points to a character which is word-constituent.  We have
03720    two special cases to check for: if past the end of string1, look at
03721    the first character in string2; and if before the beginning of
03722    string2, look at the last character in string1.  */
03723 #define WORDCHAR_P(d)                           \
03724   (SYNTAX ((d) == end1 ? *string2                   \
03725            : (d) == string2 - 1 ? *(end1 - 1) : *(d))           \
03726    == Sword)
03727 
03728 /* Disabled due to a compiler bug -- see comment at case wordbound */
03729 #if 0
03730 /* Test if the character before D and the one at D differ with respect
03731    to being word-constituent.  */
03732 #define AT_WORD_BOUNDARY(d)                     \
03733   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)             \
03734    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
03735 #endif
03736 
03737 /* Free everything we malloc.  */
03738 #ifdef MATCH_MAY_ALLOCATE
03739 # define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
03740 # define FREE_VARIABLES()                       \
03741   do {                                  \
03742     REGEX_FREE_STACK (fail_stack.stack);                \
03743     FREE_VAR (regstart);                        \
03744     FREE_VAR (regend);                          \
03745     FREE_VAR (old_regstart);                        \
03746     FREE_VAR (old_regend);                      \
03747     FREE_VAR (best_regstart);                       \
03748     FREE_VAR (best_regend);                     \
03749     FREE_VAR (reg_info);                        \
03750     FREE_VAR (reg_dummy);                       \
03751     FREE_VAR (reg_info_dummy);                      \
03752   } while (0)
03753 #else
03754 # define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
03755 #endif /* not MATCH_MAY_ALLOCATE */
03756 
03757 /* These values must meet several constraints.  They must not be valid
03758    register values; since we have a limit of 255 registers (because
03759    we use only one byte in the pattern for the register number), we can
03760    use numbers larger than 255.  They must differ by 1, because of
03761    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
03762    be larger than the value for the highest register, so we do not try
03763    to actually save any registers when none are active.  */
03764 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
03765 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
03766 
03767 /* Matching routines.  */
03768 
03769 #ifndef emacs   /* Emacs never uses this.  */
03770 /* re_match is like re_match_2 except it takes only a single string.  */
03771 
03772 int
03773 re_match (bufp, string, size, pos, regs)
03774      struct re_pattern_buffer *bufp;
03775      const char *string;
03776      int size, pos;
03777      struct re_registers *regs;
03778 {
03779   int result = re_match_2_internal (bufp, NULL, 0, string, size,
03780                     pos, regs, size);
03781 # ifndef REGEX_MALLOC
03782 #  ifdef C_ALLOCA
03783   alloca (0);
03784 #  endif
03785 # endif
03786   return result;
03787 }
03788 # ifdef _LIBC
03789 weak_alias (__re_match, re_match)
03790 # endif
03791 #endif /* not emacs */
03792 
03793 static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
03794                             unsigned char *end,
03795                         register_info_type *reg_info));
03796 static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
03797                           unsigned char *end,
03798                         register_info_type *reg_info));
03799 static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
03800                             unsigned char *end,
03801                         register_info_type *reg_info));
03802 static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
03803                      int len, char *translate));
03804 
03805 /* re_match_2 matches the compiled pattern in BUFP against the
03806    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
03807    and SIZE2, respectively).  We start matching at POS, and stop
03808    matching at STOP.
03809 
03810    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
03811    store offsets for the substring each group matched in REGS.  See the
03812    documentation for exactly how many groups we fill.
03813 
03814    We return -1 if no match, -2 if an internal error (such as the
03815    failure stack overflowing).  Otherwise, we return the length of the
03816    matched substring.  */
03817 
03818 int
03819 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
03820      struct re_pattern_buffer *bufp;
03821      const char *string1, *string2;
03822      int size1, size2;
03823      int pos;
03824      struct re_registers *regs;
03825      int stop;
03826 {
03827   int result = re_match_2_internal (bufp, string1, size1, string2, size2,
03828                     pos, regs, stop);
03829 #ifndef REGEX_MALLOC
03830 # ifdef C_ALLOCA
03831   alloca (0);
03832 # endif
03833 #endif
03834   return result;
03835 }
03836 #ifdef _LIBC
03837 weak_alias (__re_match_2, re_match_2)
03838 #endif
03839 
03840 /* This is a separate function so that we can force an alloca cleanup
03841    afterwards.  */
03842 static int
03843 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
03844      struct re_pattern_buffer *bufp;
03845      const char *string1, *string2;
03846      int size1, size2;
03847      int pos;
03848      struct re_registers *regs;
03849      int stop;
03850 {
03851   /* General temporaries.  */
03852   int mcnt;
03853   unsigned char *p1;
03854 
03855   /* Just past the end of the corresponding string.  */
03856   const char *end1, *end2;
03857 
03858   /* Pointers into string1 and string2, just past the last characters in
03859      each to consider matching.  */
03860   const char *end_match_1, *end_match_2;
03861 
03862   /* Where we are in the data, and the end of the current string.  */
03863   const char *d, *dend;
03864 
03865   /* Where we are in the pattern, and the end of the pattern.  */
03866   unsigned char *p = bufp->buffer;
03867   register unsigned char *pend = p + bufp->used;
03868 
03869   /* Mark the opcode just after a start_memory, so we can test for an
03870      empty subpattern when we get to the stop_memory.  */
03871   unsigned char *just_past_start_mem = 0;
03872 
03873   /* We use this to map every character in the string.  */
03874   RE_TRANSLATE_TYPE translate = bufp->translate;
03875 
03876   /* Failure point stack.  Each place that can handle a failure further
03877      down the line pushes a failure point on this stack.  It consists of
03878      restart, regend, and reg_info for all registers corresponding to
03879      the subexpressions we're currently inside, plus the number of such
03880      registers, and, finally, two char *'s.  The first char * is where
03881      to resume scanning the pattern; the second one is where to resume
03882      scanning the strings.  If the latter is zero, the failure point is
03883      a ``dummy''; if a failure happens and the failure point is a dummy,
03884      it gets discarded and the next next one is tried.  */
03885 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
03886   fail_stack_type fail_stack;
03887 #endif
03888 #ifdef DEBUG
03889   static unsigned failure_id = 0;
03890   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
03891 #endif
03892 
03893 #ifdef REL_ALLOC
03894   /* This holds the pointer to the failure stack, when
03895      it is allocated relocatably.  */
03896   fail_stack_elt_t *failure_stack_ptr;
03897 #endif
03898 
03899   /* We fill all the registers internally, independent of what we
03900      return, for use in backreferences.  The number here includes
03901      an element for register zero.  */
03902   size_t num_regs = bufp->re_nsub + 1;
03903 
03904   /* The currently active registers.  */
03905   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
03906   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
03907 
03908   /* Information on the contents of registers. These are pointers into
03909      the input strings; they record just what was matched (on this
03910      attempt) by a subexpression part of the pattern, that is, the
03911      regnum-th regstart pointer points to where in the pattern we began
03912      matching and the regnum-th regend points to right after where we
03913      stopped matching the regnum-th subexpression.  (The zeroth register
03914      keeps track of what the whole pattern matches.)  */
03915 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03916   const char **regstart, **regend;
03917 #endif
03918 
03919   /* If a group that's operated upon by a repetition operator fails to
03920      match anything, then the register for its start will need to be
03921      restored because it will have been set to wherever in the string we
03922      are when we last see its open-group operator.  Similarly for a
03923      register's end.  */
03924 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03925   const char **old_regstart, **old_regend;
03926 #endif
03927 
03928   /* The is_active field of reg_info helps us keep track of which (possibly
03929      nested) subexpressions we are currently in. The matched_something
03930      field of reg_info[reg_num] helps us tell whether or not we have
03931      matched any of the pattern so far this time through the reg_num-th
03932      subexpression.  These two fields get reset each time through any
03933      loop their register is in.  */
03934 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
03935   register_info_type *reg_info;
03936 #endif
03937 
03938   /* The following record the register info as found in the above
03939      variables when we find a match better than any we've seen before.
03940      This happens as we backtrack through the failure points, which in
03941      turn happens only if we have not yet matched the entire string. */
03942   unsigned best_regs_set = false;
03943 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03944   const char **best_regstart, **best_regend;
03945 #endif
03946 
03947   /* Logically, this is `best_regend[0]'.  But we don't want to have to
03948      allocate space for that if we're not allocating space for anything
03949      else (see below).  Also, we never need info about register 0 for
03950      any of the other register vectors, and it seems rather a kludge to
03951      treat `best_regend' differently than the rest.  So we keep track of
03952      the end of the best match so far in a separate variable.  We
03953      initialize this to NULL so that when we backtrack the first time
03954      and need to test it, it's not garbage.  */
03955   const char *match_end = NULL;
03956 
03957   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
03958   int set_regs_matched_done = 0;
03959 
03960   /* Used when we pop values we don't care about.  */
03961 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03962   const char **reg_dummy;
03963   register_info_type *reg_info_dummy;
03964 #endif
03965 
03966 #ifdef DEBUG
03967   /* Counts the total number of registers pushed.  */
03968   unsigned num_regs_pushed = 0;
03969 #endif
03970 
03971   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
03972 
03973   INIT_FAIL_STACK ();
03974 
03975 #ifdef MATCH_MAY_ALLOCATE
03976   /* Do not bother to initialize all the register variables if there are
03977      no groups in the pattern, as it takes a fair amount of time.  If
03978      there are groups, we include space for register 0 (the whole
03979      pattern), even though we never use it, since it simplifies the
03980      array indexing.  We should fix this.  */
03981   if (bufp->re_nsub)
03982     {
03983       regstart = REGEX_TALLOC (num_regs, const char *);
03984       regend = REGEX_TALLOC (num_regs, const char *);
03985       old_regstart = REGEX_TALLOC (num_regs, const char *);
03986       old_regend = REGEX_TALLOC (num_regs, const char *);
03987       best_regstart = REGEX_TALLOC (num_regs, const char *);
03988       best_regend = REGEX_TALLOC (num_regs, const char *);
03989       reg_info = REGEX_TALLOC (num_regs, register_info_type);
03990       reg_dummy = REGEX_TALLOC (num_regs, const char *);
03991       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
03992 
03993       if (!(regstart && regend && old_regstart && old_regend && reg_info
03994             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
03995         {
03996           FREE_VARIABLES ();
03997           return -2;
03998         }
03999     }
04000   else
04001     {
04002       /* We must initialize all our variables to NULL, so that
04003          `FREE_VARIABLES' doesn't try to free them.  */
04004       regstart = regend = old_regstart = old_regend = best_regstart
04005         = best_regend = reg_dummy = NULL;
04006       reg_info = reg_info_dummy = (register_info_type *) NULL;
04007     }
04008 #endif /* MATCH_MAY_ALLOCATE */
04009 
04010   /* The starting position is bogus.  */
04011   if (pos < 0 || pos > size1 + size2)
04012     {
04013       FREE_VARIABLES ();
04014       return -1;
04015     }
04016 
04017   /* Initialize subexpression text positions to -1 to mark ones that no
04018      start_memory/stop_memory has been seen for. Also initialize the
04019      register information struct.  */
04020   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
04021     {
04022       regstart[mcnt] = regend[mcnt]
04023         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
04024 
04025       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
04026       IS_ACTIVE (reg_info[mcnt]) = 0;
04027       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
04028       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
04029     }
04030 
04031   /* We move `string1' into `string2' if the latter's empty -- but not if
04032      `string1' is null.  */
04033   if (size2 == 0 && string1 != NULL)
04034     {
04035       string2 = string1;
04036       size2 = size1;
04037       string1 = 0;
04038       size1 = 0;
04039     }
04040   end1 = string1 + size1;
04041   end2 = string2 + size2;
04042 
04043   /* Compute where to stop matching, within the two strings.  */
04044   if (stop <= size1)
04045     {
04046       end_match_1 = string1 + stop;
04047       end_match_2 = string2;
04048     }
04049   else
04050     {
04051       end_match_1 = end1;
04052       end_match_2 = string2 + stop - size1;
04053     }
04054 
04055   /* `p' scans through the pattern as `d' scans through the data.
04056      `dend' is the end of the input string that `d' points within.  `d'
04057      is advanced into the following input string whenever necessary, but
04058      this happens before fetching; therefore, at the beginning of the
04059      loop, `d' can be pointing at the end of a string, but it cannot
04060      equal `string2'.  */
04061   if (size1 > 0 && pos <= size1)
04062     {
04063       d = string1 + pos;
04064       dend = end_match_1;
04065     }
04066   else
04067     {
04068       d = string2 + pos - size1;
04069       dend = end_match_2;
04070     }
04071 
04072   DEBUG_PRINT1 ("The compiled pattern is:\n");
04073   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
04074   DEBUG_PRINT1 ("The string to match is: `");
04075   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
04076   DEBUG_PRINT1 ("'\n");
04077 
04078   /* This loops over pattern commands.  It exits by returning from the
04079      function if the match is complete, or it drops through if the match
04080      fails at this starting point in the input data.  */
04081   for (;;)
04082     {
04083 #ifdef _LIBC
04084       DEBUG_PRINT2 ("\n%p: ", p);
04085 #else
04086       DEBUG_PRINT2 ("\n0x%x: ", p);
04087 #endif
04088 
04089       if (p == pend)
04090     { /* End of pattern means we might have succeeded.  */
04091           DEBUG_PRINT1 ("end of pattern ... ");
04092 
04093       /* If we haven't matched the entire string, and we want the
04094              longest match, try backtracking.  */
04095           if (d != end_match_2)
04096         {
04097           /* 1 if this match ends in the same string (string1 or string2)
04098          as the best previous match.  */
04099           boolean same_str_p = (FIRST_STRING_P (match_end)
04100                     == MATCHING_IN_FIRST_STRING);
04101           /* 1 if this match is the best seen so far.  */
04102           boolean best_match_p;
04103 
04104           /* AIX compiler got confused when this was combined
04105          with the previous declaration.  */
04106           if (same_str_p)
04107         best_match_p = d > match_end;
04108           else
04109         best_match_p = !MATCHING_IN_FIRST_STRING;
04110 
04111               DEBUG_PRINT1 ("backtracking.\n");
04112 
04113               if (!FAIL_STACK_EMPTY ())
04114                 { /* More failure points to try.  */
04115 
04116                   /* If exceeds best match so far, save it.  */
04117                   if (!best_regs_set || best_match_p)
04118                     {
04119                       best_regs_set = true;
04120                       match_end = d;
04121 
04122                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
04123 
04124                       for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
04125                         {
04126                           best_regstart[mcnt] = regstart[mcnt];
04127                           best_regend[mcnt] = regend[mcnt];
04128                         }
04129                     }
04130                   goto fail;
04131                 }
04132 
04133               /* If no failure points, don't restore garbage.  And if
04134                  last match is real best match, don't restore second
04135                  best one. */
04136               else if (best_regs_set && !best_match_p)
04137                 {
04138             restore_best_regs:
04139                   /* Restore best match.  It may happen that `dend ==
04140                      end_match_1' while the restored d is in string2.
04141                      For example, the pattern `x.*y.*z' against the
04142                      strings `x-' and `y-z-', if the two strings are
04143                      not consecutive in memory.  */
04144                   DEBUG_PRINT1 ("Restoring best registers.\n");
04145 
04146                   d = match_end;
04147                   dend = ((d >= string1 && d <= end1)
04148                    ? end_match_1 : end_match_2);
04149 
04150           for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
04151             {
04152               regstart[mcnt] = best_regstart[mcnt];
04153               regend[mcnt] = best_regend[mcnt];
04154             }
04155                 }
04156             } /* d != end_match_2 */
04157 
04158     succeed_label:
04159           DEBUG_PRINT1 ("Accepting match.\n");
04160 
04161           /* If caller wants register contents data back, do it.  */
04162           if (regs && !bufp->no_sub)
04163         {
04164               /* Have the register data arrays been allocated?  */
04165               if (bufp->regs_allocated == REGS_UNALLOCATED)
04166                 { /* No.  So allocate them with malloc.  We need one
04167                      extra element beyond `num_regs' for the `-1' marker
04168                      GNU code uses.  */
04169                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
04170                   regs->start = TALLOC (regs->num_regs, regoff_t);
04171                   regs->end = TALLOC (regs->num_regs, regoff_t);
04172                   if (regs->start == NULL || regs->end == NULL)
04173             {
04174               FREE_VARIABLES ();
04175               return -2;
04176             }
04177                   bufp->regs_allocated = REGS_REALLOCATE;
04178                 }
04179               else if (bufp->regs_allocated == REGS_REALLOCATE)
04180                 { /* Yes.  If we need more elements than were already
04181                      allocated, reallocate them.  If we need fewer, just
04182                      leave it alone.  */
04183                   if (regs->num_regs < num_regs + 1)
04184                     {
04185                       regs->num_regs = num_regs + 1;
04186                       RETALLOC (regs->start, regs->num_regs, regoff_t);
04187                       RETALLOC (regs->end, regs->num_regs, regoff_t);
04188                       if (regs->start == NULL || regs->end == NULL)
04189             {
04190               FREE_VARIABLES ();
04191               return -2;
04192             }
04193                     }
04194                 }
04195               else
04196         {
04197           /* These braces fend off a "empty body in an else-statement"
04198              warning under GCC when assert expands to nothing.  */
04199           assert (bufp->regs_allocated == REGS_FIXED);
04200         }
04201 
04202               /* Convert the pointer data in `regstart' and `regend' to
04203                  indices.  Register zero has to be set differently,
04204                  since we haven't kept track of any info for it.  */
04205               if (regs->num_regs > 0)
04206                 {
04207                   regs->start[0] = pos;
04208                   regs->end[0] = (MATCHING_IN_FIRST_STRING
04209                   ? ((regoff_t) (d - string1))
04210                       : ((regoff_t) (d - string2 + size1)));
04211                 }
04212 
04213               /* Go through the first `min (num_regs, regs->num_regs)'
04214                  registers, since that is all we initialized.  */
04215           for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
04216            mcnt++)
04217         {
04218                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
04219                     regs->start[mcnt] = regs->end[mcnt] = -1;
04220                   else
04221                     {
04222               regs->start[mcnt]
04223             = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
04224                       regs->end[mcnt]
04225             = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
04226                     }
04227         }
04228 
04229               /* If the regs structure we return has more elements than
04230                  were in the pattern, set the extra elements to -1.  If
04231                  we (re)allocated the registers, this is the case,
04232                  because we always allocate enough to have at least one
04233                  -1 at the end.  */
04234               for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
04235                 regs->start[mcnt] = regs->end[mcnt] = -1;
04236         } /* regs && !bufp->no_sub */
04237 
04238           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
04239                         nfailure_points_pushed, nfailure_points_popped,
04240                         nfailure_points_pushed - nfailure_points_popped);
04241           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
04242 
04243           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
04244                 ? string1
04245                 : string2 - size1);
04246 
04247           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
04248 
04249           FREE_VARIABLES ();
04250           return mcnt;
04251         }
04252 
04253       /* Otherwise match next pattern command.  */
04254       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
04255     {
04256         /* Ignore these.  Used to ignore the n of succeed_n's which
04257            currently have n == 0.  */
04258         case no_op:
04259           DEBUG_PRINT1 ("EXECUTING no_op.\n");
04260           break;
04261 
04262     case succeed:
04263           DEBUG_PRINT1 ("EXECUTING succeed.\n");
04264       goto succeed_label;
04265 
04266         /* Match the next n pattern characters exactly.  The following
04267            byte in the pattern defines n, and the n bytes after that
04268            are the characters to match.  */
04269     case exactn:
04270       mcnt = *p++;
04271           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
04272 
04273           /* This is written out as an if-else so we don't waste time
04274              testing `translate' inside the loop.  */
04275           if (translate)
04276         {
04277           do
04278         {
04279           PREFETCH ();
04280           if ((unsigned char) translate[(unsigned char) *d++]
04281               != (unsigned char) *p++)
04282                     goto fail;
04283         }
04284           while (--mcnt);
04285         }
04286       else
04287         {
04288           do
04289         {
04290           PREFETCH ();
04291           if (*d++ != (char) *p++) goto fail;
04292         }
04293           while (--mcnt);
04294         }
04295       SET_REGS_MATCHED ();
04296           break;
04297 
04298 
04299         /* Match any character except possibly a newline or a null.  */
04300     case anychar:
04301           DEBUG_PRINT1 ("EXECUTING anychar.\n");
04302 
04303           PREFETCH ();
04304 
04305           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
04306               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
04307         goto fail;
04308 
04309           SET_REGS_MATCHED ();
04310           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
04311           d++;
04312       break;
04313 
04314 
04315     case charset:
04316     case charset_not:
04317       {
04318         register unsigned char c;
04319         boolean not = (re_opcode_t) *(p - 1) == charset_not;
04320 
04321             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
04322 
04323         PREFETCH ();
04324         c = TRANSLATE (*d); /* The character to match.  */
04325 
04326             /* Cast to `unsigned' instead of `unsigned char' in case the
04327                bit list is a full 32 bytes long.  */
04328         if (c < (unsigned) (*p * BYTEWIDTH)
04329         && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04330           not = !not;
04331 
04332         p += 1 + *p;
04333 
04334         if (!not) goto fail;
04335 
04336         SET_REGS_MATCHED ();
04337             d++;
04338         break;
04339       }
04340 
04341 
04342         /* The beginning of a group is represented by start_memory.
04343            The arguments are the register number in the next byte, and the
04344            number of groups inner to this one in the next.  The text
04345            matched within the group is recorded (in the internal
04346            registers data structure) under the register number.  */
04347         case start_memory:
04348       DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
04349 
04350           /* Find out if this group can match the empty string.  */
04351       p1 = p;       /* To send to group_match_null_string_p.  */
04352 
04353           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
04354             REG_MATCH_NULL_STRING_P (reg_info[*p])
04355               = group_match_null_string_p (&p1, pend, reg_info);
04356 
04357           /* Save the position in the string where we were the last time
04358              we were at this open-group operator in case the group is
04359              operated upon by a repetition operator, e.g., with `(a*)*b'
04360              against `ab'; then we want to ignore where we are now in
04361              the string in case this attempt to match fails.  */
04362           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04363                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
04364                              : regstart[*p];
04365       DEBUG_PRINT2 ("  old_regstart: %d\n",
04366              POINTER_TO_OFFSET (old_regstart[*p]));
04367 
04368           regstart[*p] = d;
04369       DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
04370 
04371           IS_ACTIVE (reg_info[*p]) = 1;
04372           MATCHED_SOMETHING (reg_info[*p]) = 0;
04373 
04374       /* Clear this whenever we change the register activity status.  */
04375       set_regs_matched_done = 0;
04376 
04377           /* This is the new highest active register.  */
04378           highest_active_reg = *p;
04379 
04380           /* If nothing was active before, this is the new lowest active
04381              register.  */
04382           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04383             lowest_active_reg = *p;
04384 
04385           /* Move past the register number and inner group count.  */
04386           p += 2;
04387       just_past_start_mem = p;
04388 
04389           break;
04390 
04391 
04392         /* The stop_memory opcode represents the end of a group.  Its
04393            arguments are the same as start_memory's: the register
04394            number, and the number of inner groups.  */
04395     case stop_memory:
04396       DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
04397 
04398           /* We need to save the string position the last time we were at
04399              this close-group operator in case the group is operated
04400              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
04401              against `aba'; then we want to ignore where we are now in
04402              the string in case this attempt to match fails.  */
04403           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04404                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
04405                : regend[*p];
04406       DEBUG_PRINT2 ("      old_regend: %d\n",
04407              POINTER_TO_OFFSET (old_regend[*p]));
04408 
04409           regend[*p] = d;
04410       DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
04411 
04412           /* This register isn't active anymore.  */
04413           IS_ACTIVE (reg_info[*p]) = 0;
04414 
04415       /* Clear this whenever we change the register activity status.  */
04416       set_regs_matched_done = 0;
04417 
04418           /* If this was the only register active, nothing is active
04419              anymore.  */
04420           if (lowest_active_reg == highest_active_reg)
04421             {
04422               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04423               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04424             }
04425           else
04426             { /* We must scan for the new highest active register, since
04427                  it isn't necessarily one less than now: consider
04428                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
04429                  new highest active register is 1.  */
04430               unsigned char r = *p - 1;
04431               while (r > 0 && !IS_ACTIVE (reg_info[r]))
04432                 r--;
04433 
04434               /* If we end up at register zero, that means that we saved
04435                  the registers as the result of an `on_failure_jump', not
04436                  a `start_memory', and we jumped to past the innermost
04437                  `stop_memory'.  For example, in ((.)*) we save
04438                  registers 1 and 2 as a result of the *, but when we pop
04439                  back to the second ), we are at the stop_memory 1.
04440                  Thus, nothing is active.  */
04441           if (r == 0)
04442                 {
04443                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04444                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04445                 }
04446               else
04447                 highest_active_reg = r;
04448             }
04449 
04450           /* If just failed to match something this time around with a
04451              group that's operated on by a repetition operator, try to
04452              force exit from the ``loop'', and restore the register
04453              information for this group that we had before trying this
04454              last match.  */
04455           if ((!MATCHED_SOMETHING (reg_info[*p])
04456                || just_past_start_mem == p - 1)
04457           && (p + 2) < pend)
04458             {
04459               boolean is_a_jump_n = false;
04460 
04461               p1 = p + 2;
04462               mcnt = 0;
04463               switch ((re_opcode_t) *p1++)
04464                 {
04465                   case jump_n:
04466             is_a_jump_n = true;
04467                   case pop_failure_jump:
04468           case maybe_pop_jump:
04469           case jump:
04470           case dummy_failure_jump:
04471                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04472             if (is_a_jump_n)
04473               p1 += 2;
04474                     break;
04475 
04476                   default:
04477                     /* do nothing */ ;
04478                 }
04479           p1 += mcnt;
04480 
04481               /* If the next operation is a jump backwards in the pattern
04482              to an on_failure_jump right before the start_memory
04483                  corresponding to this stop_memory, exit from the loop
04484                  by forcing a failure after pushing on the stack the
04485                  on_failure_jump's jump in the pattern, and d.  */
04486               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
04487                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
04488         {
04489                   /* If this group ever matched anything, then restore
04490                      what its registers were before trying this last
04491                      failed match, e.g., with `(a*)*b' against `ab' for
04492                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
04493                      against `aba' for regend[3].
04494 
04495                      Also restore the registers for inner groups for,
04496                      e.g., `((a*)(b*))*' against `aba' (register 3 would
04497                      otherwise get trashed).  */
04498 
04499                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
04500             {
04501               unsigned r;
04502 
04503                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
04504 
04505               /* Restore this and inner groups' (if any) registers.  */
04506                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
04507                r++)
04508                         {
04509                           regstart[r] = old_regstart[r];
04510 
04511                           /* xx why this test?  */
04512                           if (old_regend[r] >= regstart[r])
04513                             regend[r] = old_regend[r];
04514                         }
04515                     }
04516           p1++;
04517                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04518                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
04519 
04520                   goto fail;
04521                 }
04522             }
04523 
04524           /* Move past the register number and the inner group count.  */
04525           p += 2;
04526           break;
04527 
04528 
04529     /* <digit> has been turned into a `duplicate' command which is
04530            followed by the numeric value of <digit> as the register number.  */
04531         case duplicate:
04532       {
04533         register const char *d2, *dend2;
04534         int regno = *p++;   /* Get which register to match against.  */
04535         DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
04536 
04537         /* Can't back reference a group which we've never matched.  */
04538             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
04539               goto fail;
04540 
04541             /* Where in input to try to start matching.  */
04542             d2 = regstart[regno];
04543 
04544             /* Where to stop matching; if both the place to start and
04545                the place to stop matching are in the same string, then
04546                set to the place to stop, otherwise, for now have to use
04547                the end of the first string.  */
04548 
04549             dend2 = ((FIRST_STRING_P (regstart[regno])
04550               == FIRST_STRING_P (regend[regno]))
04551              ? regend[regno] : end_match_1);
04552         for (;;)
04553           {
04554         /* If necessary, advance to next segment in register
04555                    contents.  */
04556         while (d2 == dend2)
04557           {
04558             if (dend2 == end_match_2) break;
04559             if (dend2 == regend[regno]) break;
04560 
04561                     /* End of string1 => advance to string2. */
04562                     d2 = string2;
04563                     dend2 = regend[regno];
04564           }
04565         /* At end of register contents => success */
04566         if (d2 == dend2) break;
04567 
04568         /* If necessary, advance to next segment in data.  */
04569         PREFETCH ();
04570 
04571         /* How many characters left in this segment to match.  */
04572         mcnt = dend - d;
04573 
04574         /* Want how many consecutive characters we can match in
04575                    one shot, so, if necessary, adjust the count.  */
04576                 if (mcnt > dend2 - d2)
04577           mcnt = dend2 - d2;
04578 
04579         /* Compare that many; failure if mismatch, else move
04580                    past them.  */
04581         if (translate
04582                     ? bcmp_translate (d, d2, mcnt, translate)
04583                     : memcmp (d, d2, mcnt))
04584           goto fail;
04585         d += mcnt, d2 += mcnt;
04586 
04587         /* Do this because we've match some characters.  */
04588         SET_REGS_MATCHED ();
04589           }
04590       }
04591       break;
04592 
04593 
04594         /* begline matches the empty string at the beginning of the string
04595            (unless `not_bol' is set in `bufp'), and, if
04596            `newline_anchor' is set, after newlines.  */
04597     case begline:
04598           DEBUG_PRINT1 ("EXECUTING begline.\n");
04599 
04600           if (AT_STRINGS_BEG (d))
04601             {
04602               if (!bufp->not_bol) break;
04603             }
04604           else if (d[-1] == '\n' && bufp->newline_anchor)
04605             {
04606               break;
04607             }
04608           /* In all other cases, we fail.  */
04609           goto fail;
04610 
04611 
04612         /* endline is the dual of begline.  */
04613     case endline:
04614           DEBUG_PRINT1 ("EXECUTING endline.\n");
04615 
04616           if (AT_STRINGS_END (d))
04617             {
04618               if (!bufp->not_eol) break;
04619             }
04620 
04621           /* We have to ``prefetch'' the next character.  */
04622           else if ((d == end1 ? *string2 : *d) == '\n'
04623                    && bufp->newline_anchor)
04624             {
04625               break;
04626             }
04627           goto fail;
04628 
04629 
04630     /* Match at the very beginning of the data.  */
04631         case begbuf:
04632           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
04633           if (AT_STRINGS_BEG (d))
04634             break;
04635           goto fail;
04636 
04637 
04638     /* Match at the very end of the data.  */
04639         case endbuf:
04640           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
04641       if (AT_STRINGS_END (d))
04642         break;
04643           goto fail;
04644 
04645 
04646         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
04647            pushes NULL as the value for the string on the stack.  Then
04648            `pop_failure_point' will keep the current value for the
04649            string, instead of restoring it.  To see why, consider
04650            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
04651            then the . fails against the \n.  But the next thing we want
04652            to do is match the \n against the \n; if we restored the
04653            string value, we would be back at the foo.
04654 
04655            Because this is used only in specific cases, we don't need to
04656            check all the things that `on_failure_jump' does, to make
04657            sure the right things get saved on the stack.  Hence we don't
04658            share its code.  The only reason to push anything on the
04659            stack at all is that otherwise we would have to change
04660            `anychar's code to do something besides goto fail in this
04661            case; that seems worse than this.  */
04662         case on_failure_keep_string_jump:
04663           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
04664 
04665           EXTRACT_NUMBER_AND_INCR (mcnt, p);
04666 #ifdef _LIBC
04667           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
04668 #else
04669           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
04670 #endif
04671 
04672           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
04673           break;
04674 
04675 
04676     /* Uses of on_failure_jump:
04677 
04678            Each alternative starts with an on_failure_jump that points
04679            to the beginning of the next alternative.  Each alternative
04680            except the last ends with a jump that in effect jumps past
04681            the rest of the alternatives.  (They really jump to the
04682            ending jump of the following alternative, because tensioning
04683            these jumps is a hassle.)
04684 
04685            Repeats start with an on_failure_jump that points past both
04686            the repetition text and either the following jump or
04687            pop_failure_jump back to this on_failure_jump.  */
04688     case on_failure_jump:
04689         on_failure:
04690           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
04691 
04692           EXTRACT_NUMBER_AND_INCR (mcnt, p);
04693 #ifdef _LIBC
04694           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
04695 #else
04696           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
04697 #endif
04698 
04699           /* If this on_failure_jump comes right before a group (i.e.,
04700              the original * applied to a group), save the information
04701              for that group and all inner ones, so that if we fail back
04702              to this point, the group's information will be correct.
04703              For example, in \(a*\)*\1, we need the preceding group,
04704              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
04705 
04706           /* We can't use `p' to check ahead because we push
04707              a failure point to `p + mcnt' after we do this.  */
04708           p1 = p;
04709 
04710           /* We need to skip no_op's before we look for the
04711              start_memory in case this on_failure_jump is happening as
04712              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
04713              against aba.  */
04714           while (p1 < pend && (re_opcode_t) *p1 == no_op)
04715             p1++;
04716 
04717           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
04718             {
04719               /* We have a new highest active register now.  This will
04720                  get reset at the start_memory we are about to get to,
04721                  but we will have saved all the registers relevant to
04722                  this repetition op, as described above.  */
04723               highest_active_reg = *(p1 + 1) + *(p1 + 2);
04724               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04725                 lowest_active_reg = *(p1 + 1);
04726             }
04727 
04728           DEBUG_PRINT1 (":\n");
04729           PUSH_FAILURE_POINT (p + mcnt, d, -2);
04730           break;
04731 
04732 
04733         /* A smart repeat ends with `maybe_pop_jump'.
04734        We change it to either `pop_failure_jump' or `jump'.  */
04735         case maybe_pop_jump:
04736           EXTRACT_NUMBER_AND_INCR (mcnt, p);
04737           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
04738           {
04739         register unsigned char *p2 = p;
04740 
04741             /* Compare the beginning of the repeat with what in the
04742                pattern follows its end. If we can establish that there
04743                is nothing that they would both match, i.e., that we
04744                would have to backtrack because of (as in, e.g., `a*a')
04745                then we can change to pop_failure_jump, because we'll
04746                never have to backtrack.
04747 
04748                This is not true in the case of alternatives: in
04749                `(a|ab)*' we do need to backtrack to the `ab' alternative
04750                (e.g., if the string was `ab').  But instead of trying to
04751                detect that here, the alternative has put on a dummy
04752                failure point which is what we will end up popping.  */
04753 
04754         /* Skip over open/close-group commands.
04755            If what follows this loop is a ...+ construct,
04756            look at what begins its body, since we will have to
04757            match at least one of that.  */
04758         while (1)
04759           {
04760         if (p2 + 2 < pend
04761             && ((re_opcode_t) *p2 == stop_memory
04762             || (re_opcode_t) *p2 == start_memory))
04763           p2 += 3;
04764         else if (p2 + 6 < pend
04765              && (re_opcode_t) *p2 == dummy_failure_jump)
04766           p2 += 6;
04767         else
04768           break;
04769           }
04770 
04771         p1 = p + mcnt;
04772         /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
04773            to the `maybe_finalize_jump' of this case.  Examine what
04774            follows.  */
04775 
04776             /* If we're at the end of the pattern, we can change.  */
04777             if (p2 == pend)
04778           {
04779         /* Consider what happens when matching ":\(.*\)"
04780            against ":/".  I don't really understand this code
04781            yet.  */
04782             p[-3] = (unsigned char) pop_failure_jump;
04783                 DEBUG_PRINT1
04784                   ("  End of pattern: change to `pop_failure_jump'.\n");
04785               }
04786 
04787             else if ((re_opcode_t) *p2 == exactn
04788              || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
04789           {
04790         register unsigned char c
04791                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
04792 
04793                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
04794                   {
04795             p[-3] = (unsigned char) pop_failure_jump;
04796                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
04797                                   c, p1[5]);
04798                   }
04799 
04800         else if ((re_opcode_t) p1[3] == charset
04801              || (re_opcode_t) p1[3] == charset_not)
04802           {
04803             int not = (re_opcode_t) p1[3] == charset_not;
04804 
04805             if (c < (unsigned char) (p1[4] * BYTEWIDTH)
04806             && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04807               not = !not;
04808 
04809                     /* `not' is equal to 1 if c would match, which means
04810                         that we can't change to pop_failure_jump.  */
04811             if (!not)
04812                       {
04813                 p[-3] = (unsigned char) pop_failure_jump;
04814                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
04815                       }
04816           }
04817           }
04818             else if ((re_opcode_t) *p2 == charset)
04819           {
04820 #ifdef DEBUG
04821         register unsigned char c
04822                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
04823 #endif
04824 
04825 #if 0
04826                 if ((re_opcode_t) p1[3] == exactn
04827             && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
04828               && (p2[2 + p1[5] / BYTEWIDTH]
04829                   & (1 << (p1[5] % BYTEWIDTH)))))
04830 #else
04831                 if ((re_opcode_t) p1[3] == exactn
04832             && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
04833               && (p2[2 + p1[4] / BYTEWIDTH]
04834                   & (1 << (p1[4] % BYTEWIDTH)))))
04835 #endif
04836                   {
04837             p[-3] = (unsigned char) pop_failure_jump;
04838                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
04839                                   c, p1[5]);
04840                   }
04841 
04842         else if ((re_opcode_t) p1[3] == charset_not)
04843           {
04844             int idx;
04845             /* We win if the charset_not inside the loop
04846                lists every character listed in the charset after.  */
04847             for (idx = 0; idx < (int) p2[1]; idx++)
04848               if (! (p2[2 + idx] == 0
04849                  || (idx < (int) p1[4]
04850                  && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
04851             break;
04852 
04853             if (idx == p2[1])
04854                       {
04855                 p[-3] = (unsigned char) pop_failure_jump;
04856                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
04857                       }
04858           }
04859         else if ((re_opcode_t) p1[3] == charset)
04860           {
04861             int idx;
04862             /* We win if the charset inside the loop
04863                has no overlap with the one after the loop.  */
04864             for (idx = 0;
04865              idx < (int) p2[1] && idx < (int) p1[4];
04866              idx++)
04867               if ((p2[2 + idx] & p1[5 + idx]) != 0)
04868             break;
04869 
04870             if (idx == p2[1] || idx == p1[4])
04871                       {
04872                 p[-3] = (unsigned char) pop_failure_jump;
04873                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
04874                       }
04875           }
04876           }
04877       }
04878       p -= 2;       /* Point at relative address again.  */
04879       if ((re_opcode_t) p[-1] != pop_failure_jump)
04880         {
04881           p[-1] = (unsigned char) jump;
04882               DEBUG_PRINT1 ("  Match => jump.\n");
04883           goto unconditional_jump;
04884         }
04885         /* Note fall through.  */
04886 
04887 
04888     /* The end of a simple repeat has a pop_failure_jump back to
04889            its matching on_failure_jump, where the latter will push a
04890            failure point.  The pop_failure_jump takes off failure
04891            points put on by this pop_failure_jump's matching
04892            on_failure_jump; we got through the pattern to here from the
04893            matching on_failure_jump, so didn't fail.  */
04894         case pop_failure_jump:
04895           {
04896             /* We need to pass separate storage for the lowest and
04897                highest registers, even though we don't care about the
04898                actual values.  Otherwise, we will restore only one
04899                register from the stack, since lowest will == highest in
04900                `pop_failure_point'.  */
04901             active_reg_t dummy_low_reg, dummy_high_reg;
04902             unsigned char *pdummy;
04903             const char *sdummy;
04904 
04905             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
04906             POP_FAILURE_POINT (sdummy, pdummy,
04907                                dummy_low_reg, dummy_high_reg,
04908                                reg_dummy, reg_dummy, reg_info_dummy);
04909           }
04910       /* Note fall through.  */
04911 
04912     unconditional_jump:
04913 #ifdef _LIBC
04914       DEBUG_PRINT2 ("\n%p: ", p);
04915 #else
04916       DEBUG_PRINT2 ("\n0x%x: ", p);
04917 #endif
04918           /* Note fall through.  */
04919 
04920         /* Unconditionally jump (without popping any failure points).  */
04921         case jump:
04922       EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
04923           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
04924       p += mcnt;                /* Do the jump.  */
04925 #ifdef _LIBC
04926           DEBUG_PRINT2 ("(to %p).\n", p);
04927 #else
04928           DEBUG_PRINT2 ("(to 0x%x).\n", p);
04929 #endif
04930       break;
04931 
04932 
04933         /* We need this opcode so we can detect where alternatives end
04934            in `group_match_null_string_p' et al.  */
04935         case jump_past_alt:
04936           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
04937           goto unconditional_jump;
04938 
04939 
04940         /* Normally, the on_failure_jump pushes a failure point, which
04941            then gets popped at pop_failure_jump.  We will end up at
04942            pop_failure_jump, also, and with a pattern of, say, `a+', we
04943            are skipping over the on_failure_jump, so we have to push
04944            something meaningless for pop_failure_jump to pop.  */
04945         case dummy_failure_jump:
04946           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
04947           /* It doesn't matter what we push for the string here.  What
04948              the code at `fail' tests is the value for the pattern.  */
04949           PUSH_FAILURE_POINT (NULL, NULL, -2);
04950           goto unconditional_jump;
04951 
04952 
04953         /* At the end of an alternative, we need to push a dummy failure
04954            point in case we are followed by a `pop_failure_jump', because
04955            we don't want the failure point for the alternative to be
04956            popped.  For example, matching `(a|ab)*' against `aab'
04957            requires that we match the `ab' alternative.  */
04958         case push_dummy_failure:
04959           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
04960           /* See comments just above at `dummy_failure_jump' about the
04961              two zeroes.  */
04962           PUSH_FAILURE_POINT (NULL, NULL, -2);
04963           break;
04964 
04965         /* Have to succeed matching what follows at least n times.
04966            After that, handle like `on_failure_jump'.  */
04967         case succeed_n:
04968           EXTRACT_NUMBER (mcnt, p + 2);
04969           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
04970 
04971           assert (mcnt >= 0);
04972           /* Originally, this is how many times we HAVE to succeed.  */
04973           if (mcnt > 0)
04974             {
04975                mcnt--;
04976            p += 2;
04977                STORE_NUMBER_AND_INCR (p, mcnt);
04978 #ifdef _LIBC
04979                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
04980 #else
04981                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
04982 #endif
04983             }
04984       else if (mcnt == 0)
04985             {
04986 #ifdef _LIBC
04987               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
04988 #else
04989               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
04990 #endif
04991           p[2] = (unsigned char) no_op;
04992               p[3] = (unsigned char) no_op;
04993               goto on_failure;
04994             }
04995           break;
04996 
04997         case jump_n:
04998           EXTRACT_NUMBER (mcnt, p + 2);
04999           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
05000 
05001           /* Originally, this is how many times we CAN jump.  */
05002           if (mcnt)
05003             {
05004                mcnt--;
05005                STORE_NUMBER (p + 2, mcnt);
05006 #ifdef _LIBC
05007                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
05008 #else
05009                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
05010 #endif
05011            goto unconditional_jump;
05012             }
05013           /* If don't have to jump any more, skip over the rest of command.  */
05014       else
05015         p += 4;
05016           break;
05017 
05018     case set_number_at:
05019       {
05020             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
05021 
05022             EXTRACT_NUMBER_AND_INCR (mcnt, p);
05023             p1 = p + mcnt;
05024             EXTRACT_NUMBER_AND_INCR (mcnt, p);
05025 #ifdef _LIBC
05026             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
05027 #else
05028             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
05029 #endif
05030         STORE_NUMBER (p1, mcnt);
05031             break;
05032           }
05033 
05034 #if 0
05035     /* The DEC Alpha C compiler 3.x generates incorrect code for the
05036        test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
05037        AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
05038        macro and introducing temporary variables works around the bug.  */
05039 
05040     case wordbound:
05041       DEBUG_PRINT1 ("EXECUTING wordbound.\n");
05042       if (AT_WORD_BOUNDARY (d))
05043         break;
05044       goto fail;
05045 
05046     case notwordbound:
05047       DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
05048       if (AT_WORD_BOUNDARY (d))
05049         goto fail;
05050       break;
05051 #else
05052     case wordbound:
05053     {
05054       boolean prevchar, thischar;
05055 
05056       DEBUG_PRINT1 ("EXECUTING wordbound.\n");
05057       if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
05058         break;
05059 
05060       prevchar = WORDCHAR_P (d - 1);
05061       thischar = WORDCHAR_P (d);
05062       if (prevchar != thischar)
05063         break;
05064       goto fail;
05065     }
05066 
05067       case notwordbound:
05068     {
05069       boolean prevchar, thischar;
05070 
05071       DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
05072       if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
05073         goto fail;
05074 
05075       prevchar = WORDCHAR_P (d - 1);
05076       thischar = WORDCHAR_P (d);
05077       if (prevchar != thischar)
05078         goto fail;
05079       break;
05080     }
05081 #endif
05082 
05083     case wordbeg:
05084           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
05085       if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
05086         break;
05087           goto fail;
05088 
05089     case wordend:
05090           DEBUG_PRINT1 ("EXECUTING wordend.\n");
05091       if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
05092               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
05093         break;
05094           goto fail;
05095 
05096 #ifdef emacs
05097     case before_dot:
05098           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
05099       if (PTR_CHAR_POS ((unsigned char *) d) >= point)
05100         goto fail;
05101       break;
05102 
05103     case at_dot:
05104           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
05105       if (PTR_CHAR_POS ((unsigned char *) d) != point)
05106         goto fail;
05107       break;
05108 
05109     case after_dot:
05110           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
05111           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
05112         goto fail;
05113       break;
05114 
05115     case syntaxspec:
05116           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
05117       mcnt = *p++;
05118       goto matchsyntax;
05119 
05120         case wordchar:
05121           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
05122       mcnt = (int) Sword;
05123         matchsyntax:
05124       PREFETCH ();
05125       /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
05126       d++;
05127       if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
05128         goto fail;
05129           SET_REGS_MATCHED ();
05130       break;
05131 
05132     case notsyntaxspec:
05133           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
05134       mcnt = *p++;
05135       goto matchnotsyntax;
05136 
05137         case notwordchar:
05138           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
05139       mcnt = (int) Sword;
05140         matchnotsyntax:
05141       PREFETCH ();
05142       /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
05143       d++;
05144       if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
05145         goto fail;
05146       SET_REGS_MATCHED ();
05147           break;
05148 
05149 #else /* not emacs */
05150     case wordchar:
05151           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
05152       PREFETCH ();
05153           if (!WORDCHAR_P (d))
05154             goto fail;
05155       SET_REGS_MATCHED ();
05156           d++;
05157       break;
05158 
05159     case notwordchar:
05160           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
05161       PREFETCH ();
05162       if (WORDCHAR_P (d))
05163             goto fail;
05164           SET_REGS_MATCHED ();
05165           d++;
05166       break;
05167 #endif /* not emacs */
05168 
05169         default:
05170           abort ();
05171     }
05172       continue;  /* Successfully executed one pattern command; keep going.  */
05173 
05174 
05175     /* We goto here if a matching operation fails. */
05176     fail:
05177       if (!FAIL_STACK_EMPTY ())
05178     { /* A restart point is known.  Restore to that state.  */
05179           DEBUG_PRINT1 ("\nFAIL:\n");
05180           POP_FAILURE_POINT (d, p,
05181                              lowest_active_reg, highest_active_reg,
05182                              regstart, regend, reg_info);
05183 
05184           /* If this failure point is a dummy, try the next one.  */
05185           if (!p)
05186         goto fail;
05187 
05188           /* If we failed to the end of the pattern, don't examine *p.  */
05189       assert (p <= pend);
05190           if (p < pend)
05191             {
05192               boolean is_a_jump_n = false;
05193 
05194               /* If failed to a backwards jump that's part of a repetition
05195                  loop, need to pop this failure point and use the next one.  */
05196               switch ((re_opcode_t) *p)
05197                 {
05198                 case jump_n:
05199                   is_a_jump_n = true;
05200                 case maybe_pop_jump:
05201                 case pop_failure_jump:
05202                 case jump:
05203                   p1 = p + 1;
05204                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05205                   p1 += mcnt;
05206 
05207                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
05208                       || (!is_a_jump_n
05209                           && (re_opcode_t) *p1 == on_failure_jump))
05210                     goto fail;
05211                   break;
05212                 default:
05213                   /* do nothing */ ;
05214                 }
05215             }
05216 
05217           if (d >= string1 && d <= end1)
05218         dend = end_match_1;
05219         }
05220       else
05221         break;   /* Matching at this starting point really fails.  */
05222     } /* for (;;) */
05223 
05224   if (best_regs_set)
05225     goto restore_best_regs;
05226 
05227   FREE_VARIABLES ();
05228 
05229   return -1;                    /* Failure to match.  */
05230 } /* re_match_2 */
05231 
05232 /* Subroutine definitions for re_match_2.  */
05233 
05234 
05235 /* We are passed P pointing to a register number after a start_memory.
05236 
05237    Return true if the pattern up to the corresponding stop_memory can
05238    match the empty string, and false otherwise.
05239 
05240    If we find the matching stop_memory, sets P to point to one past its number.
05241    Otherwise, sets P to an undefined byte less than or equal to END.
05242 
05243    We don't handle duplicates properly (yet).  */
05244 
05245 static boolean
05246 group_match_null_string_p (p, end, reg_info)
05247     unsigned char **p, *end;
05248     register_info_type *reg_info;
05249 {
05250   int mcnt;
05251   /* Point to after the args to the start_memory.  */
05252   unsigned char *p1 = *p + 2;
05253 
05254   while (p1 < end)
05255     {
05256       /* Skip over opcodes that can match nothing, and return true or
05257      false, as appropriate, when we get to one that can't, or to the
05258          matching stop_memory.  */
05259 
05260       switch ((re_opcode_t) *p1)
05261         {
05262         /* Could be either a loop or a series of alternatives.  */
05263         case on_failure_jump:
05264           p1++;
05265           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05266 
05267           /* If the next operation is not a jump backwards in the
05268          pattern.  */
05269 
05270       if (mcnt >= 0)
05271         {
05272               /* Go through the on_failure_jumps of the alternatives,
05273                  seeing if any of the alternatives cannot match nothing.
05274                  The last alternative starts with only a jump,
05275                  whereas the rest start with on_failure_jump and end
05276                  with a jump, e.g., here is the pattern for `a|b|c':
05277 
05278                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
05279                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
05280                  /exactn/1/c
05281 
05282                  So, we have to first go through the first (n-1)
05283                  alternatives and then deal with the last one separately.  */
05284 
05285 
05286               /* Deal with the first (n-1) alternatives, which start
05287                  with an on_failure_jump (see above) that jumps to right
05288                  past a jump_past_alt.  */
05289 
05290               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
05291                 {
05292                   /* `mcnt' holds how many bytes long the alternative
05293                      is, including the ending `jump_past_alt' and
05294                      its number.  */
05295 
05296                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
05297                                       reg_info))
05298                     return false;
05299 
05300                   /* Move to right after this alternative, including the
05301              jump_past_alt.  */
05302                   p1 += mcnt;
05303 
05304                   /* Break if it's the beginning of an n-th alternative
05305                      that doesn't begin with an on_failure_jump.  */
05306                   if ((re_opcode_t) *p1 != on_failure_jump)
05307                     break;
05308 
05309           /* Still have to check that it's not an n-th
05310              alternative that starts with an on_failure_jump.  */
05311           p1++;
05312                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05313                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
05314                     {
05315               /* Get to the beginning of the n-th alternative.  */
05316                       p1 -= 3;
05317                       break;
05318                     }
05319                 }
05320 
05321               /* Deal with the last alternative: go back and get number
05322                  of the `jump_past_alt' just before it.  `mcnt' contains
05323                  the length of the alternative.  */
05324               EXTRACT_NUMBER (mcnt, p1 - 2);
05325 
05326               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
05327                 return false;
05328 
05329               p1 += mcnt;   /* Get past the n-th alternative.  */
05330             } /* if mcnt > 0 */
05331           break;
05332 
05333 
05334         case stop_memory:
05335       assert (p1[1] == **p);
05336           *p = p1 + 2;
05337           return true;
05338 
05339 
05340         default:
05341           if (!common_op_match_null_string_p (&p1, end, reg_info))
05342             return false;
05343         }
05344     } /* while p1 < end */
05345 
05346   return false;
05347 } /* group_match_null_string_p */
05348 
05349 
05350 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
05351    It expects P to be the first byte of a single alternative and END one
05352    byte past the last. The alternative can contain groups.  */
05353 
05354 static boolean
05355 alt_match_null_string_p (p, end, reg_info)
05356     unsigned char *p, *end;
05357     register_info_type *reg_info;
05358 {
05359   int mcnt;
05360   unsigned char *p1 = p;
05361 
05362   while (p1 < end)
05363     {
05364       /* Skip over opcodes that can match nothing, and break when we get
05365          to one that can't.  */
05366 
05367       switch ((re_opcode_t) *p1)
05368         {
05369     /* It's a loop.  */
05370         case on_failure_jump:
05371           p1++;
05372           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05373           p1 += mcnt;
05374           break;
05375 
05376     default:
05377           if (!common_op_match_null_string_p (&p1, end, reg_info))
05378             return false;
05379         }
05380     }  /* while p1 < end */
05381 
05382   return true;
05383 } /* alt_match_null_string_p */
05384 
05385 
05386 /* Deals with the ops common to group_match_null_string_p and
05387    alt_match_null_string_p.
05388 
05389    Sets P to one after the op and its arguments, if any.  */
05390 
05391 static boolean
05392 common_op_match_null_string_p (p, end, reg_info)
05393     unsigned char **p, *end;
05394     register_info_type *reg_info;
05395 {
05396   int mcnt;
05397   boolean ret;
05398   int reg_no;
05399   unsigned char *p1 = *p;
05400 
05401   switch ((re_opcode_t) *p1++)
05402     {
05403     case no_op:
05404     case begline:
05405     case endline:
05406     case begbuf:
05407     case endbuf:
05408     case wordbeg:
05409     case wordend:
05410     case wordbound:
05411     case notwordbound:
05412 #ifdef emacs
05413     case before_dot:
05414     case at_dot:
05415     case after_dot:
05416 #endif
05417       break;
05418 
05419     case start_memory:
05420       reg_no = *p1;
05421       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
05422       ret = group_match_null_string_p (&p1, end, reg_info);
05423 
05424       /* Have to set this here in case we're checking a group which
05425          contains a group and a back reference to it.  */
05426 
05427       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
05428         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
05429 
05430       if (!ret)
05431         return false;
05432       break;
05433 
05434     /* If this is an optimized succeed_n for zero times, make the jump.  */
05435     case jump:
05436       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05437       if (mcnt >= 0)
05438         p1 += mcnt;
05439       else
05440         return false;
05441       break;
05442 
05443     case succeed_n:
05444       /* Get to the number of times to succeed.  */
05445       p1 += 2;
05446       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05447 
05448       if (mcnt == 0)
05449         {
05450           p1 -= 4;
05451           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05452           p1 += mcnt;
05453         }
05454       else
05455         return false;
05456       break;
05457 
05458     case duplicate:
05459       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
05460         return false;
05461       break;
05462 
05463     case set_number_at:
05464       p1 += 4;
05465 
05466     default:
05467       /* All other opcodes mean we cannot match the empty string.  */
05468       return false;
05469   }
05470 
05471   *p = p1;
05472   return true;
05473 } /* common_op_match_null_string_p */
05474 
05475 
05476 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
05477    bytes; nonzero otherwise.  */
05478 
05479 static int
05480 bcmp_translate (s1, s2, len, translate)
05481      const char *s1, *s2;
05482      register int len;
05483      RE_TRANSLATE_TYPE translate;
05484 {
05485   register const unsigned char *p1 = (const unsigned char *) s1;
05486   register const unsigned char *p2 = (const unsigned char *) s2;
05487   while (len)
05488     {
05489       if (translate[*p1++] != translate[*p2++]) return 1;
05490       len--;
05491     }
05492   return 0;
05493 }
05494 
05495 /* Entry points for GNU code.  */
05496 
05497 /* re_compile_pattern is the GNU regular expression compiler: it
05498    compiles PATTERN (of length SIZE) and puts the result in BUFP.
05499    Returns 0 if the pattern was valid, otherwise an error string.
05500 
05501    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
05502    are set in BUFP on entry.
05503 
05504    We call regex_compile to do the actual compilation.  */
05505 
05506 const char *
05507 re_compile_pattern (pattern, length, bufp)
05508      const char *pattern;
05509      size_t length;
05510      struct re_pattern_buffer *bufp;
05511 {
05512   reg_errcode_t ret;
05513 
05514   /* GNU code is written to assume at least RE_NREGS registers will be set
05515      (and at least one extra will be -1).  */
05516   bufp->regs_allocated = REGS_UNALLOCATED;
05517 
05518   /* And GNU code determines whether or not to get register information
05519      by passing null for the REGS argument to re_match, etc., not by
05520      setting no_sub.  */
05521   bufp->no_sub = 0;
05522 
05523   /* Match anchors at newline.  */
05524   bufp->newline_anchor = 1;
05525 
05526   ret = regex_compile (pattern, length, re_syntax_options, bufp);
05527 
05528   if (!ret)
05529     return NULL;
05530   return gettext (re_error_msgid[(int) ret]);
05531 }
05532 #ifdef _LIBC
05533 weak_alias (__re_compile_pattern, re_compile_pattern)
05534 #endif
05535 
05536 /* Entry points compatible with 4.2 BSD regex library.  We don't define
05537    them unless specifically requested.  */
05538 
05539 #if defined _REGEX_RE_COMP || defined _LIBC
05540 
05541 /* BSD has one and only one pattern buffer.  */
05542 static struct re_pattern_buffer re_comp_buf;
05543 
05544 char *
05545 #ifdef _LIBC
05546 /* Make these definitions weak in libc, so POSIX programs can redefine
05547    these names if they don't use our functions, and still use
05548    regcomp/regexec below without link errors.  */
05549 weak_function
05550 #endif
05551 re_comp (s)
05552     const char *s;
05553 {
05554   reg_errcode_t ret;
05555 
05556   if (!s)
05557     {
05558       if (!re_comp_buf.buffer)
05559     return gettext ("No previous regular expression");
05560       return 0;
05561     }
05562 
05563   if (!re_comp_buf.buffer)
05564     {
05565       re_comp_buf.buffer = (unsigned char *) malloc (200);
05566       if (re_comp_buf.buffer == NULL)
05567         return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
05568       re_comp_buf.allocated = 200;
05569 
05570       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
05571       if (re_comp_buf.fastmap == NULL)
05572     return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
05573     }
05574 
05575   /* Since `re_exec' always passes NULL for the `regs' argument, we
05576      don't need to initialize the pattern buffer fields which affect it.  */
05577 
05578   /* Match anchors at newlines.  */
05579   re_comp_buf.newline_anchor = 1;
05580 
05581   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
05582 
05583   if (!ret)
05584     return NULL;
05585 
05586   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
05587   return (char *) gettext (re_error_msgid[(int) ret]);
05588 }
05589 
05590 
05591 int
05592 #ifdef _LIBC
05593 weak_function
05594 #endif
05595 re_exec (s)
05596     const char *s;
05597 {
05598   const int len = strlen (s);
05599   return
05600     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
05601 }
05602 
05603 #endif /* _REGEX_RE_COMP */
05604 
05605 /* POSIX.2 functions.  Don't define these for Emacs.  */
05606 
05607 #ifndef emacs
05608 
05609 /* regcomp takes a regular expression as a string and compiles it.
05610 
05611    PREG is a regex_t *.  We do not expect any fields to be initialized,
05612    since POSIX says we shouldn't.  Thus, we set
05613 
05614      `buffer' to the compiled pattern;
05615      `used' to the length of the compiled pattern;
05616      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
05617        REG_EXTENDED bit in CFLAGS is set; otherwise, to
05618        RE_SYNTAX_POSIX_BASIC;
05619      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
05620      `fastmap' and `fastmap_accurate' to zero;
05621      `re_nsub' to the number of subexpressions in PATTERN.
05622 
05623    PATTERN is the address of the pattern string.
05624 
05625    CFLAGS is a series of bits which affect compilation.
05626 
05627      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
05628      use POSIX basic syntax.
05629 
05630      If REG_NEWLINE is set, then . and [^...] don't match newline.
05631      Also, regexec will try a match beginning after every newline.
05632 
05633      If REG_ICASE is set, then we considers upper- and lowercase
05634      versions of letters to be equivalent when matching.
05635 
05636      If REG_NOSUB is set, then when PREG is passed to regexec, that
05637      routine will report only success or failure, and nothing about the
05638      registers.
05639 
05640    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
05641    the return codes and their meanings.)  */
05642 
05643 int
05644 regcomp (preg, pattern, cflags)
05645     regex_t *preg;
05646     const char *pattern;
05647     int cflags;
05648 {
05649   reg_errcode_t ret;
05650   reg_syntax_t syntax
05651     = (cflags & REG_EXTENDED) ?
05652       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
05653 
05654   /* regex_compile will allocate the space for the compiled pattern.  */
05655   preg->buffer = 0;
05656   preg->allocated = 0;
05657   preg->used = 0;
05658 
05659   /* Don't bother to use a fastmap when searching.  This simplifies the
05660      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
05661      characters after newlines into the fastmap.  This way, we just try
05662      every character.  */
05663   preg->fastmap = 0;
05664 
05665   if (cflags & REG_ICASE)
05666     {
05667       unsigned i;
05668 
05669       preg->translate
05670     = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
05671                       * sizeof (*(RE_TRANSLATE_TYPE)0));
05672       if (preg->translate == NULL)
05673         return (int) REG_ESPACE;
05674 
05675       /* Map uppercase characters to corresponding lowercase ones.  */
05676       for (i = 0; i < CHAR_SET_SIZE; i++)
05677         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
05678     }
05679   else
05680     preg->translate = NULL;
05681 
05682   /* If REG_NEWLINE is set, newlines are treated differently.  */
05683   if (cflags & REG_NEWLINE)
05684     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
05685       syntax &= ~RE_DOT_NEWLINE;
05686       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
05687       /* It also changes the matching behavior.  */
05688       preg->newline_anchor = 1;
05689     }
05690   else
05691     preg->newline_anchor = 0;
05692 
05693   preg->no_sub = !!(cflags & REG_NOSUB);
05694 
05695   /* POSIX says a null character in the pattern terminates it, so we
05696      can use strlen here in compiling the pattern.  */
05697   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
05698 
05699   /* POSIX doesn't distinguish between an unmatched open-group and an
05700      unmatched close-group: both are REG_EPAREN.  */
05701   if (ret == REG_ERPAREN) ret = REG_EPAREN;
05702 
05703   return (int) ret;
05704 }
05705 #ifdef _LIBC
05706 weak_alias (__regcomp, regcomp)
05707 #endif
05708 
05709 
05710 /* regexec searches for a given pattern, specified by PREG, in the
05711    string STRING.
05712 
05713    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
05714    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
05715    least NMATCH elements, and we set them to the offsets of the
05716    corresponding matched substrings.
05717 
05718    EFLAGS specifies `execution flags' which affect matching: if
05719    REG_NOTBOL is set, then ^ does not match at the beginning of the
05720    string; if REG_NOTEOL is set, then $ does not match at the end.
05721 
05722    We return 0 if we find a match and REG_NOMATCH if not.  */
05723 
05724 int
05725 regexec (preg, string, nmatch, pmatch, eflags)
05726     const regex_t *preg;
05727     const char *string;
05728     size_t nmatch;
05729     regmatch_t pmatch[];
05730     int eflags;
05731 {
05732   int ret;
05733   struct re_registers regs;
05734   regex_t private_preg;
05735   int len = strlen (string);
05736   boolean want_reg_info = !preg->no_sub && nmatch > 0;
05737 
05738   private_preg = *preg;
05739 
05740   private_preg.not_bol = !!(eflags & REG_NOTBOL);
05741   private_preg.not_eol = !!(eflags & REG_NOTEOL);
05742 
05743   /* The user has told us exactly how many registers to return
05744      information about, via `nmatch'.  We have to pass that on to the
05745      matching routines.  */
05746   private_preg.regs_allocated = REGS_FIXED;
05747 
05748   if (want_reg_info)
05749     {
05750       regs.num_regs = nmatch;
05751       regs.start = TALLOC (nmatch, regoff_t);
05752       regs.end = TALLOC (nmatch, regoff_t);
05753       if (regs.start == NULL || regs.end == NULL)
05754         return (int) REG_NOMATCH;
05755     }
05756 
05757   /* Perform the searching operation.  */
05758   ret = re_search (&private_preg, string, len,
05759                    /* start: */ 0, /* range: */ len,
05760                    want_reg_info ? &regs : (struct re_registers *) 0);
05761 
05762   /* Copy the register information to the POSIX structure.  */
05763   if (want_reg_info)
05764     {
05765       if (ret >= 0)
05766         {
05767           unsigned r;
05768 
05769           for (r = 0; r < nmatch; r++)
05770             {
05771               pmatch[r].rm_so = regs.start[r];
05772               pmatch[r].rm_eo = regs.end[r];
05773             }
05774         }
05775 
05776       /* If we needed the temporary register info, free the space now.  */
05777       free (regs.start);
05778       free (regs.end);
05779     }
05780 
05781   /* We want zero return to mean success, unlike `re_search'.  */
05782   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
05783 }
05784 #ifdef _LIBC
05785 weak_alias (__regexec, regexec)
05786 #endif
05787 
05788 
05789 /* Returns a message corresponding to an error code, ERRCODE, returned
05790    from either regcomp or regexec.   We don't use PREG here.  */
05791 
05792 size_t
05793 regerror (int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
05794 {
05795   const char *msg;
05796   size_t msg_size;
05797 
05798   if (errcode < 0
05799       || errcode >= (int) (sizeof (re_error_msgid)
05800                / sizeof (re_error_msgid[0])))
05801     /* Only error codes returned by the rest of the code should be passed
05802        to this routine.  If we are given anything else, or if other regex
05803        code generates an invalid error code, then the program has a bug.
05804        Dump core so we can fix it.  */
05805     abort ();
05806 
05807   msg = gettext (re_error_msgid[errcode]);
05808 
05809   msg_size = strlen (msg) + 1; /* Includes the null.  */
05810 
05811   if (errbuf_size != 0)
05812     {
05813       if (msg_size > errbuf_size)
05814         {
05815 #if defined HAVE_MEMPCPY || defined _LIBC
05816       *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
05817 #else
05818           memcpy (errbuf, msg, errbuf_size - 1);
05819           errbuf[errbuf_size - 1] = 0;
05820 #endif
05821         }
05822       else
05823         memcpy (errbuf, msg, msg_size);
05824     }
05825 
05826   return msg_size;
05827 }
05828 #ifdef _LIBC
05829 weak_alias (__regerror, regerror)
05830 #endif
05831 
05832 
05833 /* Free dynamically allocated space used by PREG.  */
05834 
05835 void
05836 regfree (preg)
05837     regex_t *preg;
05838 {
05839   if (preg->buffer != NULL)
05840     free (preg->buffer);
05841   preg->buffer = NULL;
05842 
05843   preg->allocated = 0;
05844   preg->used = 0;
05845 
05846   if (preg->fastmap != NULL)
05847     free (preg->fastmap);
05848   preg->fastmap = NULL;
05849   preg->fastmap_accurate = 0;
05850 
05851   if (preg->translate != NULL)
05852     free (preg->translate);
05853   preg->translate = NULL;
05854 }
05855 #ifdef _LIBC
05856 weak_alias (__regfree, regfree)
05857 #endif
05858 
05859 #endif /* not emacs  */

Generated on Sat May 26 2012 04:22:00 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.