ReactOS  0.4.12-dev-685-gf36cbf7
match.c
Go to the documentation of this file.
1 /* @(#)match.c 1.27 17/08/13 Copyright 1985, 1995-2017 J. Schilling */
2 #include <schily/standard.h>
3 #include <schily/patmatch.h>
4 #define POSIX_CLASS /* Support [[:alpha:]] by default */
5 #ifdef NO_POSIX_CLASS /* Allow to disable [[:alpha:]] */
6 #undef POSIX_CLASS
7 #endif
8 #ifdef POSIX_CLASS
9 #include <schily/wchar.h> /* With [[:alpha:]], we need wctype() */
10 #include <schily/wctype.h> /* and thus wchar.h and wctype.h */
11 #endif
12 /*
13  * Pattern matching functions
14  *
15  * Copyright (c) 1985, 1995-2017 J. Schilling
16  */
17 /*
18  * The contents of this file are subject to the terms of the
19  * Common Development and Distribution License, Version 1.0 only
20  * (the "License"). You may not use this file except in compliance
21  * with the License.
22  *
23  * See the file CDDL.Schily.txt in this distribution for details.
24  * A copy of the CDDL is also available via the Internet at
25  * http://www.opensource.org/licenses/cddl1.txt
26  *
27  * When distributing Covered Code, include this CDDL HEADER in each
28  * file and include the License file CDDL.Schily.txt from this distribution.
29  */
30 /*
31  * The pattern matching functions below are based on the algorithm
32  * presented by Martin Richards in:
33  *
34  * "A Compact Function for Regular Expression Pattern Matching",
35  * Software-Practice and Experience, Vol. 9, 527-534 (1979)
36  *
37  * Several changes have been made to the original source which has been
38  * written in BCPL:
39  *
40  * '/' is replaced by '!' (to allow UNIX filenames)
41  * '(',')' are replaced by '{', '}'
42  * '\'' is replaced by '\\' (UNIX compatible quote)
43  *
44  * Character classes have been added to allow "[<character list>]"
45  * to be used.
46  * POSIX features like [[:alpha:]] have been added.
47  * Start of line '^' and end of line '$' have been added.
48  */
49 
50 #undef CHAR
51 
52 #ifdef __LINE_MATCH
53 #ifdef __WIDE_CHAR
54 #define patmatch patwlmatch
55 #else
56 #define opatmatch opatlmatch
57 #define patmatch patlmatch
58 #endif
59 #endif
60 
61 #ifdef __WIDE_CHAR
62 #ifndef __LINE_MATCH
63 #define patcompile patwcompile
64 #define patmatch patwmatch
65 #endif
66 #define CHAR wchar_t
67 #define PCHAR wchar_t
68 #endif
69 
70 #ifdef __MB_CHAR
71 #undef patmatch
72 #ifdef __LINE_MATCH
73 #define patmatch patmblmatch
74 #else
75 #define patmatch patmbmatch
76 #endif
77 #define PCHAR wchar_t
78 #endif
79 
80 #ifndef CHAR
81 typedef unsigned char Uchar;
82 #define DID_UCHAR_TYPE
83 #define CHAR Uchar
84 #endif
85 
86 #ifndef PCHAR
87 #ifndef DID_UCHAR_TYPE
88 typedef unsigned char Uchar;
89 #endif
90 #define PCHAR Uchar
91 #endif
92 
93 #define ENDSTATE (-1)
94 
95 #define CL_SIZE 32 /* Max size for '[: :]' */
96 
97 /*
98  * The Interpreter
99  */
100 
101 
102 /*
103  * put adds a new state to the active list
104  */
105 #define put(ret, state, sp, n) { \
106  register int *lstate = state; \
107  register int *lsp = sp; \
108  register int ln = n; \
109  \
110  while (lstate < lsp) { \
111  if (*lstate++ == ln) { \
112  ret = lsp; \
113  lsp = 0; \
114  break; \
115  } \
116  } \
117  if (lsp) { \
118  *lstate++ = ln; \
119  ret = lstate; \
120  } \
121 }
122 
123 
124 /*
125  * match a character in class
126  *
127  * Syntax errors do not appear here, they are handled by the compiler,
128  * so in theory we could remove the "return (0)" statements from the
129  * the POSIX class code.
130  */
131 #ifdef POSIX_CLASS
132 #define CHK_POSIX_CLASS \
133  else if (*lpat == LCLASS) { \
134  if (lpat[1] == ':') { \
135  char class[CL_SIZE+1]; \
136  char *pc = class; \
137  \
138  lpat += 2; /* Eat ':' */ \
139  for (;;) { \
140  if (*lpat == '\0') { \
141  ok = FALSE; \
142  goto out; \
143  } \
144  if (*lpat == ':' && lpat[1] == RCLASS) \
145  break; \
146  if (pc >= &class[CL_SIZE]) { \
147  ok = FALSE; \
148  goto out; \
149  } \
150  *pc++ = *lpat++; \
151  } \
152  if (pc == class) { \
153  ok = FALSE; \
154  goto out; \
155  } \
156  *pc = '\0'; \
157  lpat += 2; /* Skip ":]" */ \
158  if (iswctype(lc, wctype(class))) { \
159  ok = !ok; \
160  goto out; \
161  } \
162  continue; \
163  } \
164  }
165 #else
166 #define CHK_POSIX_CLASS
167 #endif
168 #define in_class(found, pat, c) { \
169  register const PCHAR *lpat = pat; \
170  register int lc = c; \
171  int lo_bound; \
172  int hi_bound; \
173  BOOL ok = FALSE; \
174  \
175  if (*lpat == NOT) { \
176  lpat++; \
177  ok = TRUE; \
178  } \
179  while (*lpat != RCLASS) { \
180  if (*lpat == QUOTE) \
181  lpat++; \
182  CHK_POSIX_CLASS \
183  lo_bound = *lpat++; \
184  if (*lpat == RANGE) { \
185  lpat++; \
186  if (*lpat == QUOTE) \
187  lpat++; \
188  hi_bound = *lpat++; \
189  } else { \
190  hi_bound = lo_bound; \
191  } \
192  if (lo_bound <= lc && lc <= hi_bound) { \
193  ok = !ok; \
194  goto out; \
195  } \
196  } \
197 out: \
198  found = ok; \
199 }
200 
201 /*
202  * opatmatch - the old external interpreter interface.
203  *
204  * Trys to match a string beginning at offset
205  * against the compiled pattern.
206  */
207 #if !defined(__WIDE_CHAR) && !defined(__MB_CHAR)
208 EXPORT CHAR
209 *opatmatch(pat, aux, str, soff, slen, alt)
210  const PCHAR *pat;
211  const int *aux;
212  const CHAR *str;
213  int soff;
214  int slen;
215  int alt;
216 {
217  int state[MAXPAT];
218 
219  return (patmatch(pat, aux, str, soff, slen, alt, state));
220 }
221 #endif
222 
223 /*
224  * patmatch - the external interpreter interface.
225  *
226  * Trys to match a string beginning at offset
227  * against the compiled pattern.
228  */
229 EXPORT CHAR *
230 patmatch(pat, aux, str, soff, slen, alt, state)
231  const PCHAR *pat;
232  const int *aux;
233  const CHAR *str;
234  int soff;
235  int slen;
236  int alt;
237  int state[];
238 {
239  register int *sp;
240  register int *n;
241  register int *i;
242  register int p;
243  register int q, s, k;
244 #ifdef __MB_CHAR
245  wchar_t c;
246  int mlen = 1;
247 #else
248  int c;
249 #endif
250  const CHAR *lastp = (CHAR *)NULL;
251 
252 #ifdef __LINE_MATCH
253 for (; soff <= slen; soff++) {
254 #endif
255 
256  sp = state;
257  put(sp, state, state, 0);
258  if (alt != ENDSTATE)
259  put(sp, state, sp, alt);
260 
261 #ifdef __MB_CHAR
262  mbtowc(NULL, NULL, 0);
263  for (s = soff; ; s += mlen) {
264 #else
265  for (s = soff; ; s++) {
266 #endif
267  /*
268  * next char from input string
269  */
270  if (s >= slen) {
271  c = 0;
272  } else {
273 #ifdef __MB_CHAR
274  mlen = mbtowc(&c, (char *)str, slen - s);
275  if (mlen < 0) {
276  mbtowc(NULL, NULL, 0);
277  c = str[s];
278  mlen = 1;
279  }
280 #else
281  c = str[s];
282 #endif
283  }
284  /*
285  * first complete the closure
286  */
287  for (n = state; n < sp; ) {
288  p = *n++; /* next state number */
289  if (p == ENDSTATE)
290  continue;
291  q = aux[p]; /* get next state for pat */
292  k = pat[p]; /* get next char from pat */
293  switch (k) {
294 
295  case REP:
296  put(sp, state, sp, p+1);
297  /* FALLTHRU */
298  case NIL: /* NIL matches always */
299  case STAR:
300  put(sp, state, sp, q);
301  break;
302  case LBRACK: /* alternations */
303  case ALT:
304  put(sp, state, sp, p+1);
305  if (q != ENDSTATE)
306  put(sp, state, sp, q);
307  break;
308  case START:
309  if (s == 0)
310  put(sp, state, sp, q);
311  break;
312  case END:
313  if (c == '\0')
314  put(sp, state, sp, q);
315  break;
316  }
317  }
318 
319  for (i = state; i < sp; ) {
320  if (*i++ == ENDSTATE) {
321  lastp = &str[s];
322  break;
323  }
324  }
325  if (c == 0)
326  return ((CHAR *)lastp);
327 
328  /*
329  * now try to match next character
330  */
331  n = sp;
332  sp = state;
333  for (i = sp; i < n; ) {
334  p = *i++; /* next active state number */
335  if (p == ENDSTATE)
336  continue;
337  k = pat[p];
338  switch (k) {
339 
340  case ALT:
341  case REP:
342  case NIL:
343  case LBRACK:
344  case START:
345  case END:
346  continue;
347  case LCLASS:
348  in_class(q, &pat[p+1], c);
349  if (!q)
350  continue;
351  break;
352  case STAR:
353  put(sp, state, sp, p);
354  continue;
355  case QUOTE:
356  k = pat[p+1];
357  default:
358  if (k != c)
359  continue;
360  /* FALLTHRU */
361  case ANY:
362  break;
363  }
364  put(sp, state, sp, aux[p]);
365  }
366  if (sp == state) { /* if no new states return */
367 #ifdef __LINE_MATCH
368 
369  if (lastp || (soff == slen - 1))
370  return ((CHAR *)lastp);
371  else
372  break;
373 #else
374  return ((CHAR *)lastp);
375 #endif
376  }
377  }
378 #ifdef __LINE_MATCH
379 }
380 return ((CHAR *)lastp);
381 #endif
382 }
383 
384 
385 #if !defined(__LINE_MATCH) && !defined(__MB_CHAR)
386 /*
387  * The Compiler
388  */
389 
390 typedef struct args {
391  const PCHAR *pattern;
392  int *aux;
393  int patp;
394  int length;
396 } arg_t;
397 
398 LOCAL void nextitem __PR((arg_t *));
399 LOCAL int prim __PR((arg_t *));
400 LOCAL int expr __PR((arg_t *, int *));
401 LOCAL void setexits __PR((int *, int, int));
402 LOCAL int join __PR((int *, int, int));
403 
404 /*
405  * 'read' the next character from pattern
406  */
407 #define rch(ap) \
408 { \
409  if (++(ap)->patp >= (ap)->length) \
410  (ap)->Ch = 0; \
411  else \
412  (ap)->Ch = (ap)->pattern[(ap)->patp]; \
413 }
414 
415 /*
416  * 'peek' the next character from pattern
417  */
418 #define pch(ap) \
419  ((((ap)->patp + 1) >= (ap)->length) ? \
420  0 \
421  : \
422  (ap)->pattern[(ap)->patp+1]) \
423 
424 /*
425  * get the next item from pattern
426  */
427 LOCAL void
429  arg_t *ap;
430 {
431  if (ap->Ch == QUOTE)
432  rch(ap);
433  rch(ap);
434 }
435 
436 /*
437  * parse a primary
438  */
439 LOCAL int
441  arg_t *ap;
442 {
443  int a = ap->patp;
444  int op = ap->Ch;
445  int t;
446 
447  nextitem(ap);
448  switch (op) {
449 
450  case '\0':
451  case ALT:
452  case RBRACK:
453  return (ENDSTATE);
454  case LCLASS:
455  while (ap->Ch != RCLASS && ap->Ch != '\0') {
456 #ifdef POSIX_CLASS
457  if (ap->Ch == LCLASS) {
458  if (pch(ap) == ':') { /* [:alpha:] */
459  char class[CL_SIZE+1];
460  char *pc = class;
461 
462  nextitem(ap);
463  nextitem(ap);
464  while (ap->Ch != ':' &&
465  ap->Ch != '\0') {
466  if (pc > &class[CL_SIZE])
467  return (ENDSTATE);
468  *pc = ap->Ch;
469  if (*pc++ != ap->Ch)
470  return (ENDSTATE);
471  nextitem(ap);
472  }
473  if (pc == class)
474  return (ENDSTATE);
475  *pc = '\0';
476  if (ap->Ch == '\0')
477  return (ENDSTATE);
478  if (wctype(class) == 0)
479  return (ENDSTATE);
480  nextitem(ap);
481  }
482  if (ap->Ch != RCLASS)
483  return (ENDSTATE);
484  }
485 #endif
486  nextitem(ap);
487  }
488  if (ap->Ch == '\0')
489  return (ENDSTATE);
490  nextitem(ap);
491  break;
492  case REP:
493  t = prim(ap);
494  if (t == ENDSTATE)
495  return (ENDSTATE);
496  setexits(ap->aux, t, a);
497  break;
498  case LBRACK:
499  a = expr(ap, &ap->aux[a]);
500  if (a == ENDSTATE || ap->Ch != RBRACK)
501  return (ENDSTATE);
502  nextitem(ap);
503  break;
504  }
505  return (a);
506 }
507 
508 /*
509  * parse an expression (a sequence of primaries)
510  */
511 LOCAL int
512 expr(ap, altp)
513  arg_t *ap;
514  int *altp;
515 {
516  int exits = ENDSTATE;
517  int a;
518  int *aux = ap->aux;
519  PCHAR Ch;
520 
521  for (;;) {
522  a = prim(ap);
523  if (a == ENDSTATE)
524  return (ENDSTATE);
525  Ch = ap->Ch;
526  if (Ch == ALT || Ch == RBRACK || Ch == '\0') {
527  exits = join(aux, exits, a);
528  if (Ch != ALT)
529  return (exits);
530  *altp = ap->patp;
531  altp = &aux[ap->patp];
532  nextitem(ap);
533  } else
534  setexits(aux, a, ap->patp);
535  }
536 }
537 
538 /*
539  * set all exits in a list to a specified value
540  */
541 LOCAL void
543  int *aux;
544  int list;
545  int val;
546 {
547  int a;
548 
549  while (list != ENDSTATE) {
550  a = aux[list];
551  aux[list] = val;
552  list = a;
553  }
554 }
555 
556 /*
557  * concatenate two lists
558  */
559 LOCAL int
561  int *aux;
562  int a;
563  int b;
564 {
565  int t;
566 
567  if (a == ENDSTATE)
568  return (b);
569  t = a;
570  while (aux[t] != ENDSTATE)
571  t = aux[t];
572  aux[t] = b;
573  return (a);
574 }
575 
576 /*
577  * patcompile - the external compiler interface.
578  *
579  * The pattern is compiled into the aux array.
580  * Return value on success, is the outermost alternate which is != 0.
581  * Error is indicated by return of 0.
582  */
583 EXPORT int
585  const PCHAR *pat;
586  int len;
587  int *aux;
588 {
589  arg_t a;
590  int alt = ENDSTATE;
591  int i;
592 
593  a.pattern = pat;
594  a.length = len;
595  a.aux = aux;
596  a.patp = -1;
597 
598  for (i = 0; i < len; i++)
599  aux[i] = ENDSTATE;
600  rch(&a);
601  i = expr(&a, &alt);
602  if (i == ENDSTATE)
603  return (0);
604  setexits(aux, i, ENDSTATE);
605  return (alt);
606 }
607 #endif /* !defined(__LINE_MATCH) && !defined(__MB_CHAR) */
LOCAL void nextitem(arg_t *ap)
Definition: match.c:428
signed char * PCHAR
Definition: retypes.h:7
const PCHAR * pattern
Definition: match.c:391
LOCAL int expr(arg_t *ap, int *altp)
Definition: match.c:512
#define rch(ap)
Definition: match.c:407
#define QUOTE
Definition: kbdpo.c:223
char CHAR
Definition: xmlstorage.h:175
GLdouble n
Definition: glext.h:7729
EXPORT CHAR * patmatch(PCHAR *pat, const int *aux, const CHAR *str, int soff, int slen, int alt, state) const
Definition: match.c:230
GLdouble GLdouble t
Definition: gl.h:2047
#define ENDSTATE
Definition: match.c:93
#define in_class(found, pat, c)
Definition: match.c:168
#define Ch(x, y, z)
Definition: sha2.c:141
Definition: match.c:390
Definition: query.h:86
PCHAR Ch
Definition: match.c:395
#define pch(ap)
Definition: match.c:418
#define RCLASS
Definition: patmatch.h:60
#define put(ret, state, sp, n)
Definition: match.c:105
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define REP
Definition: patmatch.h:52
#define a
Definition: ke_i.h:78
int length
Definition: match.c:394
#define RBRACK
Definition: patmatch.h:58
const WCHAR * str
struct star STAR
smooth NULL
Definition: ftsmooth.c:416
unsigned char Uchar
Definition: match.c:81
EXPORT int patcompile(PCHAR *pat, int len, int *aux) const
Definition: match.c:584
LOCAL int join(int *aux, int a, int b)
Definition: match.c:560
LOCAL void nextitem __PR((arg_t *))
#define b
Definition: ke_i.h:79
#define LOCAL(type)
Definition: jmorecfg.h:289
GLuint GLfloat * val
Definition: glext.h:7180
Definition: infcodes.c:25
#define ALT
Definition: patmatch.h:51
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
#define MAXPAT
Definition: patmatch.h:79
const GLubyte * c
Definition: glext.h:8905
#define mbtowc(wp, cp, len)
Definition: wchar.h:155
GLdouble GLdouble GLdouble GLdouble q
Definition: gl.h:2063
#define LCLASS
Definition: patmatch.h:59
static int state
Definition: maze.c:121
GLenum GLsizei len
Definition: glext.h:6722
GLdouble s
Definition: gl.h:2039
Definition: _list.h:228
EXPORT CHAR * opatmatch(PCHAR *pat, const int *aux, const CHAR *str, int soff, int slen, int alt) const
Definition: match.c:209
#define CL_SIZE
Definition: match.c:95
Definition: infcodes.c:17
wctype_t __cdecl wctype(const char *)
#define ANY
Definition: patmatch.h:55
#define list
Definition: rosglue.h:35
#define NIL
Definition: trio.c:109
LOCAL int prim(arg_t *ap)
Definition: match.c:440
static const WCHAR aux[]
void int int ULONGLONG int va_list * ap
Definition: winesup.h:32
#define c
Definition: ke_i.h:80
static const WCHAR sp[]
Definition: suminfo.c:288
LOCAL void setexits(int *aux, int list, int val)
Definition: match.c:542
UINT op
Definition: effect.c:223
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
GLfloat GLfloat p
Definition: glext.h:8902
int k
Definition: mpi.c:3369
int patp
Definition: match.c:393
struct args arg_t
#define LBRACK
Definition: patmatch.h:57
int * aux
Definition: match.c:392