ReactOS  r76032
match.c
Go to the documentation of this file.
1 /* @(#)match.c 1.27 17/08/13 Copyright 1985, 1995-2017 J. Schilling */
2 #include <schily/standard.h>
3 #include <schily/patmatch.h>
4 #define POSIX_CLASS /* Support [[:alpha:]] by default */
5 #ifdef NO_POSIX_CLASS /* Allow to disable [[:alpha:]] */
6 #undef POSIX_CLASS
7 #endif
8 #ifdef POSIX_CLASS
9 #include <schily/wchar.h> /* With [[:alpha:]], we need wctype() */
10 #include <schily/wctype.h> /* and thus wchar.h and wctype.h */
11 #endif
12 /*
13  * Pattern matching functions
14  *
15  * Copyright (c) 1985, 1995-2017 J. Schilling
16  */
17 /*
18  * The contents of this file are subject to the terms of the
19  * Common Development and Distribution License, Version 1.0 only
20  * (the "License"). You may not use this file except in compliance
21  * with the License.
22  *
23  * See the file CDDL.Schily.txt in this distribution for details.
24  * A copy of the CDDL is also available via the Internet at
25  * http://www.opensource.org/licenses/cddl1.txt
26  *
27  * When distributing Covered Code, include this CDDL HEADER in each
28  * file and include the License file CDDL.Schily.txt from this distribution.
29  */
30 /*
31  * The pattern matching functions below are based on the algorithm
32  * presented by Martin Richards in:
33  *
34  * "A Compact Function for Regular Expression Pattern Matching",
35  * Software-Practice and Experience, Vol. 9, 527-534 (1979)
36  *
37  * Several changes have been made to the original source which has been
38  * written in BCPL:
39  *
40  * '/' is replaced by '!' (to allow UNIX filenames)
41  * '(',')' are replaced by '{', '}'
42  * '\'' is replaced by '\\' (UNIX compatible quote)
43  *
44  * Character classes have been added to allow "[<character list>]"
45  * to be used.
46  * POSIX features like [[:alpha:]] have been added.
47  * Start of line '^' and end of line '$' have been added.
48  */
49 
50 #undef CHAR
51 
52 #ifdef __LINE_MATCH
53 #ifdef __WIDE_CHAR
54 #define patmatch patwlmatch
55 #else
56 #define opatmatch opatlmatch
57 #define patmatch patlmatch
58 #endif
59 #endif
60 
61 #ifdef __WIDE_CHAR
62 #ifndef __LINE_MATCH
63 #define patcompile patwcompile
64 #define patmatch patwmatch
65 #endif
66 #define CHAR wchar_t
67 #define PCHAR wchar_t
68 #endif
69 
70 #ifdef __MB_CHAR
71 #undef patmatch
72 #ifdef __LINE_MATCH
73 #define patmatch patmblmatch
74 #else
75 #define patmatch patmbmatch
76 #endif
77 #define PCHAR wchar_t
78 #endif
79 
80 #ifndef CHAR
81 typedef unsigned char Uchar;
82 #define DID_UCHAR_TYPE
83 #define CHAR Uchar
84 #endif
85 
86 #ifndef PCHAR
87 #ifndef DID_UCHAR_TYPE
88 typedef unsigned char Uchar;
89 #endif
90 #define PCHAR Uchar
91 #endif
92 
93 #define ENDSTATE (-1)
94 
95 #define CL_SIZE 32 /* Max size for '[: :]' */
96 
97 /*
98  * The Interpreter
99  */
100 
101 
102 /*
103  * put adds a new state to the active list
104  */
105 #define put(ret, state, sp, n) { \
106  register int *lstate = state; \
107  register int *lsp = sp; \
108  register int ln = n; \
109  \
110  while (lstate < lsp) { \
111  if (*lstate++ == ln) { \
112  ret = lsp; \
113  lsp = 0; \
114  break; \
115  } \
116  } \
117  if (lsp) { \
118  *lstate++ = ln; \
119  ret = lstate; \
120  } \
121 }
122 
123 
124 /*
125  * match a character in class
126  *
127  * Syntax errors do not appear here, they are handled by the compiler,
128  * so in theory we could remove the "return (0)" statements from the
129  * the POSIX class code.
130  */
131 #ifdef POSIX_CLASS
132 #define CHK_POSIX_CLASS \
133  else if (*lpat == LCLASS) { \
134  if (lpat[1] == ':') { \
135  char class[CL_SIZE+1]; \
136  char *pc = class; \
137  \
138  lpat += 2; /* Eat ':' */ \
139  for (;;) { \
140  if (*lpat == '\0') { \
141  ok = FALSE; \
142  goto out; \
143  } \
144  if (*lpat == ':' && lpat[1] == RCLASS) \
145  break; \
146  if (pc >= &class[CL_SIZE]) { \
147  ok = FALSE; \
148  goto out; \
149  } \
150  *pc++ = *lpat++; \
151  } \
152  if (pc == class) { \
153  ok = FALSE; \
154  goto out; \
155  } \
156  *pc = '\0'; \
157  lpat += 2; /* Skip ":]" */ \
158  if (iswctype(lc, wctype(class))) { \
159  ok = !ok; \
160  goto out; \
161  } \
162  continue; \
163  } \
164  }
165 #else
166 #define CHK_POSIX_CLASS
167 #endif
168 #define in_class(found, pat, c) { \
169  register const PCHAR *lpat = pat; \
170  register int lc = c; \
171  int lo_bound; \
172  int hi_bound; \
173  BOOL ok = FALSE; \
174  \
175  if (*lpat == NOT) { \
176  lpat++; \
177  ok = TRUE; \
178  } \
179  while (*lpat != RCLASS) { \
180  if (*lpat == QUOTE) \
181  lpat++; \
182  CHK_POSIX_CLASS \
183  lo_bound = *lpat++; \
184  if (*lpat == RANGE) { \
185  lpat++; \
186  if (*lpat == QUOTE) \
187  lpat++; \
188  hi_bound = *lpat++; \
189  } else { \
190  hi_bound = lo_bound; \
191  } \
192  if (lo_bound <= lc && lc <= hi_bound) { \
193  ok = !ok; \
194  goto out; \
195  } \
196  } \
197 out: \
198  found = ok; \
199 }
200 
201 /*
202  * opatmatch - the old external interpreter interface.
203  *
204  * Trys to match a string beginning at offset
205  * against the compiled pattern.
206  */
207 #if !defined(__WIDE_CHAR) && !defined(__MB_CHAR)
208 EXPORT CHAR
209 *opatmatch(pat, aux, str, soff, slen, alt)
210  const PCHAR *pat;
211  const int *aux;
212  const CHAR *str;
213  int soff;
214  int slen;
215  int alt;
216 {
217  int state[MAXPAT];
218 
219  return (patmatch(pat, aux, str, soff, slen, alt, state));
220 }
221 #endif
222 
223 /*
224  * patmatch - the external interpreter interface.
225  *
226  * Trys to match a string beginning at offset
227  * against the compiled pattern.
228  */
229 EXPORT CHAR *
230 patmatch(pat, aux, str, soff, slen, alt, state)
231  const PCHAR *pat;
232  const int *aux;
233  const CHAR *str;
234  int soff;
235  int slen;
236  int alt;
237  int state[];
238 {
239  register int *sp;
240  register int *n;
241  register int *i;
242  register int p;
243  register int q, s, k;
244 #ifdef __MB_CHAR
245  wchar_t c;
246  int mlen = 1;
247 #else
248  int c;
249 #endif
250  const CHAR *lastp = (CHAR *)NULL;
251 
252 #ifdef __LINE_MATCH
253 for (; soff <= slen; soff++) {
254 #endif
255 
256  sp = state;
257  put(sp, state, state, 0);
258  if (alt != ENDSTATE)
259  put(sp, state, sp, alt);
260 
261 #ifdef __MB_CHAR
262  mbtowc(NULL, NULL, 0);
263  for (s = soff; ; s += mlen) {
264 #else
265  for (s = soff; ; s++) {
266 #endif
267  /*
268  * next char from input string
269  */
270  if (s >= slen) {
271  c = 0;
272  } else {
273 #ifdef __MB_CHAR
274  mlen = mbtowc(&c, (char *)str, slen - s);
275  if (mlen < 0) {
276  mbtowc(NULL, NULL, 0);
277  c = str[s];
278  mlen = 1;
279  }
280 #else
281  c = str[s];
282 #endif
283  }
284  /*
285  * first complete the closure
286  */
287  for (n = state; n < sp; ) {
288  p = *n++; /* next state number */
289  if (p == ENDSTATE)
290  continue;
291  q = aux[p]; /* get next state for pat */
292  k = pat[p]; /* get next char from pat */
293  switch (k) {
294 
295  case REP:
296  put(sp, state, sp, p+1);
297  /* FALLTHRU */
298  case NIL: /* NIL matches always */
299  case STAR:
300  put(sp, state, sp, q);
301  break;
302  case LBRACK: /* alternations */
303  case ALT:
304  put(sp, state, sp, p+1);
305  if (q != ENDSTATE)
306  put(sp, state, sp, q);
307  break;
308  case START:
309  if (s == 0)
310  put(sp, state, sp, q);
311  break;
312  case END:
313  if (c == '\0')
314  put(sp, state, sp, q);
315  break;
316  }
317  }
318 
319  for (i = state; i < sp; ) {
320  if (*i++ == ENDSTATE) {
321  lastp = &str[s];
322  break;
323  }
324  }
325  if (c == 0)
326  return ((CHAR *)lastp);
327 
328  /*
329  * now try to match next character
330  */
331  n = sp;
332  sp = state;
333  for (i = sp; i < n; ) {
334  p = *i++; /* next active state number */
335  if (p == ENDSTATE)
336  continue;
337  k = pat[p];
338  switch (k) {
339 
340  case ALT:
341  case REP:
342  case NIL:
343  case LBRACK:
344  case START:
345  case END:
346  continue;
347  case LCLASS:
348  in_class(q, &pat[p+1], c);
349  if (!q)
350  continue;
351  break;
352  case STAR:
353  put(sp, state, sp, p);
354  continue;
355  case QUOTE:
356  k = pat[p+1];
357  default:
358  if (k != c)
359  continue;
360  /* FALLTHRU */
361  case ANY:
362  break;
363  }
364  put(sp, state, sp, aux[p]);
365  }
366  if (sp == state) { /* if no new states return */
367 #ifdef __LINE_MATCH
368 
369  if (lastp || (soff == slen - 1))
370  return ((CHAR *)lastp);
371  else
372  break;
373 #else
374  return ((CHAR *)lastp);
375 #endif
376  }
377  }
378 #ifdef __LINE_MATCH
379 }
380 return ((CHAR *)lastp);
381 #endif
382 }
383 
384 
385 #if !defined(__LINE_MATCH) && !defined(__MB_CHAR)
386 /*
387  * The Compiler
388  */
389 
390 typedef struct args {
391  const PCHAR *pattern;
392  int *aux;
393  int patp;
394  int length;
396 } arg_t;
397 
398 LOCAL void nextitem __PR((arg_t *));
399 LOCAL int prim __PR((arg_t *));
400 LOCAL int expr __PR((arg_t *, int *));
401 LOCAL void setexits __PR((int *, int, int));
402 LOCAL int join __PR((int *, int, int));
403 
404 /*
405  * 'read' the next character from pattern
406  */
407 #define rch(ap) \
408 { \
409  if (++(ap)->patp >= (ap)->length) \
410  (ap)->Ch = 0; \
411  else \
412  (ap)->Ch = (ap)->pattern[(ap)->patp]; \
413 }
414 
415 /*
416  * 'peek' the next character from pattern
417  */
418 #define pch(ap) \
419  ((((ap)->patp + 1) >= (ap)->length) ? \
420  0 \
421  : \
422  (ap)->pattern[(ap)->patp+1]) \
423 
424 /*
425  * get the next item from pattern
426  */
427 LOCAL void
429  arg_t *ap;
430 {
431  if (ap->Ch == QUOTE)
432  rch(ap);
433  rch(ap);
434 }
435 
436 /*
437  * parse a primary
438  */
439 LOCAL int
441  arg_t *ap;
442 {
443  int a = ap->patp;
444  int op = ap->Ch;
445  int t;
446 
447  nextitem(ap);
448  switch (op) {
449 
450  case '\0':
451  case ALT:
452  case RBRACK:
453  return (ENDSTATE);
454  case LCLASS:
455  while (ap->Ch != RCLASS && ap->Ch != '\0') {
456 #ifdef POSIX_CLASS
457  if (ap->Ch == LCLASS) {
458  if (pch(ap) == ':') { /* [:alpha:] */
459  char class[CL_SIZE+1];
460  char *pc = class;
461 
462  nextitem(ap);
463  nextitem(ap);
464  while (ap->Ch != ':' &&
465  ap->Ch != '\0') {
466  if (pc > &class[CL_SIZE])
467  return (ENDSTATE);
468  *pc = ap->Ch;
469  if (*pc++ != ap->Ch)
470  return (ENDSTATE);
471  nextitem(ap);
472  }
473  if (pc == class)
474  return (ENDSTATE);
475  *pc = '\0';
476  if (ap->Ch == '\0')
477  return (ENDSTATE);
478  if (wctype(class) == 0)
479  return (ENDSTATE);
480  nextitem(ap);
481  }
482  if (ap->Ch != RCLASS)
483  return (ENDSTATE);
484  }
485 #endif
486  nextitem(ap);
487  }
488  if (ap->Ch == '\0')
489  return (ENDSTATE);
490  nextitem(ap);
491  break;
492  case REP:
493  t = prim(ap);
494  if (t == ENDSTATE)
495  return (ENDSTATE);
496  setexits(ap->aux, t, a);
497  break;
498  case LBRACK:
499  a = expr(ap, &ap->aux[a]);
500  if (a == ENDSTATE || ap->Ch != RBRACK)
501  return (ENDSTATE);
502  nextitem(ap);
503  break;
504  }
505  return (a);
506 }
507 
508 /*
509  * parse an expression (a sequence of primaries)
510  */
511 LOCAL int
512 expr(ap, altp)
513  arg_t *ap;
514  int *altp;
515 {
516  int exits = ENDSTATE;
517  int a;
518  int *aux = ap->aux;
519  PCHAR Ch;
520 
521  for (;;) {
522  a = prim(ap);
523  if (a == ENDSTATE)
524  return (ENDSTATE);
525  Ch = ap->Ch;
526  if (Ch == ALT || Ch == RBRACK || Ch == '\0') {
527  exits = join(aux, exits, a);
528  if (Ch != ALT)
529  return (exits);
530  *altp = ap->patp;
531  altp = &aux[ap->patp];
532  nextitem(ap);
533  } else
534  setexits(aux, a, ap->patp);
535  }
536 }
537 
538 /*
539  * set all exits in a list to a specified value
540  */
541 LOCAL void
543  int *aux;
544  int list;
545  int val;
546 {
547  int a;
548 
549  while (list != ENDSTATE) {
550  a = aux[list];
551  aux[list] = val;
552  list = a;
553  }
554 }
555 
556 /*
557  * concatenate two lists
558  */
559 LOCAL int
560 join(aux, a, b)
561  int *aux;
562  int a;
563  int b;
564 {
565  int t;
566 
567  if (a == ENDSTATE)
568  return (b);
569  t = a;
570  while (aux[t] != ENDSTATE)
571  t = aux[t];
572  aux[t] = b;
573  return (a);
574 }
575 
576 /*
577  * patcompile - the external compiler interface.
578  *
579  * The pattern is compiled into the aux array.
580  * Return value on success, is the outermost alternate which is != 0.
581  * Error is indicated by return of 0.
582  */
583 EXPORT int
584 patcompile(pat, len, aux)
585  const PCHAR *pat;
586  int len;
587  int *aux;
588 {
589  arg_t a;
590  int alt = ENDSTATE;
591  int i;
592 
593  a.pattern = pat;
594  a.length = len;
595  a.aux = aux;
596  a.patp = -1;
597 
598  for (i = 0; i < len; i++)
599  aux[i] = ENDSTATE;
600  rch(&a);
601  i = expr(&a, &alt);
602  if (i == ENDSTATE)
603  return (0);
604  setexits(aux, i, ENDSTATE);
605  return (alt);
606 }
607 #endif /* !defined(__LINE_MATCH) && !defined(__MB_CHAR) */
LOCAL void nextitem(arg_t *ap)
Definition: match.c:428
signed char * PCHAR
Definition: retypes.h:7
const PCHAR * pattern
Definition: match.c:391
LOCAL int expr(arg_t *ap, int *altp)
Definition: match.c:512
#define rch(ap)
Definition: match.c:407
#define QUOTE
Definition: kbdpo.c:223
char CHAR
Definition: xmlstorage.h:175
EXPORT CHAR * patmatch(PCHAR *pat, const int *aux, const CHAR *str, int soff, int slen, int alt, state) const
Definition: match.c:230
GLdouble GLdouble t
Definition: gl.h:2047
#define ENDSTATE
Definition: match.c:93
#define in_class(found, pat, c)
Definition: match.c:168
#define Ch(x, y, z)
Definition: sha2.c:141
Definition: match.c:390
Definition: query.h:74
PCHAR Ch
Definition: match.c:395
#define pch(ap)
Definition: match.c:418
#define RCLASS
Definition: patmatch.h:60
#define put(ret, state, sp, n)
Definition: match.c:105
GLuint n
Definition: s_context.h:57
GLenum GLclampf GLint i
Definition: glfuncs.h:14
UINT op
Definition: effect.c:219
#define a
Definition: ke_i.h:78
int length
Definition: match.c:394
#define RBRACK
Definition: patmatch.h:58
const WCHAR * str
smooth NULL
Definition: ftsmooth.c:557
unsigned char Uchar
Definition: match.c:81
EXPORT int patcompile(PCHAR *pat, int len, int *aux) const
Definition: match.c:584
LOCAL int join(int *aux, int a, int b)
Definition: match.c:560
LOCAL void nextitem __PR((arg_t *))
#define b
Definition: ke_i.h:79
#define LOCAL(type)
Definition: jmorecfg.h:289
GLuint GLfloat * val
Definition: glext.h:7180
Definition: infcodes.c:25
#define ALT
Definition: patmatch.h:51
#define const
Definition: zconf.h:230
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
#define MAXPAT
Definition: patmatch.h:79
#define mbtowc(wp, cp, len)
Definition: wchar.h:155
GLdouble GLdouble GLdouble GLdouble q
Definition: gl.h:2063
#define LCLASS
Definition: patmatch.h:59
static int state
Definition: maze.c:121
GLdouble s
Definition: gl.h:2039
Definition: _list.h:228
GLenum GLsizei len
Definition: glext.h:6722
EXPORT CHAR * opatmatch(PCHAR *pat, const int *aux, const CHAR *str, int soff, int slen, int alt) const
Definition: match.c:209
#define CL_SIZE
Definition: match.c:95
Definition: infcodes.c:17
#define REP
Definition: assyntax.h:594
wctype_t __cdecl wctype(const char *)
#define ANY
Definition: patmatch.h:55
#define NIL
Definition: trio.c:109
BSTR alt
LOCAL int prim(arg_t *ap)
Definition: match.c:440
static const WCHAR aux[]
void int int ULONGLONG int va_list * ap
Definition: winesup.h:32
#define c
Definition: ke_i.h:80
#define list
Definition: rosglue.h:35
static const WCHAR sp[]
Definition: suminfo.c:288
LOCAL void setexits(int *aux, int list, int val)
Definition: match.c:542
CONST PIXELFORMATDESCRIPTOR *typedef int
Definition: wingl.c:32
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
GLfloat GLfloat p
Definition: glext.h:8902
int k
Definition: mpi.c:3369
int patp
Definition: match.c:393
struct args arg_t
#define LBRACK
Definition: patmatch.h:57
int * aux
Definition: match.c:392