ReactOS 0.4.16-dev-424-ge4748fe
match.c
Go to the documentation of this file.
1/* @(#)match.c 1.27 17/08/13 Copyright 1985, 1995-2017 J. Schilling */
2#include <schily/standard.h>
3#include <schily/patmatch.h>
4#define POSIX_CLASS /* Support [[:alpha:]] by default */
5#ifdef NO_POSIX_CLASS /* Allow to disable [[:alpha:]] */
6#undef POSIX_CLASS
7#endif
8#ifdef POSIX_CLASS
9#include <schily/wchar.h> /* With [[:alpha:]], we need wctype() */
10#include <schily/wctype.h> /* and thus wchar.h and wctype.h */
11#endif
12/*
13 * Pattern matching functions
14 *
15 * Copyright (c) 1985, 1995-2017 J. Schilling
16 */
17/*
18 * The contents of this file are subject to the terms of the
19 * Common Development and Distribution License, Version 1.0 only
20 * (the "License"). You may not use this file except in compliance
21 * with the License.
22 *
23 * See the file CDDL.Schily.txt in this distribution for details.
24 * A copy of the CDDL is also available via the Internet at
25 * http://www.opensource.org/licenses/cddl1.txt
26 *
27 * When distributing Covered Code, include this CDDL HEADER in each
28 * file and include the License file CDDL.Schily.txt from this distribution.
29 */
30/*
31 * The pattern matching functions below are based on the algorithm
32 * presented by Martin Richards in:
33 *
34 * "A Compact Function for Regular Expression Pattern Matching",
35 * Software-Practice and Experience, Vol. 9, 527-534 (1979)
36 *
37 * Several changes have been made to the original source which has been
38 * written in BCPL:
39 *
40 * '/' is replaced by '!' (to allow UNIX filenames)
41 * '(',')' are replaced by '{', '}'
42 * '\'' is replaced by '\\' (UNIX compatible quote)
43 *
44 * Character classes have been added to allow "[<character list>]"
45 * to be used.
46 * POSIX features like [[:alpha:]] have been added.
47 * Start of line '^' and end of line '$' have been added.
48 */
49
50#undef CHAR
51
52#ifdef __LINE_MATCH
53#ifdef __WIDE_CHAR
54#define patmatch patwlmatch
55#else
56#define opatmatch opatlmatch
57#define patmatch patlmatch
58#endif
59#endif
60
61#ifdef __WIDE_CHAR
62#ifndef __LINE_MATCH
63#define patcompile patwcompile
64#define patmatch patwmatch
65#endif
66#define CHAR wchar_t
67#define PCHAR wchar_t
68#endif
69
70#ifdef __MB_CHAR
71#undef patmatch
72#ifdef __LINE_MATCH
73#define patmatch patmblmatch
74#else
75#define patmatch patmbmatch
76#endif
77#define PCHAR wchar_t
78#endif
79
80#ifndef CHAR
81typedef unsigned char Uchar;
82#define DID_UCHAR_TYPE
83#define CHAR Uchar
84#endif
85
86#ifndef PCHAR
87#ifndef DID_UCHAR_TYPE
88typedef unsigned char Uchar;
89#endif
90#define PCHAR Uchar
91#endif
92
93#define ENDSTATE (-1)
94
95#define CL_SIZE 32 /* Max size for '[: :]' */
96
97/*
98 * The Interpreter
99 */
100
101
102/*
103 * put adds a new state to the active list
104 */
105#define put(ret, state, sp, n) { \
106 register int *lstate = state; \
107 register int *lsp = sp; \
108 register int ln = n; \
109 \
110 while (lstate < lsp) { \
111 if (*lstate++ == ln) { \
112 ret = lsp; \
113 lsp = 0; \
114 break; \
115 } \
116 } \
117 if (lsp) { \
118 *lstate++ = ln; \
119 ret = lstate; \
120 } \
121}
122
123
124/*
125 * match a character in class
126 *
127 * Syntax errors do not appear here, they are handled by the compiler,
128 * so in theory we could remove the "return (0)" statements from the
129 * the POSIX class code.
130 */
131#ifdef POSIX_CLASS
132#define CHK_POSIX_CLASS \
133 else if (*lpat == LCLASS) { \
134 if (lpat[1] == ':') { \
135 char class[CL_SIZE+1]; \
136 char *pc = class; \
137 \
138 lpat += 2; /* Eat ':' */ \
139 for (;;) { \
140 if (*lpat == '\0') { \
141 ok = FALSE; \
142 goto out; \
143 } \
144 if (*lpat == ':' && lpat[1] == RCLASS) \
145 break; \
146 if (pc >= &class[CL_SIZE]) { \
147 ok = FALSE; \
148 goto out; \
149 } \
150 *pc++ = *lpat++; \
151 } \
152 if (pc == class) { \
153 ok = FALSE; \
154 goto out; \
155 } \
156 *pc = '\0'; \
157 lpat += 2; /* Skip ":]" */ \
158 if (iswctype(lc, wctype(class))) { \
159 ok = !ok; \
160 goto out; \
161 } \
162 continue; \
163 } \
164 }
165#else
166#define CHK_POSIX_CLASS
167#endif
168#define in_class(found, pat, c) { \
169 register const PCHAR *lpat = pat; \
170 register int lc = c; \
171 int lo_bound; \
172 int hi_bound; \
173 BOOL ok = FALSE; \
174 \
175 if (*lpat == NOT) { \
176 lpat++; \
177 ok = TRUE; \
178 } \
179 while (*lpat != RCLASS) { \
180 if (*lpat == QUOTE) \
181 lpat++; \
182 CHK_POSIX_CLASS \
183 lo_bound = *lpat++; \
184 if (*lpat == RANGE) { \
185 lpat++; \
186 if (*lpat == QUOTE) \
187 lpat++; \
188 hi_bound = *lpat++; \
189 } else { \
190 hi_bound = lo_bound; \
191 } \
192 if (lo_bound <= lc && lc <= hi_bound) { \
193 ok = !ok; \
194 goto out; \
195 } \
196 } \
197out: \
198 found = ok; \
199}
200
201/*
202 * opatmatch - the old external interpreter interface.
203 *
204 * Trys to match a string beginning at offset
205 * against the compiled pattern.
206 */
207#if !defined(__WIDE_CHAR) && !defined(__MB_CHAR)
209*opatmatch(pat, aux, str, soff, slen, alt)
210 const PCHAR *pat;
211 const int *aux;
212 const CHAR *str;
213 int soff;
214 int slen;
215 int alt;
216{
217 int state[MAXPAT];
218
219 return (patmatch(pat, aux, str, soff, slen, alt, state));
220}
221#endif
222
223/*
224 * patmatch - the external interpreter interface.
225 *
226 * Trys to match a string beginning at offset
227 * against the compiled pattern.
228 */
229EXPORT CHAR *
230patmatch(pat, aux, str, soff, slen, alt, state)
231 const PCHAR *pat;
232 const int *aux;
233 const CHAR *str;
234 int soff;
235 int slen;
236 int alt;
237 int state[];
238{
239 register int *sp;
240 register int *n;
241 register int *i;
242 register int p;
243 register int q, s, k;
244#ifdef __MB_CHAR
245 wchar_t c;
246 int mlen = 1;
247#else
248 int c;
249#endif
250 const CHAR *lastp = (CHAR *)NULL;
251
252#ifdef __LINE_MATCH
253for (; soff <= slen; soff++) {
254#endif
255
256 sp = state;
257 put(sp, state, state, 0);
258 if (alt != ENDSTATE)
259 put(sp, state, sp, alt);
260
261#ifdef __MB_CHAR
262 mbtowc(NULL, NULL, 0);
263 for (s = soff; ; s += mlen) {
264#else
265 for (s = soff; ; s++) {
266#endif
267 /*
268 * next char from input string
269 */
270 if (s >= slen) {
271 c = 0;
272 } else {
273#ifdef __MB_CHAR
274 mlen = mbtowc(&c, (char *)str, slen - s);
275 if (mlen < 0) {
276 mbtowc(NULL, NULL, 0);
277 c = str[s];
278 mlen = 1;
279 }
280#else
281 c = str[s];
282#endif
283 }
284 /*
285 * first complete the closure
286 */
287 for (n = state; n < sp; ) {
288 p = *n++; /* next state number */
289 if (p == ENDSTATE)
290 continue;
291 q = aux[p]; /* get next state for pat */
292 k = pat[p]; /* get next char from pat */
293 switch (k) {
294
295 case REP:
296 put(sp, state, sp, p+1);
297 /* FALLTHRU */
298 case NIL: /* NIL matches always */
299 case STAR:
300 put(sp, state, sp, q);
301 break;
302 case LBRACK: /* alternations */
303 case ALT:
304 put(sp, state, sp, p+1);
305 if (q != ENDSTATE)
306 put(sp, state, sp, q);
307 break;
308 case START:
309 if (s == 0)
310 put(sp, state, sp, q);
311 break;
312 case END:
313 if (c == '\0')
314 put(sp, state, sp, q);
315 break;
316 }
317 }
318
319 for (i = state; i < sp; ) {
320 if (*i++ == ENDSTATE) {
321 lastp = &str[s];
322 break;
323 }
324 }
325 if (c == 0)
326 return ((CHAR *)lastp);
327
328 /*
329 * now try to match next character
330 */
331 n = sp;
332 sp = state;
333 for (i = sp; i < n; ) {
334 p = *i++; /* next active state number */
335 if (p == ENDSTATE)
336 continue;
337 k = pat[p];
338 switch (k) {
339
340 case ALT:
341 case REP:
342 case NIL:
343 case LBRACK:
344 case START:
345 case END:
346 continue;
347 case LCLASS:
348 in_class(q, &pat[p+1], c);
349 if (!q)
350 continue;
351 break;
352 case STAR:
353 put(sp, state, sp, p);
354 continue;
355 case QUOTE:
356 k = pat[p+1];
357 default:
358 if (k != c)
359 continue;
360 /* FALLTHRU */
361 case ANY:
362 break;
363 }
364 put(sp, state, sp, aux[p]);
365 }
366 if (sp == state) { /* if no new states return */
367#ifdef __LINE_MATCH
368
369 if (lastp || (soff == slen - 1))
370 return ((CHAR *)lastp);
371 else
372 break;
373#else
374 return ((CHAR *)lastp);
375#endif
376 }
377 }
378#ifdef __LINE_MATCH
379}
380return ((CHAR *)lastp);
381#endif
382}
383
384
385#if !defined(__LINE_MATCH) && !defined(__MB_CHAR)
386/*
387 * The Compiler
388 */
389
390typedef struct args {
392 int *aux;
393 int patp;
397
399LOCAL int prim __PR((arg_t *));
400LOCAL int expr __PR((arg_t *, int *));
401LOCAL void setexits __PR((int *, int, int));
402LOCAL int join __PR((int *, int, int));
403
404/*
405 * 'read' the next character from pattern
406 */
407#define rch(ap) \
408{ \
409 if (++(ap)->patp >= (ap)->length) \
410 (ap)->Ch = 0; \
411 else \
412 (ap)->Ch = (ap)->pattern[(ap)->patp]; \
413}
414
415/*
416 * 'peek' the next character from pattern
417 */
418#define pch(ap) \
419 ((((ap)->patp + 1) >= (ap)->length) ? \
420 0 \
421 : \
422 (ap)->pattern[(ap)->patp+1]) \
423
424/*
425 * get the next item from pattern
426 */
427LOCAL void
429 arg_t *ap;
430{
431 if (ap->Ch == QUOTE)
432 rch(ap);
433 rch(ap);
434}
435
436/*
437 * parse a primary
438 */
439LOCAL int
441 arg_t *ap;
442{
443 int a = ap->patp;
444 int op = ap->Ch;
445 int t;
446
447 nextitem(ap);
448 switch (op) {
449
450 case '\0':
451 case ALT:
452 case RBRACK:
453 return (ENDSTATE);
454 case LCLASS:
455 while (ap->Ch != RCLASS && ap->Ch != '\0') {
456#ifdef POSIX_CLASS
457 if (ap->Ch == LCLASS) {
458 if (pch(ap) == ':') { /* [:alpha:] */
459 char class[CL_SIZE+1];
460 char *pc = class;
461
462 nextitem(ap);
463 nextitem(ap);
464 while (ap->Ch != ':' &&
465 ap->Ch != '\0') {
466 if (pc > &class[CL_SIZE])
467 return (ENDSTATE);
468 *pc = ap->Ch;
469 if (*pc++ != ap->Ch)
470 return (ENDSTATE);
471 nextitem(ap);
472 }
473 if (pc == class)
474 return (ENDSTATE);
475 *pc = '\0';
476 if (ap->Ch == '\0')
477 return (ENDSTATE);
478 if (wctype(class) == 0)
479 return (ENDSTATE);
480 nextitem(ap);
481 }
482 if (ap->Ch != RCLASS)
483 return (ENDSTATE);
484 }
485#endif
486 nextitem(ap);
487 }
488 if (ap->Ch == '\0')
489 return (ENDSTATE);
490 nextitem(ap);
491 break;
492 case REP:
493 t = prim(ap);
494 if (t == ENDSTATE)
495 return (ENDSTATE);
496 setexits(ap->aux, t, a);
497 break;
498 case LBRACK:
499 a = expr(ap, &ap->aux[a]);
500 if (a == ENDSTATE || ap->Ch != RBRACK)
501 return (ENDSTATE);
502 nextitem(ap);
503 break;
504 }
505 return (a);
506}
507
508/*
509 * parse an expression (a sequence of primaries)
510 */
511LOCAL int
512expr(ap, altp)
513 arg_t *ap;
514 int *altp;
515{
516 int exits = ENDSTATE;
517 int a;
518 int *aux = ap->aux;
519 PCHAR Ch;
520
521 for (;;) {
522 a = prim(ap);
523 if (a == ENDSTATE)
524 return (ENDSTATE);
525 Ch = ap->Ch;
526 if (Ch == ALT || Ch == RBRACK || Ch == '\0') {
527 exits = join(aux, exits, a);
528 if (Ch != ALT)
529 return (exits);
530 *altp = ap->patp;
531 altp = &aux[ap->patp];
532 nextitem(ap);
533 } else
534 setexits(aux, a, ap->patp);
535 }
536}
537
538/*
539 * set all exits in a list to a specified value
540 */
541LOCAL void
543 int *aux;
544 int list;
545 int val;
546{
547 int a;
548
549 while (list != ENDSTATE) {
550 a = aux[list];
551 aux[list] = val;
552 list = a;
553 }
554}
555
556/*
557 * concatenate two lists
558 */
559LOCAL int
561 int *aux;
562 int a;
563 int b;
564{
565 int t;
566
567 if (a == ENDSTATE)
568 return (b);
569 t = a;
570 while (aux[t] != ENDSTATE)
571 t = aux[t];
572 aux[t] = b;
573 return (a);
574}
575
576/*
577 * patcompile - the external compiler interface.
578 *
579 * The pattern is compiled into the aux array.
580 * Return value on success, is the outermost alternate which is != 0.
581 * Error is indicated by return of 0.
582 */
583EXPORT int
585 const PCHAR *pat;
586 int len;
587 int *aux;
588{
589 arg_t a;
590 int alt = ENDSTATE;
591 int i;
592
593 a.pattern = pat;
594 a.length = len;
595 a.aux = aux;
596 a.patp = -1;
597
598 for (i = 0; i < len; i++)
599 aux[i] = ENDSTATE;
600 rch(&a);
601 i = expr(&a, &alt);
602 if (i == ENDSTATE)
603 return (0);
605 return (alt);
606}
607#endif /* !defined(__LINE_MATCH) && !defined(__MB_CHAR) */
static int state
Definition: maze.c:121
Definition: list.h:37
#define NULL
Definition: types.h:112
UINT op
Definition: effect.c:236
GLdouble s
Definition: gl.h:2039
GLdouble GLdouble t
Definition: gl.h:2047
GLdouble GLdouble GLdouble GLdouble q
Definition: gl.h:2063
GLdouble n
Definition: glext.h:7729
const GLubyte * c
Definition: glext.h:8905
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
GLuint GLfloat * val
Definition: glext.h:7180
GLfloat GLfloat p
Definition: glext.h:8902
GLenum GLsizei len
Definition: glext.h:6722
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
wctype_t __cdecl wctype(const char *)
#define LOCAL(type)
Definition: jmorecfg.h:289
#define QUOTE
Definition: kbdpo.c:220
#define a
Definition: ke_i.h:78
#define c
Definition: ke_i.h:80
#define b
Definition: ke_i.h:79
static const WCHAR aux[]
#define put(ret, state, sp, n)
Definition: match.c:105
struct args arg_t
#define rch(ap)
Definition: match.c:407
EXPORT CHAR * patmatch(PCHAR *pat, const int *aux, const CHAR *str, int soff, int slen, int alt, state) const
Definition: match.c:230
#define in_class(found, pat, c)
Definition: match.c:168
unsigned char Uchar
Definition: match.c:81
#define pch(ap)
Definition: match.c:418
LOCAL int prim(arg_t *ap)
Definition: match.c:440
EXPORT CHAR * opatmatch(PCHAR *pat, const int *aux, const CHAR *str, int soff, int slen, int alt) const
Definition: match.c:209
#define CL_SIZE
Definition: match.c:95
EXPORT int patcompile(PCHAR *pat, int len, int *aux) const
Definition: match.c:584
LOCAL void nextitem(arg_t *ap)
Definition: match.c:428
LOCAL int join(int *aux, int a, int b)
Definition: match.c:560
#define ENDSTATE
Definition: match.c:93
LOCAL void setexits(int *aux, int list, int val)
Definition: match.c:542
#define END
Definition: options.h:105
static const WCHAR sp[]
Definition: suminfo.c:287
int k
Definition: mpi.c:3369
#define LBRACK
Definition: patmatch.h:57
#define START
Definition: patmatch.h:63
#define REP
Definition: patmatch.h:52
#define ANY
Definition: patmatch.h:55
#define RCLASS
Definition: patmatch.h:60
#define LCLASS
Definition: patmatch.h:59
#define STAR
Definition: patmatch.h:54
#define ALT
Definition: patmatch.h:51
#define MAXPAT
Definition: patmatch.h:79
#define RBRACK
Definition: patmatch.h:58
#define __PR(a)
Definition: prototyp.h:106
#define list
Definition: rosglue.h:35
const WCHAR * str
#define Ch(x, y, z)
Definition: sha2.c:141
Definition: match.c:390
int length
Definition: match.c:394
int patp
Definition: match.c:393
int * aux
Definition: match.c:392
const PCHAR * pattern
Definition: match.c:391
PCHAR Ch
Definition: match.c:395
Definition: query.h:86
#define mbtowc(wp, cp, len)
Definition: wchar.h:155
#define NIL
Definition: trio.c:106
char * PCHAR
Definition: typedefs.h:51
void int int ULONGLONG int va_list * ap
Definition: winesup.h:36
char CHAR
Definition: xmlstorage.h:175