wildmatch.con commit wildmatch: make dowild() take arbitrary flags (0c52816)
   1/*
   2**  Do shell-style pattern matching for ?, \, [], and * characters.
   3**  It is 8bit clean.
   4**
   5**  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
   6**  Rich $alz is now <rsalz@bbn.com>.
   7**
   8**  Modified by Wayne Davison to special-case '/' matching, to make '**'
   9**  work differently than '*', and to fix the character-class code.
  10*/
  11
  12#include "cache.h"
  13#include "wildmatch.h"
  14
  15typedef unsigned char uchar;
  16
  17/* What character marks an inverted character class? */
  18#define NEGATE_CLASS    '!'
  19#define NEGATE_CLASS2   '^'
  20
  21#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
  22                                    && *(class) == *(litmatch) \
  23                                    && strncmp((char*)class, litmatch, len) == 0)
  24
  25#if defined STDC_HEADERS || !defined isascii
  26# define ISASCII(c) 1
  27#else
  28# define ISASCII(c) isascii(c)
  29#endif
  30
  31#ifdef isblank
  32# define ISBLANK(c) (ISASCII(c) && isblank(c))
  33#else
  34# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
  35#endif
  36
  37#ifdef isgraph
  38# define ISGRAPH(c) (ISASCII(c) && isgraph(c))
  39#else
  40# define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
  41#endif
  42
  43#define ISPRINT(c) (ISASCII(c) && isprint(c))
  44#define ISDIGIT(c) (ISASCII(c) && isdigit(c))
  45#define ISALNUM(c) (ISASCII(c) && isalnum(c))
  46#define ISALPHA(c) (ISASCII(c) && isalpha(c))
  47#define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
  48#define ISLOWER(c) (ISASCII(c) && islower(c))
  49#define ISPUNCT(c) (ISASCII(c) && ispunct(c))
  50#define ISSPACE(c) (ISASCII(c) && isspace(c))
  51#define ISUPPER(c) (ISASCII(c) && isupper(c))
  52#define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
  53
  54/* Match pattern "p" against "text" */
  55static int dowild(const uchar *p, const uchar *text, unsigned int flags)
  56{
  57        uchar p_ch;
  58        const uchar *pattern = p;
  59
  60        for ( ; (p_ch = *p) != '\0'; text++, p++) {
  61                int matched, match_slash, negated;
  62                uchar t_ch, prev_ch;
  63                if ((t_ch = *text) == '\0' && p_ch != '*')
  64                        return WM_ABORT_ALL;
  65                if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
  66                        t_ch = tolower(t_ch);
  67                if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
  68                        p_ch = tolower(p_ch);
  69                switch (p_ch) {
  70                case '\\':
  71                        /* Literal match with following character.  Note that the test
  72                         * in "default" handles the p[1] == '\0' failure case. */
  73                        p_ch = *++p;
  74                        /* FALLTHROUGH */
  75                default:
  76                        if (t_ch != p_ch)
  77                                return WM_NOMATCH;
  78                        continue;
  79                case '?':
  80                        /* Match anything but '/'. */
  81                        if (t_ch == '/')
  82                                return WM_NOMATCH;
  83                        continue;
  84                case '*':
  85                        if (*++p == '*') {
  86                                const uchar *prev_p = p - 2;
  87                                while (*++p == '*') {}
  88                                if ((prev_p < pattern || *prev_p == '/') &&
  89                                    (*p == '\0' || *p == '/' ||
  90                                     (p[0] == '\\' && p[1] == '/'))) {
  91                                        /*
  92                                         * Assuming we already match 'foo/' and are at
  93                                         * <star star slash>, just assume it matches
  94                                         * nothing and go ahead match the rest of the
  95                                         * pattern with the remaining string. This
  96                                         * helps make foo/<*><*>/bar (<> because
  97                                         * otherwise it breaks C comment syntax) match
  98                                         * both foo/bar and foo/a/bar.
  99                                         */
 100                                        if (p[0] == '/' &&
 101                                            dowild(p + 1, text, flags) == WM_MATCH)
 102                                                return WM_MATCH;
 103                                        match_slash = 1;
 104                                } else
 105                                        return WM_ABORT_MALFORMED;
 106                        } else
 107                                match_slash = 0;
 108                        if (*p == '\0') {
 109                                /* Trailing "**" matches everything.  Trailing "*" matches
 110                                 * only if there are no more slash characters. */
 111                                if (!match_slash) {
 112                                        if (strchr((char*)text, '/') != NULL)
 113                                                return WM_NOMATCH;
 114                                }
 115                                return WM_MATCH;
 116                        }
 117                        while (1) {
 118                                if (t_ch == '\0')
 119                                        break;
 120                                if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
 121                                        if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
 122                                                return matched;
 123                                } else if (!match_slash && t_ch == '/')
 124                                        return WM_ABORT_TO_STARSTAR;
 125                                t_ch = *++text;
 126                        }
 127                        return WM_ABORT_ALL;
 128                case '[':
 129                        p_ch = *++p;
 130#ifdef NEGATE_CLASS2
 131                        if (p_ch == NEGATE_CLASS2)
 132                                p_ch = NEGATE_CLASS;
 133#endif
 134                        /* Assign literal 1/0 because of "matched" comparison. */
 135                        negated = p_ch == NEGATE_CLASS ? 1 : 0;
 136                        if (negated) {
 137                                /* Inverted character class. */
 138                                p_ch = *++p;
 139                        }
 140                        prev_ch = 0;
 141                        matched = 0;
 142                        do {
 143                                if (!p_ch)
 144                                        return WM_ABORT_ALL;
 145                                if (p_ch == '\\') {
 146                                        p_ch = *++p;
 147                                        if (!p_ch)
 148                                                return WM_ABORT_ALL;
 149                                        if (t_ch == p_ch)
 150                                                matched = 1;
 151                                } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
 152                                        p_ch = *++p;
 153                                        if (p_ch == '\\') {
 154                                                p_ch = *++p;
 155                                                if (!p_ch)
 156                                                        return WM_ABORT_ALL;
 157                                        }
 158                                        if (t_ch <= p_ch && t_ch >= prev_ch)
 159                                                matched = 1;
 160                                        p_ch = 0; /* This makes "prev_ch" get set to 0. */
 161                                } else if (p_ch == '[' && p[1] == ':') {
 162                                        const uchar *s;
 163                                        int i;
 164                                        for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
 165                                        if (!p_ch)
 166                                                return WM_ABORT_ALL;
 167                                        i = p - s - 1;
 168                                        if (i < 0 || p[-1] != ':') {
 169                                                /* Didn't find ":]", so treat like a normal set. */
 170                                                p = s - 2;
 171                                                p_ch = '[';
 172                                                if (t_ch == p_ch)
 173                                                        matched = 1;
 174                                                continue;
 175                                        }
 176                                        if (CC_EQ(s,i, "alnum")) {
 177                                                if (ISALNUM(t_ch))
 178                                                        matched = 1;
 179                                        } else if (CC_EQ(s,i, "alpha")) {
 180                                                if (ISALPHA(t_ch))
 181                                                        matched = 1;
 182                                        } else if (CC_EQ(s,i, "blank")) {
 183                                                if (ISBLANK(t_ch))
 184                                                        matched = 1;
 185                                        } else if (CC_EQ(s,i, "cntrl")) {
 186                                                if (ISCNTRL(t_ch))
 187                                                        matched = 1;
 188                                        } else if (CC_EQ(s,i, "digit")) {
 189                                                if (ISDIGIT(t_ch))
 190                                                        matched = 1;
 191                                        } else if (CC_EQ(s,i, "graph")) {
 192                                                if (ISGRAPH(t_ch))
 193                                                        matched = 1;
 194                                        } else if (CC_EQ(s,i, "lower")) {
 195                                                if (ISLOWER(t_ch))
 196                                                        matched = 1;
 197                                        } else if (CC_EQ(s,i, "print")) {
 198                                                if (ISPRINT(t_ch))
 199                                                        matched = 1;
 200                                        } else if (CC_EQ(s,i, "punct")) {
 201                                                if (ISPUNCT(t_ch))
 202                                                        matched = 1;
 203                                        } else if (CC_EQ(s,i, "space")) {
 204                                                if (ISSPACE(t_ch))
 205                                                        matched = 1;
 206                                        } else if (CC_EQ(s,i, "upper")) {
 207                                                if (ISUPPER(t_ch))
 208                                                        matched = 1;
 209                                        } else if (CC_EQ(s,i, "xdigit")) {
 210                                                if (ISXDIGIT(t_ch))
 211                                                        matched = 1;
 212                                        } else /* malformed [:class:] string */
 213                                                return WM_ABORT_ALL;
 214                                        p_ch = 0; /* This makes "prev_ch" get set to 0. */
 215                                } else if (t_ch == p_ch)
 216                                        matched = 1;
 217                        } while (prev_ch = p_ch, (p_ch = *++p) != ']');
 218                        if (matched == negated || t_ch == '/')
 219                                return WM_NOMATCH;
 220                        continue;
 221                }
 222        }
 223
 224        return *text ? WM_NOMATCH : WM_MATCH;
 225}
 226
 227/* Match the "pattern" against the "text" string. */
 228int wildmatch(const char *pattern, const char *text,
 229              unsigned int flags, struct wildopts *wo)
 230{
 231        return dowild((const uchar*)pattern, (const uchar*)text, flags);
 232}