wildmatch.con commit rebase -i: rewrite init_basic_state() in C (d59cd14)
   1/*
   2**  Do shell-style pattern matching for ?, \, [], and * characters.
   3**  It is 8bit clean.
   4**
   5**  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
   6**  Rich $alz is now <rsalz@bbn.com>.
   7**
   8**  Modified by Wayne Davison to special-case '/' matching, to make '**'
   9**  work differently than '*', and to fix the character-class code.
  10*/
  11
  12#include "cache.h"
  13#include "wildmatch.h"
  14
  15typedef unsigned char uchar;
  16
  17/* What character marks an inverted character class? */
  18#define NEGATE_CLASS    '!'
  19#define NEGATE_CLASS2   '^'
  20
  21#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
  22                                    && *(class) == *(litmatch) \
  23                                    && strncmp((char*)class, litmatch, len) == 0)
  24
  25#if defined STDC_HEADERS || !defined isascii
  26# define ISASCII(c) 1
  27#else
  28# define ISASCII(c) isascii(c)
  29#endif
  30
  31#ifdef isblank
  32# define ISBLANK(c) (ISASCII(c) && isblank(c))
  33#else
  34# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
  35#endif
  36
  37#ifdef isgraph
  38# define ISGRAPH(c) (ISASCII(c) && isgraph(c))
  39#else
  40# define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
  41#endif
  42
  43#define ISPRINT(c) (ISASCII(c) && isprint(c))
  44#define ISDIGIT(c) (ISASCII(c) && isdigit(c))
  45#define ISALNUM(c) (ISASCII(c) && isalnum(c))
  46#define ISALPHA(c) (ISASCII(c) && isalpha(c))
  47#define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
  48#define ISLOWER(c) (ISASCII(c) && islower(c))
  49#define ISPUNCT(c) (ISASCII(c) && ispunct(c))
  50#define ISSPACE(c) (ISASCII(c) && isspace(c))
  51#define ISUPPER(c) (ISASCII(c) && isupper(c))
  52#define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
  53
  54/* Match pattern "p" against "text" */
  55static int dowild(const uchar *p, const uchar *text, unsigned int flags)
  56{
  57        uchar p_ch;
  58        const uchar *pattern = p;
  59
  60        for ( ; (p_ch = *p) != '\0'; text++, p++) {
  61                int matched, match_slash, negated;
  62                uchar t_ch, prev_ch;
  63                if ((t_ch = *text) == '\0' && p_ch != '*')
  64                        return WM_ABORT_ALL;
  65                if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
  66                        t_ch = tolower(t_ch);
  67                if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
  68                        p_ch = tolower(p_ch);
  69                switch (p_ch) {
  70                case '\\':
  71                        /* Literal match with following character.  Note that the test
  72                         * in "default" handles the p[1] == '\0' failure case. */
  73                        p_ch = *++p;
  74                        /* FALLTHROUGH */
  75                default:
  76                        if (t_ch != p_ch)
  77                                return WM_NOMATCH;
  78                        continue;
  79                case '?':
  80                        /* Match anything but '/'. */
  81                        if ((flags & WM_PATHNAME) && t_ch == '/')
  82                                return WM_NOMATCH;
  83                        continue;
  84                case '*':
  85                        if (*++p == '*') {
  86                                const uchar *prev_p = p - 2;
  87                                while (*++p == '*') {}
  88                                if (!(flags & WM_PATHNAME))
  89                                        /* without WM_PATHNAME, '*' == '**' */
  90                                        match_slash = 1;
  91                                else if ((prev_p < pattern || *prev_p == '/') &&
  92                                    (*p == '\0' || *p == '/' ||
  93                                     (p[0] == '\\' && p[1] == '/'))) {
  94                                        /*
  95                                         * Assuming we already match 'foo/' and are at
  96                                         * <star star slash>, just assume it matches
  97                                         * nothing and go ahead match the rest of the
  98                                         * pattern with the remaining string. This
  99                                         * helps make foo/<*><*>/bar (<> because
 100                                         * otherwise it breaks C comment syntax) match
 101                                         * both foo/bar and foo/a/bar.
 102                                         */
 103                                        if (p[0] == '/' &&
 104                                            dowild(p + 1, text, flags) == WM_MATCH)
 105                                                return WM_MATCH;
 106                                        match_slash = 1;
 107                                } else
 108                                        return WM_ABORT_MALFORMED;
 109                        } else
 110                                /* without WM_PATHNAME, '*' == '**' */
 111                                match_slash = flags & WM_PATHNAME ? 0 : 1;
 112                        if (*p == '\0') {
 113                                /* Trailing "**" matches everything.  Trailing "*" matches
 114                                 * only if there are no more slash characters. */
 115                                if (!match_slash) {
 116                                        if (strchr((char*)text, '/') != NULL)
 117                                                return WM_NOMATCH;
 118                                }
 119                                return WM_MATCH;
 120                        } else if (!match_slash && *p == '/') {
 121                                /*
 122                                 * _one_ asterisk followed by a slash
 123                                 * with WM_PATHNAME matches the next
 124                                 * directory
 125                                 */
 126                                const char *slash = strchr((char*)text, '/');
 127                                if (!slash)
 128                                        return WM_NOMATCH;
 129                                text = (const uchar*)slash;
 130                                /* the slash is consumed by the top-level for loop */
 131                                break;
 132                        }
 133                        while (1) {
 134                                if (t_ch == '\0')
 135                                        break;
 136                                /*
 137                                 * Try to advance faster when an asterisk is
 138                                 * followed by a literal. We know in this case
 139                                 * that the string before the literal
 140                                 * must belong to "*".
 141                                 * If match_slash is false, do not look past
 142                                 * the first slash as it cannot belong to '*'.
 143                                 */
 144                                if (!is_glob_special(*p)) {
 145                                        p_ch = *p;
 146                                        if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
 147                                                p_ch = tolower(p_ch);
 148                                        while ((t_ch = *text) != '\0' &&
 149                                               (match_slash || t_ch != '/')) {
 150                                                if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
 151                                                        t_ch = tolower(t_ch);
 152                                                if (t_ch == p_ch)
 153                                                        break;
 154                                                text++;
 155                                        }
 156                                        if (t_ch != p_ch)
 157                                                return WM_NOMATCH;
 158                                }
 159                                if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
 160                                        if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
 161                                                return matched;
 162                                } else if (!match_slash && t_ch == '/')
 163                                        return WM_ABORT_TO_STARSTAR;
 164                                t_ch = *++text;
 165                        }
 166                        return WM_ABORT_ALL;
 167                case '[':
 168                        p_ch = *++p;
 169#ifdef NEGATE_CLASS2
 170                        if (p_ch == NEGATE_CLASS2)
 171                                p_ch = NEGATE_CLASS;
 172#endif
 173                        /* Assign literal 1/0 because of "matched" comparison. */
 174                        negated = p_ch == NEGATE_CLASS ? 1 : 0;
 175                        if (negated) {
 176                                /* Inverted character class. */
 177                                p_ch = *++p;
 178                        }
 179                        prev_ch = 0;
 180                        matched = 0;
 181                        do {
 182                                if (!p_ch)
 183                                        return WM_ABORT_ALL;
 184                                if (p_ch == '\\') {
 185                                        p_ch = *++p;
 186                                        if (!p_ch)
 187                                                return WM_ABORT_ALL;
 188                                        if (t_ch == p_ch)
 189                                                matched = 1;
 190                                } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
 191                                        p_ch = *++p;
 192                                        if (p_ch == '\\') {
 193                                                p_ch = *++p;
 194                                                if (!p_ch)
 195                                                        return WM_ABORT_ALL;
 196                                        }
 197                                        if (t_ch <= p_ch && t_ch >= prev_ch)
 198                                                matched = 1;
 199                                        else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
 200                                                uchar t_ch_upper = toupper(t_ch);
 201                                                if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
 202                                                        matched = 1;
 203                                        }
 204                                        p_ch = 0; /* This makes "prev_ch" get set to 0. */
 205                                } else if (p_ch == '[' && p[1] == ':') {
 206                                        const uchar *s;
 207                                        int i;
 208                                        for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
 209                                        if (!p_ch)
 210                                                return WM_ABORT_ALL;
 211                                        i = p - s - 1;
 212                                        if (i < 0 || p[-1] != ':') {
 213                                                /* Didn't find ":]", so treat like a normal set. */
 214                                                p = s - 2;
 215                                                p_ch = '[';
 216                                                if (t_ch == p_ch)
 217                                                        matched = 1;
 218                                                continue;
 219                                        }
 220                                        if (CC_EQ(s,i, "alnum")) {
 221                                                if (ISALNUM(t_ch))
 222                                                        matched = 1;
 223                                        } else if (CC_EQ(s,i, "alpha")) {
 224                                                if (ISALPHA(t_ch))
 225                                                        matched = 1;
 226                                        } else if (CC_EQ(s,i, "blank")) {
 227                                                if (ISBLANK(t_ch))
 228                                                        matched = 1;
 229                                        } else if (CC_EQ(s,i, "cntrl")) {
 230                                                if (ISCNTRL(t_ch))
 231                                                        matched = 1;
 232                                        } else if (CC_EQ(s,i, "digit")) {
 233                                                if (ISDIGIT(t_ch))
 234                                                        matched = 1;
 235                                        } else if (CC_EQ(s,i, "graph")) {
 236                                                if (ISGRAPH(t_ch))
 237                                                        matched = 1;
 238                                        } else if (CC_EQ(s,i, "lower")) {
 239                                                if (ISLOWER(t_ch))
 240                                                        matched = 1;
 241                                        } else if (CC_EQ(s,i, "print")) {
 242                                                if (ISPRINT(t_ch))
 243                                                        matched = 1;
 244                                        } else if (CC_EQ(s,i, "punct")) {
 245                                                if (ISPUNCT(t_ch))
 246                                                        matched = 1;
 247                                        } else if (CC_EQ(s,i, "space")) {
 248                                                if (ISSPACE(t_ch))
 249                                                        matched = 1;
 250                                        } else if (CC_EQ(s,i, "upper")) {
 251                                                if (ISUPPER(t_ch))
 252                                                        matched = 1;
 253                                                else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
 254                                                        matched = 1;
 255                                        } else if (CC_EQ(s,i, "xdigit")) {
 256                                                if (ISXDIGIT(t_ch))
 257                                                        matched = 1;
 258                                        } else /* malformed [:class:] string */
 259                                                return WM_ABORT_ALL;
 260                                        p_ch = 0; /* This makes "prev_ch" get set to 0. */
 261                                } else if (t_ch == p_ch)
 262                                        matched = 1;
 263                        } while (prev_ch = p_ch, (p_ch = *++p) != ']');
 264                        if (matched == negated ||
 265                            ((flags & WM_PATHNAME) && t_ch == '/'))
 266                                return WM_NOMATCH;
 267                        continue;
 268                }
 269        }
 270
 271        return *text ? WM_NOMATCH : WM_MATCH;
 272}
 273
 274/* Match the "pattern" against the "text" string. */
 275int wildmatch(const char *pattern, const char *text, unsigned int flags)
 276{
 277        return dowild((const uchar*)pattern, (const uchar*)text, flags);
 278}