wildmatch.con commit CodingGuidelines: add Python coding guidelines (9ef43dd)
   1/*
   2**  Do shell-style pattern matching for ?, \, [], and * characters.
   3**  It is 8bit clean.
   4**
   5**  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
   6**  Rich $alz is now <rsalz@bbn.com>.
   7**
   8**  Modified by Wayne Davison to special-case '/' matching, to make '**'
   9**  work differently than '*', and to fix the character-class code.
  10*/
  11
  12#include "cache.h"
  13#include "wildmatch.h"
  14
  typedef unsigned char uchar;

  /*
   * Either of these characters, when it is the first character inside a
   * bracket expression, marks the class as inverted.
   */
  #define NEGATE_CLASS    '!'
  #define NEGATE_CLASS2   '^'

  #define FALSE 0
  #define TRUE 1

  /*
   * True when the "len" bytes starting at "cls" spell exactly "litmatch".
   *
   * "litmatch" has to be a string literal (or a char array): its length is
   * taken with sizeof.  The first-character comparison is only a cheap
   * filter in front of strncmp().  Every argument is parenthesized so that
   * an expression such as "s + 1" or "a ? b : c" can be passed safely.
   */
  #define CC_EQ(cls, len, litmatch) ((len) == sizeof (litmatch)-1 \
                                      && *(cls) == *(litmatch) \
                                      && strncmp((const char *)(cls), (litmatch), (len)) == 0)

  /*
   * ISASCII(c) guards the <ctype.h> calls below.  It is the constant 1 when
   * STDC_HEADERS is defined or when isascii is not available as a macro;
   * otherwise it defers to isascii().
   */
  #if defined STDC_HEADERS || !defined isascii
  # define ISASCII(c) 1
  #else
  # define ISASCII(c) isascii(c)
  #endif

  /* Use isblank() when it is provided as a macro, else test for space/tab. */
  #ifdef isblank
  # define ISBLANK(c) (ISASCII(c) && isblank(c))
  #else
  # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
  #endif

  /* Use isgraph() when it is provided as a macro, else "printable, not space". */
  #ifdef isgraph
  # define ISGRAPH(c) (ISASCII(c) && isgraph(c))
  #else
  # define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
  #endif

  /* ISASCII-guarded wrappers around the remaining <ctype.h> classifiers. */
  #define ISPRINT(c) (ISASCII(c) && isprint(c))
  #define ISDIGIT(c) (ISASCII(c) && isdigit(c))
  #define ISALNUM(c) (ISASCII(c) && isalnum(c))
  #define ISALPHA(c) (ISASCII(c) && isalpha(c))
  #define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
  #define ISLOWER(c) (ISASCII(c) && islower(c))
  #define ISPUNCT(c) (ISASCII(c) && ispunct(c))
  #define ISSPACE(c) (ISASCII(c) && isspace(c))
  #define ISUPPER(c) (ISASCII(c) && isupper(c))
  #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
  56
  57/* Match pattern "p" against "text" */
  58static int dowild(const uchar *p, const uchar *text, int force_lower_case)
  59{
  60        uchar p_ch;
  61        const uchar *pattern = p;
  62
  63        for ( ; (p_ch = *p) != '\0'; text++, p++) {
  64                int matched, match_slash, negated;
  65                uchar t_ch, prev_ch;
  66                if ((t_ch = *text) == '\0' && p_ch != '*')
  67                        return ABORT_ALL;
  68                if (force_lower_case && ISUPPER(t_ch))
  69                        t_ch = tolower(t_ch);
  70                if (force_lower_case && ISUPPER(p_ch))
  71                        p_ch = tolower(p_ch);
  72                switch (p_ch) {
  73                case '\\':
  74                        /* Literal match with following character.  Note that the test
  75                         * in "default" handles the p[1] == '\0' failure case. */
  76                        p_ch = *++p;
  77                        /* FALLTHROUGH */
  78                default:
  79                        if (t_ch != p_ch)
  80                                return NOMATCH;
  81                        continue;
  82                case '?':
  83                        /* Match anything but '/'. */
  84                        if (t_ch == '/')
  85                                return NOMATCH;
  86                        continue;
  87                case '*':
  88                        if (*++p == '*') {
  89                                const uchar *prev_p = p - 2;
  90                                while (*++p == '*') {}
  91                                if ((prev_p < pattern || *prev_p == '/') &&
  92                                    (*p == '\0' || *p == '/' ||
  93                                     (p[0] == '\\' && p[1] == '/'))) {
  94                                        /*
  95                                         * Assuming we already match 'foo/' and are at
  96                                         * <star star slash>, just assume it matches
  97                                         * nothing and go ahead match the rest of the
  98                                         * pattern with the remaining string. This
  99                                         * helps make foo/<*><*>/bar (<> because
 100                                         * otherwise it breaks C comment syntax) match
 101                                         * both foo/bar and foo/a/bar.
 102                                         */
 103                                        if (p[0] == '/' &&
 104                                            dowild(p + 1, text, force_lower_case) == MATCH)
 105                                                return MATCH;
 106                                        match_slash = TRUE;
 107                                } else
 108                                        return ABORT_MALFORMED;
 109                        } else
 110                                match_slash = FALSE;
 111                        if (*p == '\0') {
 112                                /* Trailing "**" matches everything.  Trailing "*" matches
 113                                 * only if there are no more slash characters. */
 114                                if (!match_slash) {
 115                                        if (strchr((char*)text, '/') != NULL)
 116                                                return NOMATCH;
 117                                }
 118                                return MATCH;
 119                        }
 120                        while (1) {
 121                                if (t_ch == '\0')
 122                                        break;
 123                                if ((matched = dowild(p, text,  force_lower_case)) != NOMATCH) {
 124                                        if (!match_slash || matched != ABORT_TO_STARSTAR)
 125                                                return matched;
 126                                } else if (!match_slash && t_ch == '/')
 127                                        return ABORT_TO_STARSTAR;
 128                                t_ch = *++text;
 129                        }
 130                        return ABORT_ALL;
 131                case '[':
 132                        p_ch = *++p;
 133#ifdef NEGATE_CLASS2
 134                        if (p_ch == NEGATE_CLASS2)
 135                                p_ch = NEGATE_CLASS;
 136#endif
 137                        /* Assign literal TRUE/FALSE because of "matched" comparison. */
 138                        negated = p_ch == NEGATE_CLASS? TRUE : FALSE;
 139                        if (negated) {
 140                                /* Inverted character class. */
 141                                p_ch = *++p;
 142                        }
 143                        prev_ch = 0;
 144                        matched = FALSE;
 145                        do {
 146                                if (!p_ch)
 147                                        return ABORT_ALL;
 148                                if (p_ch == '\\') {
 149                                        p_ch = *++p;
 150                                        if (!p_ch)
 151                                                return ABORT_ALL;
 152                                        if (t_ch == p_ch)
 153                                                matched = TRUE;
 154                                } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
 155                                        p_ch = *++p;
 156                                        if (p_ch == '\\') {
 157                                                p_ch = *++p;
 158                                                if (!p_ch)
 159                                                        return ABORT_ALL;
 160                                        }
 161                                        if (t_ch <= p_ch && t_ch >= prev_ch)
 162                                                matched = TRUE;
 163                                        p_ch = 0; /* This makes "prev_ch" get set to 0. */
 164                                } else if (p_ch == '[' && p[1] == ':') {
 165                                        const uchar *s;
 166                                        int i;
 167                                        for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
 168                                        if (!p_ch)
 169                                                return ABORT_ALL;
 170                                        i = p - s - 1;
 171                                        if (i < 0 || p[-1] != ':') {
 172                                                /* Didn't find ":]", so treat like a normal set. */
 173                                                p = s - 2;
 174                                                p_ch = '[';
 175                                                if (t_ch == p_ch)
 176                                                        matched = TRUE;
 177                                                continue;
 178                                        }
 179                                        if (CC_EQ(s,i, "alnum")) {
 180                                                if (ISALNUM(t_ch))
 181                                                        matched = TRUE;
 182                                        } else if (CC_EQ(s,i, "alpha")) {
 183                                                if (ISALPHA(t_ch))
 184                                                        matched = TRUE;
 185                                        } else if (CC_EQ(s,i, "blank")) {
 186                                                if (ISBLANK(t_ch))
 187                                                        matched = TRUE;
 188                                        } else if (CC_EQ(s,i, "cntrl")) {
 189                                                if (ISCNTRL(t_ch))
 190                                                        matched = TRUE;
 191                                        } else if (CC_EQ(s,i, "digit")) {
 192                                                if (ISDIGIT(t_ch))
 193                                                        matched = TRUE;
 194                                        } else if (CC_EQ(s,i, "graph")) {
 195                                                if (ISGRAPH(t_ch))
 196                                                        matched = TRUE;
 197                                        } else if (CC_EQ(s,i, "lower")) {
 198                                                if (ISLOWER(t_ch))
 199                                                        matched = TRUE;
 200                                        } else if (CC_EQ(s,i, "print")) {
 201                                                if (ISPRINT(t_ch))
 202                                                        matched = TRUE;
 203                                        } else if (CC_EQ(s,i, "punct")) {
 204                                                if (ISPUNCT(t_ch))
 205                                                        matched = TRUE;
 206                                        } else if (CC_EQ(s,i, "space")) {
 207                                                if (ISSPACE(t_ch))
 208                                                        matched = TRUE;
 209                                        } else if (CC_EQ(s,i, "upper")) {
 210                                                if (ISUPPER(t_ch))
 211                                                        matched = TRUE;
 212                                        } else if (CC_EQ(s,i, "xdigit")) {
 213                                                if (ISXDIGIT(t_ch))
 214                                                        matched = TRUE;
 215                                        } else /* malformed [:class:] string */
 216                                                return ABORT_ALL;
 217                                        p_ch = 0; /* This makes "prev_ch" get set to 0. */
 218                                } else if (t_ch == p_ch)
 219                                        matched = TRUE;
 220                        } while (prev_ch = p_ch, (p_ch = *++p) != ']');
 221                        if (matched == negated || t_ch == '/')
 222                                return NOMATCH;
 223                        continue;
 224                }
 225        }
 226
 227        return *text ? NOMATCH : MATCH;
 228}
 229
 230/* Match the "pattern" against the "text" string. */
 231int wildmatch(const char *pattern, const char *text, int flags)
 232{
 233        return dowild((const uchar*)pattern, (const uchar*)text,
 234                      flags & FNM_CASEFOLD ? 1 :0);
 235}