pathspec.c on commit ref-filter.c: find disjoint pattern prefixes (b31e268)
   1#include "cache.h"
   2#include "config.h"
   3#include "dir.h"
   4#include "pathspec.h"
   5#include "attr.h"
   6
   7/*
   8 * Finds which of the given pathspecs match items in the index.
   9 *
  10 * For each pathspec, sets the corresponding entry in the seen[] array
  11 * (which should be specs items long, i.e. the same size as pathspec)
  12 * to the nature of the "closest" (i.e. most specific) match found for
  13 * that pathspec in the index, if it was a closer type of match than
  14 * the existing entry.  As an optimization, matching is skipped
  15 * altogether if seen[] already only contains non-zero entries.
  16 *
  17 * If seen[] has not already been written to, it may make sense
  18 * to use find_pathspecs_matching_against_index() instead.
  19 */
  20void add_pathspec_matches_against_index(const struct pathspec *pathspec,
  21                                        const struct index_state *istate,
  22                                        char *seen)
  23{
  24        int num_unmatched = 0, i;
  25
  26        /*
  27         * Since we are walking the index as if we were walking the directory,
  28         * we have to mark the matched pathspec as seen; otherwise we will
  29         * mistakenly think that the user gave a pathspec that did not match
  30         * anything.
  31         */
  32        for (i = 0; i < pathspec->nr; i++)
  33                if (!seen[i])
  34                        num_unmatched++;
  35        if (!num_unmatched)
  36                return;
  37        for (i = 0; i < istate->cache_nr; i++) {
  38                const struct cache_entry *ce = istate->cache[i];
  39                ce_path_match(istate, ce, pathspec, seen);
  40        }
  41}
  42
  43/*
  44 * Finds which of the given pathspecs match items in the index.
  45 *
  46 * This is a one-shot wrapper around add_pathspec_matches_against_index()
  47 * which allocates, populates, and returns a seen[] array indicating the
  48 * nature of the "closest" (i.e. most specific) matches which each of the
  49 * given pathspecs achieves against all items in the index.
  50 */
  51char *find_pathspecs_matching_against_index(const struct pathspec *pathspec,
  52                                            const struct index_state *istate)
  53{
  54        char *seen = xcalloc(pathspec->nr, 1);
  55        add_pathspec_matches_against_index(pathspec, istate, seen);
  56        return seen;
  57}
  58
  59/*
  60 * Magic pathspec
  61 *
  62 * Possible future magic semantics include stuff like:
  63 *
  64 *      { PATHSPEC_RECURSIVE, '*', "recursive" },
  65 *      { PATHSPEC_REGEXP, '\0', "regexp" },
  66 *
  67 */
  68
  69static struct pathspec_magic {
  70        unsigned bit;
  71        char mnemonic; /* this cannot be ':'! */
  72        const char *name;
  73} pathspec_magic[] = {
  74        { PATHSPEC_FROMTOP,  '/', "top" },
  75        { PATHSPEC_LITERAL, '\0', "literal" },
  76        { PATHSPEC_GLOB,    '\0', "glob" },
  77        { PATHSPEC_ICASE,   '\0', "icase" },
  78        { PATHSPEC_EXCLUDE,  '!', "exclude" },
  79        { PATHSPEC_ATTR,    '\0', "attr" },
  80};
  81
  82static void prefix_magic(struct strbuf *sb, int prefixlen, unsigned magic)
  83{
  84        int i;
  85        strbuf_addstr(sb, ":(");
  86        for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++)
  87                if (magic & pathspec_magic[i].bit) {
  88                        if (sb->buf[sb->len - 1] != '(')
  89                                strbuf_addch(sb, ',');
  90                        strbuf_addstr(sb, pathspec_magic[i].name);
  91                }
  92        strbuf_addf(sb, ",prefix:%d)", prefixlen);
  93}
  94
  95static size_t strcspn_escaped(const char *s, const char *stop)
  96{
  97        const char *i;
  98
  99        for (i = s; *i; i++) {
 100                /* skip the escaped character */
 101                if (i[0] == '\\' && i[1]) {
 102                        i++;
 103                        continue;
 104                }
 105
 106                if (strchr(stop, *i))
 107                        break;
 108        }
 109        return i - s;
 110}
 111
 112static inline int invalid_value_char(const char ch)
 113{
 114        if (isalnum(ch) || strchr(",-_", ch))
 115                return 0;
 116        return -1;
 117}
 118
 119static char *attr_value_unescape(const char *value)
 120{
 121        const char *src;
 122        char *dst, *ret;
 123
 124        ret = xmallocz(strlen(value));
 125        for (src = value, dst = ret; *src; src++, dst++) {
 126                if (*src == '\\') {
 127                        if (!src[1])
 128                                die(_("Escape character '\\' not allowed as "
 129                                      "last character in attr value"));
 130                        src++;
 131                }
 132                if (invalid_value_char(*src))
 133                        die("cannot use '%c' for value matching", *src);
 134                *dst = *src;
 135        }
 136        *dst = '\0';
 137        return ret;
 138}
 139
 140static void parse_pathspec_attr_match(struct pathspec_item *item, const char *value)
 141{
 142        struct string_list_item *si;
 143        struct string_list list = STRING_LIST_INIT_DUP;
 144
 145        if (item->attr_check || item->attr_match)
 146                die(_("Only one 'attr:' specification is allowed."));
 147
 148        if (!value || !*value)
 149                die(_("attr spec must not be empty"));
 150
 151        string_list_split(&list, value, ' ', -1);
 152        string_list_remove_empty_items(&list, 0);
 153
 154        item->attr_check = attr_check_alloc();
 155        item->attr_match = xcalloc(list.nr, sizeof(struct attr_match));
 156
 157        for_each_string_list_item(si, &list) {
 158                size_t attr_len;
 159                char *attr_name;
 160                const struct git_attr *a;
 161
 162                int j = item->attr_match_nr++;
 163                const char *attr = si->string;
 164                struct attr_match *am = &item->attr_match[j];
 165
 166                switch (*attr) {
 167                case '!':
 168                        am->match_mode = MATCH_UNSPECIFIED;
 169                        attr++;
 170                        attr_len = strlen(attr);
 171                        break;
 172                case '-':
 173                        am->match_mode = MATCH_UNSET;
 174                        attr++;
 175                        attr_len = strlen(attr);
 176                        break;
 177                default:
 178                        attr_len = strcspn(attr, "=");
 179                        if (attr[attr_len] != '=')
 180                                am->match_mode = MATCH_SET;
 181                        else {
 182                                const char *v = &attr[attr_len + 1];
 183                                am->match_mode = MATCH_VALUE;
 184                                am->value = attr_value_unescape(v);
 185                        }
 186                        break;
 187                }
 188
 189                attr_name = xmemdupz(attr, attr_len);
 190                a = git_attr(attr_name);
 191                if (!a)
 192                        die(_("invalid attribute name %s"), attr_name);
 193
 194                attr_check_append(item->attr_check, a);
 195
 196                free(attr_name);
 197        }
 198
 199        if (item->attr_check->nr != item->attr_match_nr)
 200                BUG("should have same number of entries");
 201
 202        string_list_clear(&list, 0);
 203}
 204
 205static inline int get_literal_global(void)
 206{
 207        static int literal = -1;
 208
 209        if (literal < 0)
 210                literal = git_env_bool(GIT_LITERAL_PATHSPECS_ENVIRONMENT, 0);
 211
 212        return literal;
 213}
 214
 215static inline int get_glob_global(void)
 216{
 217        static int glob = -1;
 218
 219        if (glob < 0)
 220                glob = git_env_bool(GIT_GLOB_PATHSPECS_ENVIRONMENT, 0);
 221
 222        return glob;
 223}
 224
 225static inline int get_noglob_global(void)
 226{
 227        static int noglob = -1;
 228
 229        if (noglob < 0)
 230                noglob = git_env_bool(GIT_NOGLOB_PATHSPECS_ENVIRONMENT, 0);
 231
 232        return noglob;
 233}
 234
 235static inline int get_icase_global(void)
 236{
 237        static int icase = -1;
 238
 239        if (icase < 0)
 240                icase = git_env_bool(GIT_ICASE_PATHSPECS_ENVIRONMENT, 0);
 241
 242        return icase;
 243}
 244
 245static int get_global_magic(int element_magic)
 246{
 247        int global_magic = 0;
 248
 249        if (get_literal_global())
 250                global_magic |= PATHSPEC_LITERAL;
 251
 252        /* --glob-pathspec is overridden by :(literal) */
 253        if (get_glob_global() && !(element_magic & PATHSPEC_LITERAL))
 254                global_magic |= PATHSPEC_GLOB;
 255
 256        if (get_glob_global() && get_noglob_global())
 257                die(_("global 'glob' and 'noglob' pathspec settings are incompatible"));
 258
 259        if (get_icase_global())
 260                global_magic |= PATHSPEC_ICASE;
 261
 262        if ((global_magic & PATHSPEC_LITERAL) &&
 263            (global_magic & ~PATHSPEC_LITERAL))
 264                die(_("global 'literal' pathspec setting is incompatible "
 265                      "with all other global pathspec settings"));
 266
 267        /* --noglob-pathspec adds :(literal) _unless_ :(glob) is specified */
 268        if (get_noglob_global() && !(element_magic & PATHSPEC_GLOB))
 269                global_magic |= PATHSPEC_LITERAL;
 270
 271        return global_magic;
 272}
 273
 274/*
 275 * Parse the pathspec element looking for long magic
 276 *
 277 * saves all magic in 'magic'
 278 * if prefix magic is used, save the prefix length in 'prefix_len'
 279 * returns the position in 'elem' after all magic has been parsed
 280 */
 281static const char *parse_long_magic(unsigned *magic, int *prefix_len,
 282                                    struct pathspec_item *item,
 283                                    const char *elem)
 284{
 285        const char *pos;
 286        const char *nextat;
 287
 288        for (pos = elem + 2; *pos && *pos != ')'; pos = nextat) {
 289                size_t len = strcspn_escaped(pos, ",)");
 290                int i;
 291
 292                if (pos[len] == ',')
 293                        nextat = pos + len + 1; /* handle ',' */
 294                else
 295                        nextat = pos + len; /* handle ')' and '\0' */
 296
 297                if (!len)
 298                        continue;
 299
 300                if (starts_with(pos, "prefix:")) {
 301                        char *endptr;
 302                        *prefix_len = strtol(pos + 7, &endptr, 10);
 303                        if (endptr - pos != len)
 304                                die(_("invalid parameter for pathspec magic 'prefix'"));
 305                        continue;
 306                }
 307
 308                if (starts_with(pos, "attr:")) {
 309                        char *attr_body = xmemdupz(pos + 5, len - 5);
 310                        parse_pathspec_attr_match(item, attr_body);
 311                        *magic |= PATHSPEC_ATTR;
 312                        free(attr_body);
 313                        continue;
 314                }
 315
 316                for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
 317                        if (strlen(pathspec_magic[i].name) == len &&
 318                            !strncmp(pathspec_magic[i].name, pos, len)) {
 319                                *magic |= pathspec_magic[i].bit;
 320                                break;
 321                        }
 322                }
 323
 324                if (ARRAY_SIZE(pathspec_magic) <= i)
 325                        die(_("Invalid pathspec magic '%.*s' in '%s'"),
 326                            (int) len, pos, elem);
 327        }
 328
 329        if (*pos != ')')
 330                die(_("Missing ')' at the end of pathspec magic in '%s'"),
 331                    elem);
 332        pos++;
 333
 334        return pos;
 335}
 336
 337/*
 338 * Parse the pathspec element looking for short magic
 339 *
 340 * saves all magic in 'magic'
 341 * returns the position in 'elem' after all magic has been parsed
 342 */
 343static const char *parse_short_magic(unsigned *magic, const char *elem)
 344{
 345        const char *pos;
 346
 347        for (pos = elem + 1; *pos && *pos != ':'; pos++) {
 348                char ch = *pos;
 349                int i;
 350
 351                /* Special case alias for '!' */
 352                if (ch == '^') {
 353                        *magic |= PATHSPEC_EXCLUDE;
 354                        continue;
 355                }
 356
 357                if (!is_pathspec_magic(ch))
 358                        break;
 359
 360                for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
 361                        if (pathspec_magic[i].mnemonic == ch) {
 362                                *magic |= pathspec_magic[i].bit;
 363                                break;
 364                        }
 365                }
 366
 367                if (ARRAY_SIZE(pathspec_magic) <= i)
 368                        die(_("Unimplemented pathspec magic '%c' in '%s'"),
 369                            ch, elem);
 370        }
 371
 372        if (*pos == ':')
 373                pos++;
 374
 375        return pos;
 376}
 377
 378static const char *parse_element_magic(unsigned *magic, int *prefix_len,
 379                                       struct pathspec_item *item,
 380                                       const char *elem)
 381{
 382        if (elem[0] != ':' || get_literal_global())
 383                return elem; /* nothing to do */
 384        else if (elem[1] == '(')
 385                /* longhand */
 386                return parse_long_magic(magic, prefix_len, item, elem);
 387        else
 388                /* shorthand */
 389                return parse_short_magic(magic, elem);
 390}
 391
 392/*
 393 * Perform the initialization of a pathspec_item based on a pathspec element.
 394 */
 395static void init_pathspec_item(struct pathspec_item *item, unsigned flags,
 396                               const char *prefix, int prefixlen,
 397                               const char *elt)
 398{
 399        unsigned magic = 0, element_magic = 0;
 400        const char *copyfrom = elt;
 401        char *match;
 402        int pathspec_prefix = -1;
 403
 404        item->attr_check = NULL;
 405        item->attr_match = NULL;
 406        item->attr_match_nr = 0;
 407
 408        /* PATHSPEC_LITERAL_PATH ignores magic */
 409        if (flags & PATHSPEC_LITERAL_PATH) {
 410                magic = PATHSPEC_LITERAL;
 411        } else {
 412                copyfrom = parse_element_magic(&element_magic,
 413                                               &pathspec_prefix,
 414                                               item,
 415                                               elt);
 416                magic |= element_magic;
 417                magic |= get_global_magic(element_magic);
 418        }
 419
 420        item->magic = magic;
 421
 422        if (pathspec_prefix >= 0 &&
 423            (prefixlen || (prefix && *prefix)))
 424                BUG("'prefix' magic is supposed to be used at worktree's root");
 425
 426        if ((magic & PATHSPEC_LITERAL) && (magic & PATHSPEC_GLOB))
 427                die(_("%s: 'literal' and 'glob' are incompatible"), elt);
 428
 429        /* Create match string which will be used for pathspec matching */
 430        if (pathspec_prefix >= 0) {
 431                match = xstrdup(copyfrom);
 432                prefixlen = pathspec_prefix;
 433        } else if (magic & PATHSPEC_FROMTOP) {
 434                match = xstrdup(copyfrom);
 435                prefixlen = 0;
 436        } else {
 437                match = prefix_path_gently(prefix, prefixlen,
 438                                           &prefixlen, copyfrom);
 439                if (!match)
 440                        die(_("%s: '%s' is outside repository"), elt, copyfrom);
 441        }
 442
 443        item->match = match;
 444        item->len = strlen(item->match);
 445        item->prefix = prefixlen;
 446
 447        /*
 448         * Prefix the pathspec (keep all magic) and assign to
 449         * original. Useful for passing to another command.
 450         */
 451        if ((flags & PATHSPEC_PREFIX_ORIGIN) &&
 452            !get_literal_global()) {
 453                struct strbuf sb = STRBUF_INIT;
 454
 455                /* Preserve the actual prefix length of each pattern */
 456                prefix_magic(&sb, prefixlen, element_magic);
 457
 458                strbuf_addstr(&sb, match);
 459                item->original = strbuf_detach(&sb, NULL);
 460        } else {
 461                item->original = xstrdup(elt);
 462        }
 463
 464        if (magic & PATHSPEC_LITERAL) {
 465                item->nowildcard_len = item->len;
 466        } else {
 467                item->nowildcard_len = simple_length(item->match);
 468                if (item->nowildcard_len < prefixlen)
 469                        item->nowildcard_len = prefixlen;
 470        }
 471
 472        item->flags = 0;
 473        if (magic & PATHSPEC_GLOB) {
 474                /*
 475                 * FIXME: should we enable ONESTAR in _GLOB for
 476                 * pattern "* * / * . c"?
 477                 */
 478        } else {
 479                if (item->nowildcard_len < item->len &&
 480                    item->match[item->nowildcard_len] == '*' &&
 481                    no_wildcard(item->match + item->nowildcard_len + 1))
 482                        item->flags |= PATHSPEC_ONESTAR;
 483        }
 484
 485        /* sanity checks, pathspec matchers assume these are sane */
 486        if (item->nowildcard_len > item->len ||
 487            item->prefix         > item->len) {
 488                BUG("error initializing pathspec_item");
 489        }
 490}
 491
 492static int pathspec_item_cmp(const void *a_, const void *b_)
 493{
 494        struct pathspec_item *a, *b;
 495
 496        a = (struct pathspec_item *)a_;
 497        b = (struct pathspec_item *)b_;
 498        return strcmp(a->match, b->match);
 499}
 500
 501static void NORETURN unsupported_magic(const char *pattern,
 502                                       unsigned magic)
 503{
 504        struct strbuf sb = STRBUF_INIT;
 505        int i;
 506        for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
 507                const struct pathspec_magic *m = pathspec_magic + i;
 508                if (!(magic & m->bit))
 509                        continue;
 510                if (sb.len)
 511                        strbuf_addstr(&sb, ", ");
 512
 513                if (m->mnemonic)
 514                        strbuf_addf(&sb, _("'%s' (mnemonic: '%c')"),
 515                                    m->name, m->mnemonic);
 516                else
 517                        strbuf_addf(&sb, "'%s'", m->name);
 518        }
 519        /*
 520         * We may want to substitute "this command" with a command
 521         * name. E.g. when add--interactive dies when running
 522         * "checkout -p"
 523         */
 524        die(_("%s: pathspec magic not supported by this command: %s"),
 525            pattern, sb.buf);
 526}
 527
 528void parse_pathspec(struct pathspec *pathspec,
 529                    unsigned magic_mask, unsigned flags,
 530                    const char *prefix, const char **argv)
 531{
 532        struct pathspec_item *item;
 533        const char *entry = argv ? *argv : NULL;
 534        int i, n, prefixlen, nr_exclude = 0;
 535
 536        memset(pathspec, 0, sizeof(*pathspec));
 537
 538        if (flags & PATHSPEC_MAXDEPTH_VALID)
 539                pathspec->magic |= PATHSPEC_MAXDEPTH;
 540
 541        /* No arguments, no prefix -> no pathspec */
 542        if (!entry && !prefix)
 543                return;
 544
 545        if ((flags & PATHSPEC_PREFER_CWD) &&
 546            (flags & PATHSPEC_PREFER_FULL))
 547                BUG("PATHSPEC_PREFER_CWD and PATHSPEC_PREFER_FULL are incompatible");
 548
 549        /* No arguments with prefix -> prefix pathspec */
 550        if (!entry) {
 551                if (flags & PATHSPEC_PREFER_FULL)
 552                        return;
 553
 554                if (!(flags & PATHSPEC_PREFER_CWD))
 555                        BUG("PATHSPEC_PREFER_CWD requires arguments");
 556
 557                pathspec->items = item = xcalloc(1, sizeof(*item));
 558                item->match = xstrdup(prefix);
 559                item->original = xstrdup(prefix);
 560                item->nowildcard_len = item->len = strlen(prefix);
 561                item->prefix = item->len;
 562                pathspec->nr = 1;
 563                return;
 564        }
 565
 566        n = 0;
 567        while (argv[n]) {
 568                if (*argv[n] == '\0')
 569                        die("empty string is not a valid pathspec. "
 570                                  "please use . instead if you meant to match all paths");
 571                n++;
 572        }
 573
 574        pathspec->nr = n;
 575        ALLOC_ARRAY(pathspec->items, n + 1);
 576        item = pathspec->items;
 577        prefixlen = prefix ? strlen(prefix) : 0;
 578
 579        for (i = 0; i < n; i++) {
 580                entry = argv[i];
 581
 582                init_pathspec_item(item + i, flags, prefix, prefixlen, entry);
 583
 584                if (item[i].magic & PATHSPEC_EXCLUDE)
 585                        nr_exclude++;
 586                if (item[i].magic & magic_mask)
 587                        unsupported_magic(entry, item[i].magic & magic_mask);
 588
 589                if ((flags & PATHSPEC_SYMLINK_LEADING_PATH) &&
 590                    has_symlink_leading_path(item[i].match, item[i].len)) {
 591                        die(_("pathspec '%s' is beyond a symbolic link"), entry);
 592                }
 593
 594                if (item[i].nowildcard_len < item[i].len)
 595                        pathspec->has_wildcard = 1;
 596                pathspec->magic |= item[i].magic;
 597        }
 598
 599        /*
 600         * If everything is an exclude pattern, add one positive pattern
 601         * that matches everything. We allocated an extra one for this.
 602         */
 603        if (nr_exclude == n) {
 604                int plen = (!(flags & PATHSPEC_PREFER_CWD)) ? 0 : prefixlen;
 605                init_pathspec_item(item + n, 0, prefix, plen, "");
 606                pathspec->nr++;
 607        }
 608
 609        if (pathspec->magic & PATHSPEC_MAXDEPTH) {
 610                if (flags & PATHSPEC_KEEP_ORDER)
 611                        BUG("PATHSPEC_MAXDEPTH_VALID and PATHSPEC_KEEP_ORDER are incompatible");
 612                QSORT(pathspec->items, pathspec->nr, pathspec_item_cmp);
 613        }
 614}
 615
 616void copy_pathspec(struct pathspec *dst, const struct pathspec *src)
 617{
 618        int i, j;
 619
 620        *dst = *src;
 621        ALLOC_ARRAY(dst->items, dst->nr);
 622        COPY_ARRAY(dst->items, src->items, dst->nr);
 623
 624        for (i = 0; i < dst->nr; i++) {
 625                struct pathspec_item *d = &dst->items[i];
 626                struct pathspec_item *s = &src->items[i];
 627
 628                d->match = xstrdup(s->match);
 629                d->original = xstrdup(s->original);
 630
 631                ALLOC_ARRAY(d->attr_match, d->attr_match_nr);
 632                COPY_ARRAY(d->attr_match, s->attr_match, d->attr_match_nr);
 633                for (j = 0; j < d->attr_match_nr; j++) {
 634                        const char *value = s->attr_match[j].value;
 635                        d->attr_match[j].value = xstrdup_or_null(value);
 636                }
 637
 638                d->attr_check = attr_check_dup(s->attr_check);
 639        }
 640}
 641
 642void clear_pathspec(struct pathspec *pathspec)
 643{
 644        int i, j;
 645
 646        for (i = 0; i < pathspec->nr; i++) {
 647                free(pathspec->items[i].match);
 648                free(pathspec->items[i].original);
 649
 650                for (j = 0; j < pathspec->items[i].attr_match_nr; j++)
 651                        free(pathspec->items[i].attr_match[j].value);
 652                free(pathspec->items[i].attr_match);
 653
 654                if (pathspec->items[i].attr_check)
 655                        attr_check_free(pathspec->items[i].attr_check);
 656        }
 657
 658        FREE_AND_NULL(pathspec->items);
 659        pathspec->nr = 0;
 660}
 661
 662int match_pathspec_attrs(const struct index_state *istate,
 663                         const char *name, int namelen,
 664                         const struct pathspec_item *item)
 665{
 666        int i;
 667        char *to_free = NULL;
 668
 669        if (name[namelen])
 670                name = to_free = xmemdupz(name, namelen);
 671
 672        git_check_attr(istate, name, item->attr_check);
 673
 674        free(to_free);
 675
 676        for (i = 0; i < item->attr_match_nr; i++) {
 677                const char *value;
 678                int matched;
 679                enum attr_match_mode match_mode;
 680
 681                value = item->attr_check->items[i].value;
 682                match_mode = item->attr_match[i].match_mode;
 683
 684                if (ATTR_TRUE(value))
 685                        matched = (match_mode == MATCH_SET);
 686                else if (ATTR_FALSE(value))
 687                        matched = (match_mode == MATCH_UNSET);
 688                else if (ATTR_UNSET(value))
 689                        matched = (match_mode == MATCH_UNSPECIFIED);
 690                else
 691                        matched = (match_mode == MATCH_VALUE &&
 692                                   !strcmp(item->attr_match[i].value, value));
 693                if (!matched)
 694                        return 0;
 695        }
 696
 697        return 1;
 698}