tree-walk.con commit pathspec: support :(literal) syntax for noglob pathspec (5c6933d)
   1#include "cache.h"
   2#include "tree-walk.h"
   3#include "unpack-trees.h"
   4#include "dir.h"
   5#include "tree.h"
   6#include "pathspec.h"
   7
   8static const char *get_mode(const char *str, unsigned int *modep)
   9{
  10        unsigned char c;
  11        unsigned int mode = 0;
  12
  13        if (*str == ' ')
  14                return NULL;
  15
  16        while ((c = *str++) != ' ') {
  17                if (c < '0' || c > '7')
  18                        return NULL;
  19                mode = (mode << 3) + (c - '0');
  20        }
  21        *modep = mode;
  22        return str;
  23}
  24
  25static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
  26{
  27        const char *path;
  28        unsigned int mode, len;
  29
  30        if (size < 24 || buf[size - 21])
  31                die("corrupt tree file");
  32
  33        path = get_mode(buf, &mode);
  34        if (!path || !*path)
  35                die("corrupt tree file");
  36        len = strlen(path) + 1;
  37
  38        /* Initialize the descriptor entry */
  39        desc->entry.path = path;
  40        desc->entry.mode = mode;
  41        desc->entry.sha1 = (const unsigned char *)(path + len);
  42}
  43
  44void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
  45{
  46        desc->buffer = buffer;
  47        desc->size = size;
  48        if (size)
  49                decode_tree_entry(desc, buffer, size);
  50}
  51
  52void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
  53{
  54        unsigned long size = 0;
  55        void *buf = NULL;
  56
  57        if (sha1) {
  58                buf = read_object_with_reference(sha1, tree_type, &size, NULL);
  59                if (!buf)
  60                        die("unable to read tree %s", sha1_to_hex(sha1));
  61        }
  62        init_tree_desc(desc, buf, size);
  63        return buf;
  64}
  65
  66static void entry_clear(struct name_entry *a)
  67{
  68        memset(a, 0, sizeof(*a));
  69}
  70
  71static void entry_extract(struct tree_desc *t, struct name_entry *a)
  72{
  73        *a = t->entry;
  74}
  75
  76void update_tree_entry(struct tree_desc *desc)
  77{
  78        const void *buf = desc->buffer;
  79        const unsigned char *end = desc->entry.sha1 + 20;
  80        unsigned long size = desc->size;
  81        unsigned long len = end - (const unsigned char *)buf;
  82
  83        if (size < len)
  84                die("corrupt tree file");
  85        buf = end;
  86        size -= len;
  87        desc->buffer = buf;
  88        desc->size = size;
  89        if (size)
  90                decode_tree_entry(desc, buf, size);
  91}
  92
  93int tree_entry(struct tree_desc *desc, struct name_entry *entry)
  94{
  95        if (!desc->size)
  96                return 0;
  97
  98        *entry = desc->entry;
  99        update_tree_entry(desc);
 100        return 1;
 101}
 102
 103void setup_traverse_info(struct traverse_info *info, const char *base)
 104{
 105        int pathlen = strlen(base);
 106        static struct traverse_info dummy;
 107
 108        memset(info, 0, sizeof(*info));
 109        if (pathlen && base[pathlen-1] == '/')
 110                pathlen--;
 111        info->pathlen = pathlen ? pathlen + 1 : 0;
 112        info->name.path = base;
 113        info->name.sha1 = (void *)(base + pathlen + 1);
 114        if (pathlen)
 115                info->prev = &dummy;
 116}
 117
 118char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n)
 119{
 120        int len = tree_entry_len(n);
 121        int pathlen = info->pathlen;
 122
 123        path[pathlen + len] = 0;
 124        for (;;) {
 125                memcpy(path + pathlen, n->path, len);
 126                if (!pathlen)
 127                        break;
 128                path[--pathlen] = '/';
 129                n = &info->name;
 130                len = tree_entry_len(n);
 131                info = info->prev;
 132                pathlen -= len;
 133        }
 134        return path;
 135}
 136
 137struct tree_desc_skip {
 138        struct tree_desc_skip *prev;
 139        const void *ptr;
 140};
 141
 142struct tree_desc_x {
 143        struct tree_desc d;
 144        struct tree_desc_skip *skip;
 145};
 146
 147static int name_compare(const char *a, int a_len,
 148                        const char *b, int b_len)
 149{
 150        int len = (a_len < b_len) ? a_len : b_len;
 151        int cmp = memcmp(a, b, len);
 152        if (cmp)
 153                return cmp;
 154        return (a_len - b_len);
 155}
 156
 157static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
 158{
 159        /*
 160         * The caller wants to pick *a* from a tree or nothing.
 161         * We are looking at *b* in a tree.
 162         *
 163         * (0) If a and b are the same name, we are trivially happy.
 164         *
 165         * There are three possibilities where *a* could be hiding
 166         * behind *b*.
 167         *
 168         * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
 169         *                                matter what.
 170         * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
 171         * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
 172         *
 173         * Otherwise we know *a* won't appear in the tree without
 174         * scanning further.
 175         */
 176
 177        int cmp = name_compare(a, a_len, b, b_len);
 178
 179        /* Most common case first -- reading sync'd trees */
 180        if (!cmp)
 181                return cmp;
 182
 183        if (0 < cmp) {
 184                /* a comes after b; it does not matter if it is case (3)
 185                if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
 186                        return 1;
 187                */
 188                return 1; /* keep looking */
 189        }
 190
 191        /* b comes after a; are we looking at case (2)? */
 192        if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
 193                return 1; /* keep looking */
 194
 195        return -1; /* a cannot appear in the tree */
 196}
 197
 198/*
 199 * From the extended tree_desc, extract the first name entry, while
 200 * paying attention to the candidate "first" name.  Most importantly,
 201 * when looking for an entry, if there are entries that sorts earlier
 202 * in the tree object representation than that name, skip them and
 203 * process the named entry first.  We will remember that we haven't
 204 * processed the first entry yet, and in the later call skip the
 205 * entry we processed early when update_extended_entry() is called.
 206 *
 207 * E.g. if the underlying tree object has these entries:
 208 *
 209 *    blob    "t-1"
 210 *    blob    "t-2"
 211 *    tree    "t"
 212 *    blob    "t=1"
 213 *
 214 * and the "first" asks for "t", remember that we still need to
 215 * process "t-1" and "t-2" but extract "t".  After processing the
 216 * entry "t" from this call, the caller will let us know by calling
 217 * update_extended_entry() that we can remember "t" has been processed
 218 * already.
 219 */
 220
 221static void extended_entry_extract(struct tree_desc_x *t,
 222                                   struct name_entry *a,
 223                                   const char *first,
 224                                   int first_len)
 225{
 226        const char *path;
 227        int len;
 228        struct tree_desc probe;
 229        struct tree_desc_skip *skip;
 230
 231        /*
 232         * Extract the first entry from the tree_desc, but skip the
 233         * ones that we already returned in earlier rounds.
 234         */
 235        while (1) {
 236                if (!t->d.size) {
 237                        entry_clear(a);
 238                        break; /* not found */
 239                }
 240                entry_extract(&t->d, a);
 241                for (skip = t->skip; skip; skip = skip->prev)
 242                        if (a->path == skip->ptr)
 243                                break; /* found */
 244                if (!skip)
 245                        break;
 246                /* We have processed this entry already. */
 247                update_tree_entry(&t->d);
 248        }
 249
 250        if (!first || !a->path)
 251                return;
 252
 253        /*
 254         * The caller wants "first" from this tree, or nothing.
 255         */
 256        path = a->path;
 257        len = tree_entry_len(a);
 258        switch (check_entry_match(first, first_len, path, len)) {
 259        case -1:
 260                entry_clear(a);
 261        case 0:
 262                return;
 263        default:
 264                break;
 265        }
 266
 267        /*
 268         * We need to look-ahead -- we suspect that a subtree whose
 269         * name is "first" may be hiding behind the current entry "path".
 270         */
 271        probe = t->d;
 272        while (probe.size) {
 273                entry_extract(&probe, a);
 274                path = a->path;
 275                len = tree_entry_len(a);
 276                switch (check_entry_match(first, first_len, path, len)) {
 277                case -1:
 278                        entry_clear(a);
 279                case 0:
 280                        return;
 281                default:
 282                        update_tree_entry(&probe);
 283                        break;
 284                }
 285                /* keep looking */
 286        }
 287        entry_clear(a);
 288}
 289
 290static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
 291{
 292        if (t->d.entry.path == a->path) {
 293                update_tree_entry(&t->d);
 294        } else {
 295                /* we have returned this entry early */
 296                struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
 297                skip->ptr = a->path;
 298                skip->prev = t->skip;
 299                t->skip = skip;
 300        }
 301}
 302
 303static void free_extended_entry(struct tree_desc_x *t)
 304{
 305        struct tree_desc_skip *p, *s;
 306
 307        for (s = t->skip; s; s = p) {
 308                p = s->prev;
 309                free(s);
 310        }
 311}
 312
 313static inline int prune_traversal(struct name_entry *e,
 314                                  struct traverse_info *info,
 315                                  struct strbuf *base,
 316                                  int still_interesting)
 317{
 318        if (!info->pathspec || still_interesting == 2)
 319                return 2;
 320        if (still_interesting < 0)
 321                return still_interesting;
 322        return tree_entry_interesting(e, base, 0, info->pathspec);
 323}
 324
 325int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 326{
 327        int ret = 0;
 328        int error = 0;
 329        struct name_entry *entry = xmalloc(n*sizeof(*entry));
 330        int i;
 331        struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
 332        struct strbuf base = STRBUF_INIT;
 333        int interesting = 1;
 334
 335        for (i = 0; i < n; i++)
 336                tx[i].d = t[i];
 337
 338        if (info->prev) {
 339                strbuf_grow(&base, info->pathlen);
 340                make_traverse_path(base.buf, info->prev, &info->name);
 341                base.buf[info->pathlen-1] = '/';
 342                strbuf_setlen(&base, info->pathlen);
 343        }
 344        for (;;) {
 345                unsigned long mask, dirmask;
 346                const char *first = NULL;
 347                int first_len = 0;
 348                struct name_entry *e = NULL;
 349                int len;
 350
 351                for (i = 0; i < n; i++) {
 352                        e = entry + i;
 353                        extended_entry_extract(tx + i, e, NULL, 0);
 354                }
 355
 356                /*
 357                 * A tree may have "t-2" at the current location even
 358                 * though it may have "t" that is a subtree behind it,
 359                 * and another tree may return "t".  We want to grab
 360                 * all "t" from all trees to match in such a case.
 361                 */
 362                for (i = 0; i < n; i++) {
 363                        e = entry + i;
 364                        if (!e->path)
 365                                continue;
 366                        len = tree_entry_len(e);
 367                        if (!first) {
 368                                first = e->path;
 369                                first_len = len;
 370                                continue;
 371                        }
 372                        if (name_compare(e->path, len, first, first_len) < 0) {
 373                                first = e->path;
 374                                first_len = len;
 375                        }
 376                }
 377
 378                if (first) {
 379                        for (i = 0; i < n; i++) {
 380                                e = entry + i;
 381                                extended_entry_extract(tx + i, e, first, first_len);
 382                                /* Cull the ones that are not the earliest */
 383                                if (!e->path)
 384                                        continue;
 385                                len = tree_entry_len(e);
 386                                if (name_compare(e->path, len, first, first_len))
 387                                        entry_clear(e);
 388                        }
 389                }
 390
 391                /* Now we have in entry[i] the earliest name from the trees */
 392                mask = 0;
 393                dirmask = 0;
 394                for (i = 0; i < n; i++) {
 395                        if (!entry[i].path)
 396                                continue;
 397                        mask |= 1ul << i;
 398                        if (S_ISDIR(entry[i].mode))
 399                                dirmask |= 1ul << i;
 400                        e = &entry[i];
 401                }
 402                if (!mask)
 403                        break;
 404                interesting = prune_traversal(e, info, &base, interesting);
 405                if (interesting < 0)
 406                        break;
 407                if (interesting) {
 408                        ret = info->fn(n, mask, dirmask, entry, info);
 409                        if (ret < 0) {
 410                                error = ret;
 411                                if (!info->show_all_errors)
 412                                        break;
 413                        }
 414                        mask &= ret;
 415                }
 416                ret = 0;
 417                for (i = 0; i < n; i++)
 418                        if (mask & (1ul << i))
 419                                update_extended_entry(tx + i, entry + i);
 420        }
 421        free(entry);
 422        for (i = 0; i < n; i++)
 423                free_extended_entry(tx + i);
 424        free(tx);
 425        strbuf_release(&base);
 426        return error;
 427}
 428
 429static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
 430{
 431        int namelen = strlen(name);
 432        while (t->size) {
 433                const char *entry;
 434                const unsigned char *sha1;
 435                int entrylen, cmp;
 436
 437                sha1 = tree_entry_extract(t, &entry, mode);
 438                entrylen = tree_entry_len(&t->entry);
 439                update_tree_entry(t);
 440                if (entrylen > namelen)
 441                        continue;
 442                cmp = memcmp(name, entry, entrylen);
 443                if (cmp > 0)
 444                        continue;
 445                if (cmp < 0)
 446                        break;
 447                if (entrylen == namelen) {
 448                        hashcpy(result, sha1);
 449                        return 0;
 450                }
 451                if (name[entrylen] != '/')
 452                        continue;
 453                if (!S_ISDIR(*mode))
 454                        break;
 455                if (++entrylen == namelen) {
 456                        hashcpy(result, sha1);
 457                        return 0;
 458                }
 459                return get_tree_entry(sha1, name + entrylen, result, mode);
 460        }
 461        return -1;
 462}
 463
 464int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned char *sha1, unsigned *mode)
 465{
 466        int retval;
 467        void *tree;
 468        unsigned long size;
 469        unsigned char root[20];
 470
 471        tree = read_object_with_reference(tree_sha1, tree_type, &size, root);
 472        if (!tree)
 473                return -1;
 474
 475        if (name[0] == '\0') {
 476                hashcpy(sha1, root);
 477                free(tree);
 478                return 0;
 479        }
 480
 481        if (!size) {
 482                retval = -1;
 483        } else {
 484                struct tree_desc t;
 485                init_tree_desc(&t, tree, size);
 486                retval = find_tree_entry(&t, name, sha1, mode);
 487        }
 488        free(tree);
 489        return retval;
 490}
 491
 492static int match_entry(const struct name_entry *entry, int pathlen,
 493                       const char *match, int matchlen,
 494                       enum interesting *never_interesting)
 495{
 496        int m = -1; /* signals that we haven't called strncmp() */
 497
 498        if (*never_interesting != entry_not_interesting) {
 499                /*
 500                 * We have not seen any match that sorts later
 501                 * than the current path.
 502                 */
 503
 504                /*
 505                 * Does match sort strictly earlier than path
 506                 * with their common parts?
 507                 */
 508                m = strncmp(match, entry->path,
 509                            (matchlen < pathlen) ? matchlen : pathlen);
 510                if (m < 0)
 511                        return 0;
 512
 513                /*
 514                 * If we come here even once, that means there is at
 515                 * least one pathspec that would sort equal to or
 516                 * later than the path we are currently looking at.
 517                 * In other words, if we have never reached this point
 518                 * after iterating all pathspecs, it means all
 519                 * pathspecs are either outside of base, or inside the
 520                 * base but sorts strictly earlier than the current
 521                 * one.  In either case, they will never match the
 522                 * subsequent entries.  In such a case, we initialized
 523                 * the variable to -1 and that is what will be
 524                 * returned, allowing the caller to terminate early.
 525                 */
 526                *never_interesting = entry_not_interesting;
 527        }
 528
 529        if (pathlen > matchlen)
 530                return 0;
 531
 532        if (matchlen > pathlen) {
 533                if (match[pathlen] != '/')
 534                        return 0;
 535                if (!S_ISDIR(entry->mode))
 536                        return 0;
 537        }
 538
 539        if (m == -1)
 540                /*
 541                 * we cheated and did not do strncmp(), so we do
 542                 * that here.
 543                 */
 544                m = strncmp(match, entry->path, pathlen);
 545
 546        /*
 547         * If common part matched earlier then it is a hit,
 548         * because we rejected the case where path is not a
 549         * leading directory and is shorter than match.
 550         */
 551        if (!m)
 552                return 1;
 553
 554        return 0;
 555}
 556
 557static int match_dir_prefix(const char *base,
 558                            const char *match, int matchlen)
 559{
 560        if (strncmp(base, match, matchlen))
 561                return 0;
 562
 563        /*
 564         * If the base is a subdirectory of a path which
 565         * was specified, all of them are interesting.
 566         */
 567        if (!matchlen ||
 568            base[matchlen] == '/' ||
 569            match[matchlen - 1] == '/')
 570                return 1;
 571
 572        /* Just a random prefix match */
 573        return 0;
 574}
 575
 576/*
 577 * Perform matching on the leading non-wildcard part of
 578 * pathspec. item->nowildcard_len must be greater than zero. Return
 579 * non-zero if base is matched.
 580 */
 581static int match_wildcard_base(const struct pathspec_item *item,
 582                               const char *base, int baselen,
 583                               int *matched)
 584{
 585        const char *match = item->match;
 586        /* the wildcard part is not considered in this function */
 587        int matchlen = item->nowildcard_len;
 588
 589        if (baselen) {
 590                int dirlen;
 591                /*
 592                 * Return early if base is longer than the
 593                 * non-wildcard part but it does not match.
 594                 */
 595                if (baselen >= matchlen) {
 596                        *matched = matchlen;
 597                        return !strncmp(base, match, matchlen);
 598                }
 599
 600                dirlen = matchlen;
 601                while (dirlen && match[dirlen - 1] != '/')
 602                        dirlen--;
 603
 604                /*
 605                 * Return early if base is shorter than the
 606                 * non-wildcard part but it does not match. Note that
 607                 * base ends with '/' so we are sure it really matches
 608                 * directory
 609                 */
 610                if (strncmp(base, match, baselen))
 611                        return 0;
 612                *matched = baselen;
 613        } else
 614                *matched = 0;
 615        /*
 616         * we could have checked entry against the non-wildcard part
 617         * that is not in base and does similar never_interesting
 618         * optimization as in match_entry. For now just be happy with
 619         * base comparison.
 620         */
 621        return entry_interesting;
 622}
 623
 624/*
 625 * Is a tree entry interesting given the pathspec we have?
 626 *
 627 * Pre-condition: either baselen == base_offset (i.e. empty path)
 628 * or base[baselen-1] == '/' (i.e. with trailing slash).
 629 */
 630enum interesting tree_entry_interesting(const struct name_entry *entry,
 631                                        struct strbuf *base, int base_offset,
 632                                        const struct pathspec *ps)
 633{
 634        int i;
 635        int pathlen, baselen = base->len - base_offset;
 636        enum interesting never_interesting = ps->has_wildcard ?
 637                entry_not_interesting : all_entries_not_interesting;
 638
 639        GUARD_PATHSPEC(ps,
 640                       PATHSPEC_FROMTOP |
 641                       PATHSPEC_MAXDEPTH |
 642                       PATHSPEC_LITERAL);
 643
 644        if (!ps->nr) {
 645                if (!ps->recursive ||
 646                    !(ps->magic & PATHSPEC_MAXDEPTH) ||
 647                    ps->max_depth == -1)
 648                        return all_entries_interesting;
 649                return within_depth(base->buf + base_offset, baselen,
 650                                    !!S_ISDIR(entry->mode),
 651                                    ps->max_depth) ?
 652                        entry_interesting : entry_not_interesting;
 653        }
 654
 655        pathlen = tree_entry_len(entry);
 656
 657        for (i = ps->nr - 1; i >= 0; i--) {
 658                const struct pathspec_item *item = ps->items+i;
 659                const char *match = item->match;
 660                const char *base_str = base->buf + base_offset;
 661                int matchlen = item->len, matched = 0;
 662
 663                if (baselen >= matchlen) {
 664                        /* If it doesn't match, move along... */
 665                        if (!match_dir_prefix(base_str, match, matchlen))
 666                                goto match_wildcards;
 667
 668                        if (!ps->recursive ||
 669                            !(ps->magic & PATHSPEC_MAXDEPTH) ||
 670                            ps->max_depth == -1)
 671                                return all_entries_interesting;
 672
 673                        return within_depth(base_str + matchlen + 1,
 674                                            baselen - matchlen - 1,
 675                                            !!S_ISDIR(entry->mode),
 676                                            ps->max_depth) ?
 677                                entry_interesting : entry_not_interesting;
 678                }
 679
 680                /* Either there must be no base, or the base must match. */
 681                if (baselen == 0 || !strncmp(base_str, match, baselen)) {
 682                        if (match_entry(entry, pathlen,
 683                                        match + baselen, matchlen - baselen,
 684                                        &never_interesting))
 685                                return entry_interesting;
 686
 687                        if (item->nowildcard_len < item->len) {
 688                                if (!git_fnmatch(match + baselen, entry->path,
 689                                                 item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
 690                                                 item->nowildcard_len - baselen))
 691                                        return entry_interesting;
 692
 693                                /*
 694                                 * Match all directories. We'll try to
 695                                 * match files later on.
 696                                 */
 697                                if (ps->recursive && S_ISDIR(entry->mode))
 698                                        return entry_interesting;
 699                        }
 700
 701                        continue;
 702                }
 703
 704match_wildcards:
 705                if (item->nowildcard_len == item->len)
 706                        continue;
 707
 708                if (item->nowildcard_len &&
 709                    !match_wildcard_base(item, base_str, baselen, &matched))
 710                        return entry_not_interesting;
 711
 712                /*
 713                 * Concatenate base and entry->path into one and do
 714                 * fnmatch() on it.
 715                 *
 716                 * While we could avoid concatenation in certain cases
 717                 * [1], which saves a memcpy and potentially a
 718                 * realloc, it turns out not worth it. Measurement on
 719                 * linux-2.6 does not show any clear improvements,
 720                 * partly because of the nowildcard_len optimization
 721                 * in git_fnmatch(). Avoid micro-optimizations here.
 722                 *
 723                 * [1] if match_wildcard_base() says the base
 724                 * directory is already matched, we only need to match
 725                 * the rest, which is shorter so _in theory_ faster.
 726                 */
 727
 728                strbuf_add(base, entry->path, pathlen);
 729
 730                if (!git_fnmatch(match, base->buf + base_offset,
 731                                 item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
 732                                 item->nowildcard_len)) {
 733                        strbuf_setlen(base, base_offset + baselen);
 734                        return entry_interesting;
 735                }
 736                strbuf_setlen(base, base_offset + baselen);
 737
 738                /*
 739                 * Match all directories. We'll try to match files
 740                 * later on.
 741                 * max_depth is ignored but we may consider support it
 742                 * in future, see
 743                 * http://thread.gmane.org/gmane.comp.version-control.git/163757/focus=163840
 744                 */
 745                if (ps->recursive && S_ISDIR(entry->mode))
 746                        return entry_interesting;
 747        }
 748        return never_interesting; /* No matches */
 749}