tree-walk.con commit mailinfo: move use_scissors and use_inbody_headers to struct mailinfo (ad57ef9)
   1#include "cache.h"
   2#include "tree-walk.h"
   3#include "unpack-trees.h"
   4#include "dir.h"
   5#include "tree.h"
   6#include "pathspec.h"
   7
   8static const char *get_mode(const char *str, unsigned int *modep)
   9{
  10        unsigned char c;
  11        unsigned int mode = 0;
  12
  13        if (*str == ' ')
  14                return NULL;
  15
  16        while ((c = *str++) != ' ') {
  17                if (c < '0' || c > '7')
  18                        return NULL;
  19                mode = (mode << 3) + (c - '0');
  20        }
  21        *modep = mode;
  22        return str;
  23}
  24
  25static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
  26{
  27        const char *path;
  28        unsigned int mode, len;
  29
  30        if (size < 24 || buf[size - 21])
  31                die("corrupt tree file");
  32
  33        path = get_mode(buf, &mode);
  34        if (!path || !*path)
  35                die("corrupt tree file");
  36        len = strlen(path) + 1;
  37
  38        /* Initialize the descriptor entry */
  39        desc->entry.path = path;
  40        desc->entry.mode = canon_mode(mode);
  41        desc->entry.sha1 = (const unsigned char *)(path + len);
  42}
  43
  44void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
  45{
  46        desc->buffer = buffer;
  47        desc->size = size;
  48        if (size)
  49                decode_tree_entry(desc, buffer, size);
  50}
  51
  52void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
  53{
  54        unsigned long size = 0;
  55        void *buf = NULL;
  56
  57        if (sha1) {
  58                buf = read_object_with_reference(sha1, tree_type, &size, NULL);
  59                if (!buf)
  60                        die("unable to read tree %s", sha1_to_hex(sha1));
  61        }
  62        init_tree_desc(desc, buf, size);
  63        return buf;
  64}
  65
  66static void entry_clear(struct name_entry *a)
  67{
  68        memset(a, 0, sizeof(*a));
  69}
  70
  71static void entry_extract(struct tree_desc *t, struct name_entry *a)
  72{
  73        *a = t->entry;
  74}
  75
  76void update_tree_entry(struct tree_desc *desc)
  77{
  78        const void *buf = desc->buffer;
  79        const unsigned char *end = desc->entry.sha1 + 20;
  80        unsigned long size = desc->size;
  81        unsigned long len = end - (const unsigned char *)buf;
  82
  83        if (size < len)
  84                die("corrupt tree file");
  85        buf = end;
  86        size -= len;
  87        desc->buffer = buf;
  88        desc->size = size;
  89        if (size)
  90                decode_tree_entry(desc, buf, size);
  91}
  92
  93int tree_entry(struct tree_desc *desc, struct name_entry *entry)
  94{
  95        if (!desc->size)
  96                return 0;
  97
  98        *entry = desc->entry;
  99        update_tree_entry(desc);
 100        return 1;
 101}
 102
 103void setup_traverse_info(struct traverse_info *info, const char *base)
 104{
 105        int pathlen = strlen(base);
 106        static struct traverse_info dummy;
 107
 108        memset(info, 0, sizeof(*info));
 109        if (pathlen && base[pathlen-1] == '/')
 110                pathlen--;
 111        info->pathlen = pathlen ? pathlen + 1 : 0;
 112        info->name.path = base;
 113        info->name.sha1 = (void *)(base + pathlen + 1);
 114        if (pathlen)
 115                info->prev = &dummy;
 116}
 117
 118char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n)
 119{
 120        int len = tree_entry_len(n);
 121        int pathlen = info->pathlen;
 122
 123        path[pathlen + len] = 0;
 124        for (;;) {
 125                memcpy(path + pathlen, n->path, len);
 126                if (!pathlen)
 127                        break;
 128                path[--pathlen] = '/';
 129                n = &info->name;
 130                len = tree_entry_len(n);
 131                info = info->prev;
 132                pathlen -= len;
 133        }
 134        return path;
 135}
 136
 137struct tree_desc_skip {
 138        struct tree_desc_skip *prev;
 139        const void *ptr;
 140};
 141
 142struct tree_desc_x {
 143        struct tree_desc d;
 144        struct tree_desc_skip *skip;
 145};
 146
 147static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
 148{
 149        /*
 150         * The caller wants to pick *a* from a tree or nothing.
 151         * We are looking at *b* in a tree.
 152         *
 153         * (0) If a and b are the same name, we are trivially happy.
 154         *
 155         * There are three possibilities where *a* could be hiding
 156         * behind *b*.
 157         *
 158         * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
 159         *                                matter what.
 160         * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
 161         * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
 162         *
 163         * Otherwise we know *a* won't appear in the tree without
 164         * scanning further.
 165         */
 166
 167        int cmp = name_compare(a, a_len, b, b_len);
 168
 169        /* Most common case first -- reading sync'd trees */
 170        if (!cmp)
 171                return cmp;
 172
 173        if (0 < cmp) {
 174                /* a comes after b; it does not matter if it is case (3)
 175                if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
 176                        return 1;
 177                */
 178                return 1; /* keep looking */
 179        }
 180
 181        /* b comes after a; are we looking at case (2)? */
 182        if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
 183                return 1; /* keep looking */
 184
 185        return -1; /* a cannot appear in the tree */
 186}
 187
 188/*
 189 * From the extended tree_desc, extract the first name entry, while
 190 * paying attention to the candidate "first" name.  Most importantly,
 191 * when looking for an entry, if there are entries that sorts earlier
 192 * in the tree object representation than that name, skip them and
 193 * process the named entry first.  We will remember that we haven't
 194 * processed the first entry yet, and in the later call skip the
 195 * entry we processed early when update_extended_entry() is called.
 196 *
 197 * E.g. if the underlying tree object has these entries:
 198 *
 199 *    blob    "t-1"
 200 *    blob    "t-2"
 201 *    tree    "t"
 202 *    blob    "t=1"
 203 *
 204 * and the "first" asks for "t", remember that we still need to
 205 * process "t-1" and "t-2" but extract "t".  After processing the
 206 * entry "t" from this call, the caller will let us know by calling
 207 * update_extended_entry() that we can remember "t" has been processed
 208 * already.
 209 */
 210
 211static void extended_entry_extract(struct tree_desc_x *t,
 212                                   struct name_entry *a,
 213                                   const char *first,
 214                                   int first_len)
 215{
 216        const char *path;
 217        int len;
 218        struct tree_desc probe;
 219        struct tree_desc_skip *skip;
 220
 221        /*
 222         * Extract the first entry from the tree_desc, but skip the
 223         * ones that we already returned in earlier rounds.
 224         */
 225        while (1) {
 226                if (!t->d.size) {
 227                        entry_clear(a);
 228                        break; /* not found */
 229                }
 230                entry_extract(&t->d, a);
 231                for (skip = t->skip; skip; skip = skip->prev)
 232                        if (a->path == skip->ptr)
 233                                break; /* found */
 234                if (!skip)
 235                        break;
 236                /* We have processed this entry already. */
 237                update_tree_entry(&t->d);
 238        }
 239
 240        if (!first || !a->path)
 241                return;
 242
 243        /*
 244         * The caller wants "first" from this tree, or nothing.
 245         */
 246        path = a->path;
 247        len = tree_entry_len(a);
 248        switch (check_entry_match(first, first_len, path, len)) {
 249        case -1:
 250                entry_clear(a);
 251        case 0:
 252                return;
 253        default:
 254                break;
 255        }
 256
 257        /*
 258         * We need to look-ahead -- we suspect that a subtree whose
 259         * name is "first" may be hiding behind the current entry "path".
 260         */
 261        probe = t->d;
 262        while (probe.size) {
 263                entry_extract(&probe, a);
 264                path = a->path;
 265                len = tree_entry_len(a);
 266                switch (check_entry_match(first, first_len, path, len)) {
 267                case -1:
 268                        entry_clear(a);
 269                case 0:
 270                        return;
 271                default:
 272                        update_tree_entry(&probe);
 273                        break;
 274                }
 275                /* keep looking */
 276        }
 277        entry_clear(a);
 278}
 279
 280static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
 281{
 282        if (t->d.entry.path == a->path) {
 283                update_tree_entry(&t->d);
 284        } else {
 285                /* we have returned this entry early */
 286                struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
 287                skip->ptr = a->path;
 288                skip->prev = t->skip;
 289                t->skip = skip;
 290        }
 291}
 292
 293static void free_extended_entry(struct tree_desc_x *t)
 294{
 295        struct tree_desc_skip *p, *s;
 296
 297        for (s = t->skip; s; s = p) {
 298                p = s->prev;
 299                free(s);
 300        }
 301}
 302
 303static inline int prune_traversal(struct name_entry *e,
 304                                  struct traverse_info *info,
 305                                  struct strbuf *base,
 306                                  int still_interesting)
 307{
 308        if (!info->pathspec || still_interesting == 2)
 309                return 2;
 310        if (still_interesting < 0)
 311                return still_interesting;
 312        return tree_entry_interesting(e, base, 0, info->pathspec);
 313}
 314
 315int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 316{
 317        int error = 0;
 318        struct name_entry *entry = xmalloc(n*sizeof(*entry));
 319        int i;
 320        struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
 321        struct strbuf base = STRBUF_INIT;
 322        int interesting = 1;
 323
 324        for (i = 0; i < n; i++)
 325                tx[i].d = t[i];
 326
 327        if (info->prev) {
 328                strbuf_grow(&base, info->pathlen);
 329                make_traverse_path(base.buf, info->prev, &info->name);
 330                base.buf[info->pathlen-1] = '/';
 331                strbuf_setlen(&base, info->pathlen);
 332        }
 333        for (;;) {
 334                int trees_used;
 335                unsigned long mask, dirmask;
 336                const char *first = NULL;
 337                int first_len = 0;
 338                struct name_entry *e = NULL;
 339                int len;
 340
 341                for (i = 0; i < n; i++) {
 342                        e = entry + i;
 343                        extended_entry_extract(tx + i, e, NULL, 0);
 344                }
 345
 346                /*
 347                 * A tree may have "t-2" at the current location even
 348                 * though it may have "t" that is a subtree behind it,
 349                 * and another tree may return "t".  We want to grab
 350                 * all "t" from all trees to match in such a case.
 351                 */
 352                for (i = 0; i < n; i++) {
 353                        e = entry + i;
 354                        if (!e->path)
 355                                continue;
 356                        len = tree_entry_len(e);
 357                        if (!first) {
 358                                first = e->path;
 359                                first_len = len;
 360                                continue;
 361                        }
 362                        if (name_compare(e->path, len, first, first_len) < 0) {
 363                                first = e->path;
 364                                first_len = len;
 365                        }
 366                }
 367
 368                if (first) {
 369                        for (i = 0; i < n; i++) {
 370                                e = entry + i;
 371                                extended_entry_extract(tx + i, e, first, first_len);
 372                                /* Cull the ones that are not the earliest */
 373                                if (!e->path)
 374                                        continue;
 375                                len = tree_entry_len(e);
 376                                if (name_compare(e->path, len, first, first_len))
 377                                        entry_clear(e);
 378                        }
 379                }
 380
 381                /* Now we have in entry[i] the earliest name from the trees */
 382                mask = 0;
 383                dirmask = 0;
 384                for (i = 0; i < n; i++) {
 385                        if (!entry[i].path)
 386                                continue;
 387                        mask |= 1ul << i;
 388                        if (S_ISDIR(entry[i].mode))
 389                                dirmask |= 1ul << i;
 390                        e = &entry[i];
 391                }
 392                if (!mask)
 393                        break;
 394                interesting = prune_traversal(e, info, &base, interesting);
 395                if (interesting < 0)
 396                        break;
 397                if (interesting) {
 398                        trees_used = info->fn(n, mask, dirmask, entry, info);
 399                        if (trees_used < 0) {
 400                                error = trees_used;
 401                                if (!info->show_all_errors)
 402                                        break;
 403                        }
 404                        mask &= trees_used;
 405                }
 406                for (i = 0; i < n; i++)
 407                        if (mask & (1ul << i))
 408                                update_extended_entry(tx + i, entry + i);
 409        }
 410        free(entry);
 411        for (i = 0; i < n; i++)
 412                free_extended_entry(tx + i);
 413        free(tx);
 414        strbuf_release(&base);
 415        return error;
 416}
 417
 418struct dir_state {
 419        void *tree;
 420        unsigned long size;
 421        unsigned char sha1[20];
 422};
 423
 424static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
 425{
 426        int namelen = strlen(name);
 427        while (t->size) {
 428                const char *entry;
 429                const unsigned char *sha1;
 430                int entrylen, cmp;
 431
 432                sha1 = tree_entry_extract(t, &entry, mode);
 433                entrylen = tree_entry_len(&t->entry);
 434                update_tree_entry(t);
 435                if (entrylen > namelen)
 436                        continue;
 437                cmp = memcmp(name, entry, entrylen);
 438                if (cmp > 0)
 439                        continue;
 440                if (cmp < 0)
 441                        break;
 442                if (entrylen == namelen) {
 443                        hashcpy(result, sha1);
 444                        return 0;
 445                }
 446                if (name[entrylen] != '/')
 447                        continue;
 448                if (!S_ISDIR(*mode))
 449                        break;
 450                if (++entrylen == namelen) {
 451                        hashcpy(result, sha1);
 452                        return 0;
 453                }
 454                return get_tree_entry(sha1, name + entrylen, result, mode);
 455        }
 456        return -1;
 457}
 458
 459int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned char *sha1, unsigned *mode)
 460{
 461        int retval;
 462        void *tree;
 463        unsigned long size;
 464        unsigned char root[20];
 465
 466        tree = read_object_with_reference(tree_sha1, tree_type, &size, root);
 467        if (!tree)
 468                return -1;
 469
 470        if (name[0] == '\0') {
 471                hashcpy(sha1, root);
 472                free(tree);
 473                return 0;
 474        }
 475
 476        if (!size) {
 477                retval = -1;
 478        } else {
 479                struct tree_desc t;
 480                init_tree_desc(&t, tree, size);
 481                retval = find_tree_entry(&t, name, sha1, mode);
 482        }
 483        free(tree);
 484        return retval;
 485}
 486
 487/*
 488 * This is Linux's built-in max for the number of symlinks to follow.
 489 * That limit, of course, does not affect git, but it's a reasonable
 490 * choice.
 491 */
 492#define GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS 40
 493
 494/**
 495 * Find a tree entry by following symlinks in tree_sha (which is
 496 * assumed to be the root of the repository).  In the event that a
 497 * symlink points outside the repository (e.g. a link to /foo or a
 498 * root-level link to ../foo), the portion of the link which is
 499 * outside the repository will be returned in result_path, and *mode
 500 * will be set to 0.  It is assumed that result_path is uninitialized.
 501 * If there are no symlinks, or the end result of the symlink chain
 502 * points to an object inside the repository, result will be filled in
 503 * with the sha1 of the found object, and *mode will hold the mode of
 504 * the object.
 505 *
 506 * See the code for enum follow_symlink_result for a description of
 507 * the return values.
 508 */
 509enum follow_symlinks_result get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, struct strbuf *result_path, unsigned *mode)
 510{
 511        int retval = MISSING_OBJECT;
 512        struct dir_state *parents = NULL;
 513        size_t parents_alloc = 0;
 514        ssize_t parents_nr = 0;
 515        unsigned char current_tree_sha1[20];
 516        struct strbuf namebuf = STRBUF_INIT;
 517        struct tree_desc t;
 518        int follows_remaining = GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS;
 519        int i;
 520
 521        init_tree_desc(&t, NULL, 0UL);
 522        strbuf_init(result_path, 0);
 523        strbuf_addstr(&namebuf, name);
 524        hashcpy(current_tree_sha1, tree_sha1);
 525
 526        while (1) {
 527                int find_result;
 528                char *first_slash;
 529                char *remainder = NULL;
 530
 531                if (!t.buffer) {
 532                        void *tree;
 533                        unsigned char root[20];
 534                        unsigned long size;
 535                        tree = read_object_with_reference(current_tree_sha1,
 536                                                          tree_type, &size,
 537                                                          root);
 538                        if (!tree)
 539                                goto done;
 540
 541                        ALLOC_GROW(parents, parents_nr + 1, parents_alloc);
 542                        parents[parents_nr].tree = tree;
 543                        parents[parents_nr].size = size;
 544                        hashcpy(parents[parents_nr].sha1, root);
 545                        parents_nr++;
 546
 547                        if (namebuf.buf[0] == '\0') {
 548                                hashcpy(result, root);
 549                                retval = FOUND;
 550                                goto done;
 551                        }
 552
 553                        if (!size)
 554                                goto done;
 555
 556                        /* descend */
 557                        init_tree_desc(&t, tree, size);
 558                }
 559
 560                /* Handle symlinks to e.g. a//b by removing leading slashes */
 561                while (namebuf.buf[0] == '/') {
 562                        strbuf_remove(&namebuf, 0, 1);
 563                }
 564
 565                /* Split namebuf into a first component and a remainder */
 566                if ((first_slash = strchr(namebuf.buf, '/'))) {
 567                        *first_slash = 0;
 568                        remainder = first_slash + 1;
 569                }
 570
 571                if (!strcmp(namebuf.buf, "..")) {
 572                        struct dir_state *parent;
 573                        /*
 574                         * We could end up with .. in the namebuf if it
 575                         * appears in a symlink.
 576                         */
 577
 578                        if (parents_nr == 1) {
 579                                if (remainder)
 580                                        *first_slash = '/';
 581                                strbuf_add(result_path, namebuf.buf,
 582                                           namebuf.len);
 583                                *mode = 0;
 584                                retval = FOUND;
 585                                goto done;
 586                        }
 587                        parent = &parents[parents_nr - 1];
 588                        free(parent->tree);
 589                        parents_nr--;
 590                        parent = &parents[parents_nr - 1];
 591                        init_tree_desc(&t, parent->tree, parent->size);
 592                        strbuf_remove(&namebuf, 0, remainder ? 3 : 2);
 593                        continue;
 594                }
 595
 596                /* We could end up here via a symlink to dir/.. */
 597                if (namebuf.buf[0] == '\0') {
 598                        hashcpy(result, parents[parents_nr - 1].sha1);
 599                        retval = FOUND;
 600                        goto done;
 601                }
 602
 603                /* Look up the first (or only) path component in the tree. */
 604                find_result = find_tree_entry(&t, namebuf.buf,
 605                                              current_tree_sha1, mode);
 606                if (find_result) {
 607                        goto done;
 608                }
 609
 610                if (S_ISDIR(*mode)) {
 611                        if (!remainder) {
 612                                hashcpy(result, current_tree_sha1);
 613                                retval = FOUND;
 614                                goto done;
 615                        }
 616                        /* Descend the tree */
 617                        t.buffer = NULL;
 618                        strbuf_remove(&namebuf, 0,
 619                                      1 + first_slash - namebuf.buf);
 620                } else if (S_ISREG(*mode)) {
 621                        if (!remainder) {
 622                                hashcpy(result, current_tree_sha1);
 623                                retval = FOUND;
 624                        } else {
 625                                retval = NOT_DIR;
 626                        }
 627                        goto done;
 628                } else if (S_ISLNK(*mode)) {
 629                        /* Follow a symlink */
 630                        unsigned long link_len;
 631                        size_t len;
 632                        char *contents, *contents_start;
 633                        struct dir_state *parent;
 634                        enum object_type type;
 635
 636                        if (follows_remaining-- == 0) {
 637                                /* Too many symlinks followed */
 638                                retval = SYMLINK_LOOP;
 639                                goto done;
 640                        }
 641
 642                        /*
 643                         * At this point, we have followed at a least
 644                         * one symlink, so on error we need to report this.
 645                         */
 646                        retval = DANGLING_SYMLINK;
 647
 648                        contents = read_sha1_file(current_tree_sha1, &type,
 649                                                  &link_len);
 650
 651                        if (!contents)
 652                                goto done;
 653
 654                        if (contents[0] == '/') {
 655                                strbuf_addstr(result_path, contents);
 656                                free(contents);
 657                                *mode = 0;
 658                                retval = FOUND;
 659                                goto done;
 660                        }
 661
 662                        if (remainder)
 663                                len = first_slash - namebuf.buf;
 664                        else
 665                                len = namebuf.len;
 666
 667                        contents_start = contents;
 668
 669                        parent = &parents[parents_nr - 1];
 670                        init_tree_desc(&t, parent->tree, parent->size);
 671                        strbuf_splice(&namebuf, 0, len,
 672                                      contents_start, link_len);
 673                        if (remainder)
 674                                namebuf.buf[link_len] = '/';
 675                        free(contents);
 676                }
 677        }
 678done:
 679        for (i = 0; i < parents_nr; i++)
 680                free(parents[i].tree);
 681        free(parents);
 682
 683        strbuf_release(&namebuf);
 684        return retval;
 685}
 686
 687static int match_entry(const struct pathspec_item *item,
 688                       const struct name_entry *entry, int pathlen,
 689                       const char *match, int matchlen,
 690                       enum interesting *never_interesting)
 691{
 692        int m = -1; /* signals that we haven't called strncmp() */
 693
 694        if (item->magic & PATHSPEC_ICASE)
 695                /*
 696                 * "Never interesting" trick requires exact
 697                 * matching. We could do something clever with inexact
 698                 * matching, but it's trickier (and not to forget that
 699                 * strcasecmp is locale-dependent, at least in
 700                 * glibc). Just disable it for now. It can't be worse
 701                 * than the wildcard's codepath of '[Tt][Hi][Is][Ss]'
 702                 * pattern.
 703                 */
 704                *never_interesting = entry_not_interesting;
 705        else if (*never_interesting != entry_not_interesting) {
 706                /*
 707                 * We have not seen any match that sorts later
 708                 * than the current path.
 709                 */
 710
 711                /*
 712                 * Does match sort strictly earlier than path
 713                 * with their common parts?
 714                 */
 715                m = strncmp(match, entry->path,
 716                            (matchlen < pathlen) ? matchlen : pathlen);
 717                if (m < 0)
 718                        return 0;
 719
 720                /*
 721                 * If we come here even once, that means there is at
 722                 * least one pathspec that would sort equal to or
 723                 * later than the path we are currently looking at.
 724                 * In other words, if we have never reached this point
 725                 * after iterating all pathspecs, it means all
 726                 * pathspecs are either outside of base, or inside the
 727                 * base but sorts strictly earlier than the current
 728                 * one.  In either case, they will never match the
 729                 * subsequent entries.  In such a case, we initialized
 730                 * the variable to -1 and that is what will be
 731                 * returned, allowing the caller to terminate early.
 732                 */
 733                *never_interesting = entry_not_interesting;
 734        }
 735
 736        if (pathlen > matchlen)
 737                return 0;
 738
 739        if (matchlen > pathlen) {
 740                if (match[pathlen] != '/')
 741                        return 0;
 742                if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode))
 743                        return 0;
 744        }
 745
 746        if (m == -1)
 747                /*
 748                 * we cheated and did not do strncmp(), so we do
 749                 * that here.
 750                 */
 751                m = ps_strncmp(item, match, entry->path, pathlen);
 752
 753        /*
 754         * If common part matched earlier then it is a hit,
 755         * because we rejected the case where path is not a
 756         * leading directory and is shorter than match.
 757         */
 758        if (!m)
 759                /*
 760                 * match_entry does not check if the prefix part is
 761                 * matched case-sensitively. If the entry is a
 762                 * directory and part of prefix, it'll be rematched
 763                 * eventually by basecmp with special treatment for
 764                 * the prefix.
 765                 */
 766                return 1;
 767
 768        return 0;
 769}
 770
 771/* :(icase)-aware string compare */
 772static int basecmp(const struct pathspec_item *item,
 773                   const char *base, const char *match, int len)
 774{
 775        if (item->magic & PATHSPEC_ICASE) {
 776                int ret, n = len > item->prefix ? item->prefix : len;
 777                ret = strncmp(base, match, n);
 778                if (ret)
 779                        return ret;
 780                base += n;
 781                match += n;
 782                len -= n;
 783        }
 784        return ps_strncmp(item, base, match, len);
 785}
 786
 787static int match_dir_prefix(const struct pathspec_item *item,
 788                            const char *base,
 789                            const char *match, int matchlen)
 790{
 791        if (basecmp(item, base, match, matchlen))
 792                return 0;
 793
 794        /*
 795         * If the base is a subdirectory of a path which
 796         * was specified, all of them are interesting.
 797         */
 798        if (!matchlen ||
 799            base[matchlen] == '/' ||
 800            match[matchlen - 1] == '/')
 801                return 1;
 802
 803        /* Just a random prefix match */
 804        return 0;
 805}
 806
 807/*
 808 * Perform matching on the leading non-wildcard part of
 809 * pathspec. item->nowildcard_len must be greater than zero. Return
 810 * non-zero if base is matched.
 811 */
 812static int match_wildcard_base(const struct pathspec_item *item,
 813                               const char *base, int baselen,
 814                               int *matched)
 815{
 816        const char *match = item->match;
 817        /* the wildcard part is not considered in this function */
 818        int matchlen = item->nowildcard_len;
 819
 820        if (baselen) {
 821                int dirlen;
 822                /*
 823                 * Return early if base is longer than the
 824                 * non-wildcard part but it does not match.
 825                 */
 826                if (baselen >= matchlen) {
 827                        *matched = matchlen;
 828                        return !basecmp(item, base, match, matchlen);
 829                }
 830
 831                dirlen = matchlen;
 832                while (dirlen && match[dirlen - 1] != '/')
 833                        dirlen--;
 834
 835                /*
 836                 * Return early if base is shorter than the
 837                 * non-wildcard part but it does not match. Note that
 838                 * base ends with '/' so we are sure it really matches
 839                 * directory
 840                 */
 841                if (basecmp(item, base, match, baselen))
 842                        return 0;
 843                *matched = baselen;
 844        } else
 845                *matched = 0;
 846        /*
 847         * we could have checked entry against the non-wildcard part
 848         * that is not in base and does similar never_interesting
 849         * optimization as in match_entry. For now just be happy with
 850         * base comparison.
 851         */
 852        return entry_interesting;
 853}
 854
 855/*
 856 * Is a tree entry interesting given the pathspec we have?
 857 *
 858 * Pre-condition: either baselen == base_offset (i.e. empty path)
 859 * or base[baselen-1] == '/' (i.e. with trailing slash).
 860 */
 861static enum interesting do_match(const struct name_entry *entry,
 862                                 struct strbuf *base, int base_offset,
 863                                 const struct pathspec *ps,
 864                                 int exclude)
 865{
 866        int i;
 867        int pathlen, baselen = base->len - base_offset;
 868        enum interesting never_interesting = ps->has_wildcard ?
 869                entry_not_interesting : all_entries_not_interesting;
 870
 871        GUARD_PATHSPEC(ps,
 872                       PATHSPEC_FROMTOP |
 873                       PATHSPEC_MAXDEPTH |
 874                       PATHSPEC_LITERAL |
 875                       PATHSPEC_GLOB |
 876                       PATHSPEC_ICASE |
 877                       PATHSPEC_EXCLUDE);
 878
 879        if (!ps->nr) {
 880                if (!ps->recursive ||
 881                    !(ps->magic & PATHSPEC_MAXDEPTH) ||
 882                    ps->max_depth == -1)
 883                        return all_entries_interesting;
 884                return within_depth(base->buf + base_offset, baselen,
 885                                    !!S_ISDIR(entry->mode),
 886                                    ps->max_depth) ?
 887                        entry_interesting : entry_not_interesting;
 888        }
 889
 890        pathlen = tree_entry_len(entry);
 891
 892        for (i = ps->nr - 1; i >= 0; i--) {
 893                const struct pathspec_item *item = ps->items+i;
 894                const char *match = item->match;
 895                const char *base_str = base->buf + base_offset;
 896                int matchlen = item->len, matched = 0;
 897
 898                if ((!exclude &&   item->magic & PATHSPEC_EXCLUDE) ||
 899                    ( exclude && !(item->magic & PATHSPEC_EXCLUDE)))
 900                        continue;
 901
 902                if (baselen >= matchlen) {
 903                        /* If it doesn't match, move along... */
 904                        if (!match_dir_prefix(item, base_str, match, matchlen))
 905                                goto match_wildcards;
 906
 907                        if (!ps->recursive ||
 908                            !(ps->magic & PATHSPEC_MAXDEPTH) ||
 909                            ps->max_depth == -1)
 910                                return all_entries_interesting;
 911
 912                        return within_depth(base_str + matchlen + 1,
 913                                            baselen - matchlen - 1,
 914                                            !!S_ISDIR(entry->mode),
 915                                            ps->max_depth) ?
 916                                entry_interesting : entry_not_interesting;
 917                }
 918
 919                /* Either there must be no base, or the base must match. */
 920                if (baselen == 0 || !basecmp(item, base_str, match, baselen)) {
 921                        if (match_entry(item, entry, pathlen,
 922                                        match + baselen, matchlen - baselen,
 923                                        &never_interesting))
 924                                return entry_interesting;
 925
 926                        if (item->nowildcard_len < item->len) {
 927                                if (!git_fnmatch(item, match + baselen, entry->path,
 928                                                 item->nowildcard_len - baselen))
 929                                        return entry_interesting;
 930
 931                                /*
 932                                 * Match all directories. We'll try to
 933                                 * match files later on.
 934                                 */
 935                                if (ps->recursive && S_ISDIR(entry->mode))
 936                                        return entry_interesting;
 937                        }
 938
 939                        continue;
 940                }
 941
 942match_wildcards:
 943                if (item->nowildcard_len == item->len)
 944                        continue;
 945
 946                if (item->nowildcard_len &&
 947                    !match_wildcard_base(item, base_str, baselen, &matched))
 948                        continue;
 949
 950                /*
 951                 * Concatenate base and entry->path into one and do
 952                 * fnmatch() on it.
 953                 *
 954                 * While we could avoid concatenation in certain cases
 955                 * [1], which saves a memcpy and potentially a
 956                 * realloc, it turns out not worth it. Measurement on
 957                 * linux-2.6 does not show any clear improvements,
 958                 * partly because of the nowildcard_len optimization
 959                 * in git_fnmatch(). Avoid micro-optimizations here.
 960                 *
 961                 * [1] if match_wildcard_base() says the base
 962                 * directory is already matched, we only need to match
 963                 * the rest, which is shorter so _in theory_ faster.
 964                 */
 965
 966                strbuf_add(base, entry->path, pathlen);
 967
 968                if (!git_fnmatch(item, match, base->buf + base_offset,
 969                                 item->nowildcard_len)) {
 970                        strbuf_setlen(base, base_offset + baselen);
 971                        return entry_interesting;
 972                }
 973                strbuf_setlen(base, base_offset + baselen);
 974
 975                /*
 976                 * Match all directories. We'll try to match files
 977                 * later on.
 978                 * max_depth is ignored but we may consider support it
 979                 * in future, see
 980                 * http://thread.gmane.org/gmane.comp.version-control.git/163757/focus=163840
 981                 */
 982                if (ps->recursive && S_ISDIR(entry->mode))
 983                        return entry_interesting;
 984        }
 985        return never_interesting; /* No matches */
 986}
 987
 988/*
 989 * Is a tree entry interesting given the pathspec we have?
 990 *
 991 * Pre-condition: either baselen == base_offset (i.e. empty path)
 992 * or base[baselen-1] == '/' (i.e. with trailing slash).
 993 */
 994enum interesting tree_entry_interesting(const struct name_entry *entry,
 995                                        struct strbuf *base, int base_offset,
 996                                        const struct pathspec *ps)
 997{
 998        enum interesting positive, negative;
 999        positive = do_match(entry, base, base_offset, ps, 0);
1000
1001        /*
1002         * case | entry | positive | negative | result
1003         * -----+-------+----------+----------+-------
1004         *   1  |  file |   -1     |  -1..2   |  -1
1005         *   2  |  file |    0     |  -1..2   |   0
1006         *   3  |  file |    1     |   -1     |   1
1007         *   4  |  file |    1     |    0     |   1
1008         *   5  |  file |    1     |    1     |   0
1009         *   6  |  file |    1     |    2     |   0
1010         *   7  |  file |    2     |   -1     |   2
1011         *   8  |  file |    2     |    0     |   2
1012         *   9  |  file |    2     |    1     |   0
1013         *  10  |  file |    2     |    2     |  -1
1014         * -----+-------+----------+----------+-------
1015         *  11  |  dir  |   -1     |  -1..2   |  -1
1016         *  12  |  dir  |    0     |  -1..2   |   0
1017         *  13  |  dir  |    1     |   -1     |   1
1018         *  14  |  dir  |    1     |    0     |   1
1019         *  15  |  dir  |    1     |    1     |   1 (*)
1020         *  16  |  dir  |    1     |    2     |   0
1021         *  17  |  dir  |    2     |   -1     |   2
1022         *  18  |  dir  |    2     |    0     |   2
1023         *  19  |  dir  |    2     |    1     |   1 (*)
1024         *  20  |  dir  |    2     |    2     |  -1
1025         *
1026         * (*) An exclude pattern interested in a directory does not
1027         * necessarily mean it will exclude all of the directory. In
1028         * wildcard case, it can't decide until looking at individual
1029         * files inside. So don't write such directories off yet.
1030         */
1031
1032        if (!(ps->magic & PATHSPEC_EXCLUDE) ||
1033            positive <= entry_not_interesting) /* #1, #2, #11, #12 */
1034                return positive;
1035
1036        negative = do_match(entry, base, base_offset, ps, 1);
1037
1038        /* #3, #4, #7, #8, #13, #14, #17, #18 */
1039        if (negative <= entry_not_interesting)
1040                return positive;
1041
1042        /* #15, #19 */
1043        if (S_ISDIR(entry->mode) &&
1044            positive >= entry_interesting &&
1045            negative == entry_interesting)
1046                return entry_interesting;
1047
1048        if ((positive == entry_interesting &&
1049             negative >= entry_interesting) || /* #5, #6, #16 */
1050            (positive == all_entries_interesting &&
1051             negative == entry_interesting)) /* #9 */
1052                return entry_not_interesting;
1053
1054        return all_entries_not_interesting; /* #10, #20 */
1055}