tree-walk.con commit regex: use regexec_buf() (b7d36ff)
   1#include "cache.h"
   2#include "tree-walk.h"
   3#include "unpack-trees.h"
   4#include "dir.h"
   5#include "tree.h"
   6#include "pathspec.h"
   7
   8static const char *get_mode(const char *str, unsigned int *modep)
   9{
  10        unsigned char c;
  11        unsigned int mode = 0;
  12
  13        if (*str == ' ')
  14                return NULL;
  15
  16        while ((c = *str++) != ' ') {
  17                if (c < '0' || c > '7')
  18                        return NULL;
  19                mode = (mode << 3) + (c - '0');
  20        }
  21        *modep = mode;
  22        return str;
  23}
  24
  25static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
  26{
  27        const char *path;
  28        unsigned int mode, len;
  29
  30        if (size < 24 || buf[size - 21])
  31                die("corrupt tree file");
  32
  33        path = get_mode(buf, &mode);
  34        if (!path || !*path)
  35                die("corrupt tree file");
  36        len = strlen(path) + 1;
  37
  38        /* Initialize the descriptor entry */
  39        desc->entry.path = path;
  40        desc->entry.mode = canon_mode(mode);
  41        desc->entry.sha1 = (const unsigned char *)(path + len);
  42}
  43
  44void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
  45{
  46        desc->buffer = buffer;
  47        desc->size = size;
  48        if (size)
  49                decode_tree_entry(desc, buffer, size);
  50}
  51
  52void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
  53{
  54        unsigned long size = 0;
  55        void *buf = NULL;
  56
  57        if (sha1) {
  58                buf = read_object_with_reference(sha1, tree_type, &size, NULL);
  59                if (!buf)
  60                        die("unable to read tree %s", sha1_to_hex(sha1));
  61        }
  62        init_tree_desc(desc, buf, size);
  63        return buf;
  64}
  65
  66static void entry_clear(struct name_entry *a)
  67{
  68        memset(a, 0, sizeof(*a));
  69}
  70
  71static void entry_extract(struct tree_desc *t, struct name_entry *a)
  72{
  73        *a = t->entry;
  74}
  75
  76void update_tree_entry(struct tree_desc *desc)
  77{
  78        const void *buf = desc->buffer;
  79        const unsigned char *end = desc->entry.sha1 + 20;
  80        unsigned long size = desc->size;
  81        unsigned long len = end - (const unsigned char *)buf;
  82
  83        if (size < len)
  84                die("corrupt tree file");
  85        buf = end;
  86        size -= len;
  87        desc->buffer = buf;
  88        desc->size = size;
  89        if (size)
  90                decode_tree_entry(desc, buf, size);
  91}
  92
  93int tree_entry(struct tree_desc *desc, struct name_entry *entry)
  94{
  95        if (!desc->size)
  96                return 0;
  97
  98        *entry = desc->entry;
  99        update_tree_entry(desc);
 100        return 1;
 101}
 102
 103void setup_traverse_info(struct traverse_info *info, const char *base)
 104{
 105        int pathlen = strlen(base);
 106        static struct traverse_info dummy;
 107
 108        memset(info, 0, sizeof(*info));
 109        if (pathlen && base[pathlen-1] == '/')
 110                pathlen--;
 111        info->pathlen = pathlen ? pathlen + 1 : 0;
 112        info->name.path = base;
 113        info->name.sha1 = (void *)(base + pathlen + 1);
 114        if (pathlen)
 115                info->prev = &dummy;
 116}
 117
 118char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n)
 119{
 120        int len = tree_entry_len(n);
 121        int pathlen = info->pathlen;
 122
 123        path[pathlen + len] = 0;
 124        for (;;) {
 125                memcpy(path + pathlen, n->path, len);
 126                if (!pathlen)
 127                        break;
 128                path[--pathlen] = '/';
 129                n = &info->name;
 130                len = tree_entry_len(n);
 131                info = info->prev;
 132                pathlen -= len;
 133        }
 134        return path;
 135}
 136
/*
 * One node in a singly linked list of entries that were already handed
 * out ahead of their position in the tree (see update_extended_entry()).
 */
struct tree_desc_skip {
        struct tree_desc_skip *prev;    /* previously recorded node, or NULL */
        const void *ptr;                /* path pointer of the entry returned early */
};
 141
/*
 * A tree descriptor extended with the list of entries that were
 * returned early and must be skipped when the scan reaches them.
 */
struct tree_desc_x {
        struct tree_desc d;             /* the underlying descriptor */
        struct tree_desc_skip *skip;    /* most recently recorded skip node, or NULL */
};
 146
 147static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
 148{
 149        /*
 150         * The caller wants to pick *a* from a tree or nothing.
 151         * We are looking at *b* in a tree.
 152         *
 153         * (0) If a and b are the same name, we are trivially happy.
 154         *
 155         * There are three possibilities where *a* could be hiding
 156         * behind *b*.
 157         *
 158         * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
 159         *                                matter what.
 160         * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
 161         * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
 162         *
 163         * Otherwise we know *a* won't appear in the tree without
 164         * scanning further.
 165         */
 166
 167        int cmp = name_compare(a, a_len, b, b_len);
 168
 169        /* Most common case first -- reading sync'd trees */
 170        if (!cmp)
 171                return cmp;
 172
 173        if (0 < cmp) {
 174                /* a comes after b; it does not matter if it is case (3)
 175                if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
 176                        return 1;
 177                */
 178                return 1; /* keep looking */
 179        }
 180
 181        /* b comes after a; are we looking at case (2)? */
 182        if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
 183                return 1; /* keep looking */
 184
 185        return -1; /* a cannot appear in the tree */
 186}
 187
 188/*
 189 * From the extended tree_desc, extract the first name entry, while
 190 * paying attention to the candidate "first" name.  Most importantly,
 191 * when looking for an entry, if there are entries that sorts earlier
 192 * in the tree object representation than that name, skip them and
 193 * process the named entry first.  We will remember that we haven't
 194 * processed the first entry yet, and in the later call skip the
 195 * entry we processed early when update_extended_entry() is called.
 196 *
 197 * E.g. if the underlying tree object has these entries:
 198 *
 199 *    blob    "t-1"
 200 *    blob    "t-2"
 201 *    tree    "t"
 202 *    blob    "t=1"
 203 *
 204 * and the "first" asks for "t", remember that we still need to
 205 * process "t-1" and "t-2" but extract "t".  After processing the
 206 * entry "t" from this call, the caller will let us know by calling
 207 * update_extended_entry() that we can remember "t" has been processed
 208 * already.
 209 */
 210
 211static void extended_entry_extract(struct tree_desc_x *t,
 212                                   struct name_entry *a,
 213                                   const char *first,
 214                                   int first_len)
 215{
 216        const char *path;
 217        int len;
 218        struct tree_desc probe;
 219        struct tree_desc_skip *skip;
 220
 221        /*
 222         * Extract the first entry from the tree_desc, but skip the
 223         * ones that we already returned in earlier rounds.
 224         */
 225        while (1) {
 226                if (!t->d.size) {
 227                        entry_clear(a);
 228                        break; /* not found */
 229                }
 230                entry_extract(&t->d, a);
 231                for (skip = t->skip; skip; skip = skip->prev)
 232                        if (a->path == skip->ptr)
 233                                break; /* found */
 234                if (!skip)
 235                        break;
 236                /* We have processed this entry already. */
 237                update_tree_entry(&t->d);
 238        }
 239
 240        if (!first || !a->path)
 241                return;
 242
 243        /*
 244         * The caller wants "first" from this tree, or nothing.
 245         */
 246        path = a->path;
 247        len = tree_entry_len(a);
 248        switch (check_entry_match(first, first_len, path, len)) {
 249        case -1:
 250                entry_clear(a);
 251        case 0:
 252                return;
 253        default:
 254                break;
 255        }
 256
 257        /*
 258         * We need to look-ahead -- we suspect that a subtree whose
 259         * name is "first" may be hiding behind the current entry "path".
 260         */
 261        probe = t->d;
 262        while (probe.size) {
 263                entry_extract(&probe, a);
 264                path = a->path;
 265                len = tree_entry_len(a);
 266                switch (check_entry_match(first, first_len, path, len)) {
 267                case -1:
 268                        entry_clear(a);
 269                case 0:
 270                        return;
 271                default:
 272                        update_tree_entry(&probe);
 273                        break;
 274                }
 275                /* keep looking */
 276        }
 277        entry_clear(a);
 278}
 279
 280static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
 281{
 282        if (t->d.entry.path == a->path) {
 283                update_tree_entry(&t->d);
 284        } else {
 285                /* we have returned this entry early */
 286                struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
 287                skip->ptr = a->path;
 288                skip->prev = t->skip;
 289                t->skip = skip;
 290        }
 291}
 292
 293static void free_extended_entry(struct tree_desc_x *t)
 294{
 295        struct tree_desc_skip *p, *s;
 296
 297        for (s = t->skip; s; s = p) {
 298                p = s->prev;
 299                free(s);
 300        }
 301}
 302
 303static inline int prune_traversal(struct name_entry *e,
 304                                  struct traverse_info *info,
 305                                  struct strbuf *base,
 306                                  int still_interesting)
 307{
 308        if (!info->pathspec || still_interesting == 2)
 309                return 2;
 310        if (still_interesting < 0)
 311                return still_interesting;
 312        return tree_entry_interesting(e, base, 0, info->pathspec);
 313}
 314
 315int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 316{
 317        int error = 0;
 318        struct name_entry *entry = xmalloc(n*sizeof(*entry));
 319        int i;
 320        struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
 321        struct strbuf base = STRBUF_INIT;
 322        int interesting = 1;
 323        char *traverse_path;
 324
 325        for (i = 0; i < n; i++)
 326                tx[i].d = t[i];
 327
 328        if (info->prev) {
 329                strbuf_grow(&base, info->pathlen);
 330                make_traverse_path(base.buf, info->prev, &info->name);
 331                base.buf[info->pathlen-1] = '/';
 332                strbuf_setlen(&base, info->pathlen);
 333                traverse_path = xstrndup(base.buf, info->pathlen);
 334        } else {
 335                traverse_path = xstrndup(info->name.path, info->pathlen);
 336        }
 337        info->traverse_path = traverse_path;
 338        for (;;) {
 339                int trees_used;
 340                unsigned long mask, dirmask;
 341                const char *first = NULL;
 342                int first_len = 0;
 343                struct name_entry *e = NULL;
 344                int len;
 345
 346                for (i = 0; i < n; i++) {
 347                        e = entry + i;
 348                        extended_entry_extract(tx + i, e, NULL, 0);
 349                }
 350
 351                /*
 352                 * A tree may have "t-2" at the current location even
 353                 * though it may have "t" that is a subtree behind it,
 354                 * and another tree may return "t".  We want to grab
 355                 * all "t" from all trees to match in such a case.
 356                 */
 357                for (i = 0; i < n; i++) {
 358                        e = entry + i;
 359                        if (!e->path)
 360                                continue;
 361                        len = tree_entry_len(e);
 362                        if (!first) {
 363                                first = e->path;
 364                                first_len = len;
 365                                continue;
 366                        }
 367                        if (name_compare(e->path, len, first, first_len) < 0) {
 368                                first = e->path;
 369                                first_len = len;
 370                        }
 371                }
 372
 373                if (first) {
 374                        for (i = 0; i < n; i++) {
 375                                e = entry + i;
 376                                extended_entry_extract(tx + i, e, first, first_len);
 377                                /* Cull the ones that are not the earliest */
 378                                if (!e->path)
 379                                        continue;
 380                                len = tree_entry_len(e);
 381                                if (name_compare(e->path, len, first, first_len))
 382                                        entry_clear(e);
 383                        }
 384                }
 385
 386                /* Now we have in entry[i] the earliest name from the trees */
 387                mask = 0;
 388                dirmask = 0;
 389                for (i = 0; i < n; i++) {
 390                        if (!entry[i].path)
 391                                continue;
 392                        mask |= 1ul << i;
 393                        if (S_ISDIR(entry[i].mode))
 394                                dirmask |= 1ul << i;
 395                        e = &entry[i];
 396                }
 397                if (!mask)
 398                        break;
 399                interesting = prune_traversal(e, info, &base, interesting);
 400                if (interesting < 0)
 401                        break;
 402                if (interesting) {
 403                        trees_used = info->fn(n, mask, dirmask, entry, info);
 404                        if (trees_used < 0) {
 405                                error = trees_used;
 406                                if (!info->show_all_errors)
 407                                        break;
 408                        }
 409                        mask &= trees_used;
 410                }
 411                for (i = 0; i < n; i++)
 412                        if (mask & (1ul << i))
 413                                update_extended_entry(tx + i, entry + i);
 414        }
 415        free(entry);
 416        for (i = 0; i < n; i++)
 417                free_extended_entry(tx + i);
 418        free(tx);
 419        free(traverse_path);
 420        info->traverse_path = NULL;
 421        strbuf_release(&base);
 422        return error;
 423}
 424
/*
 * One level of the directory stack kept by
 * get_tree_entry_follow_symlinks(): a tree that has been read, so that
 * ".." can return to it without reading it again.
 */
struct dir_state {
        void *tree;                     /* tree object contents; freed by the stack's owner */
        unsigned long size;             /* number of bytes in "tree" */
        unsigned char sha1[20];         /* object name reported for the tree that was read */
};
 430
 431static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
 432{
 433        int namelen = strlen(name);
 434        while (t->size) {
 435                const char *entry;
 436                const unsigned char *sha1;
 437                int entrylen, cmp;
 438
 439                sha1 = tree_entry_extract(t, &entry, mode);
 440                entrylen = tree_entry_len(&t->entry);
 441                update_tree_entry(t);
 442                if (entrylen > namelen)
 443                        continue;
 444                cmp = memcmp(name, entry, entrylen);
 445                if (cmp > 0)
 446                        continue;
 447                if (cmp < 0)
 448                        break;
 449                if (entrylen == namelen) {
 450                        hashcpy(result, sha1);
 451                        return 0;
 452                }
 453                if (name[entrylen] != '/')
 454                        continue;
 455                if (!S_ISDIR(*mode))
 456                        break;
 457                if (++entrylen == namelen) {
 458                        hashcpy(result, sha1);
 459                        return 0;
 460                }
 461                return get_tree_entry(sha1, name + entrylen, result, mode);
 462        }
 463        return -1;
 464}
 465
 466int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned char *sha1, unsigned *mode)
 467{
 468        int retval;
 469        void *tree;
 470        unsigned long size;
 471        unsigned char root[20];
 472
 473        tree = read_object_with_reference(tree_sha1, tree_type, &size, root);
 474        if (!tree)
 475                return -1;
 476
 477        if (name[0] == '\0') {
 478                hashcpy(sha1, root);
 479                free(tree);
 480                return 0;
 481        }
 482
 483        if (!size) {
 484                retval = -1;
 485        } else {
 486                struct tree_desc t;
 487                init_tree_desc(&t, tree, size);
 488                retval = find_tree_entry(&t, name, sha1, mode);
 489        }
 490        free(tree);
 491        return retval;
 492}
 493
/*
 * This is Linux's built-in max for the number of symlinks to follow.
 * That limit, of course, does not affect git, but it's a reasonable
 * choice.
 */
#define GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS 40

/**
 * Find a tree entry by following symlinks in tree_sha (which is
 * assumed to be the root of the repository).  In the event that a
 * symlink points outside the repository (e.g. a link to /foo or a
 * root-level link to ../foo), the portion of the link which is
 * outside the repository will be returned in result_path, and *mode
 * will be set to 0.  It is assumed that result_path is uninitialized.
 * If there are no symlinks, or the end result of the symlink chain
 * points to an object inside the repository, result will be filled in
 * with the sha1 of the found object, and *mode will hold the mode of
 * the object.
 *
 * The function keeps a stack ("parents") of every tree it has read on
 * the way down, so that a ".." component can step back up; all of
 * those buffers are freed before returning.
 *
 * See the code for enum follow_symlinks_result for a description of
 * the return values.
 */
enum follow_symlinks_result get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, struct strbuf *result_path, unsigned *mode)
{
        int retval = MISSING_OBJECT;
        struct dir_state *parents = NULL;
        size_t parents_alloc = 0;
        ssize_t parents_nr = 0;
        unsigned char current_tree_sha1[20];
        struct strbuf namebuf = STRBUF_INIT;
        struct tree_desc t;
        int follows_remaining = GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS;
        int i;

        /* t.buffer == NULL means "the tree named by current_tree_sha1 is not loaded yet" */
        init_tree_desc(&t, NULL, 0UL);
        strbuf_init(result_path, 0);
        /* namebuf holds the part of the path that is still to be resolved */
        strbuf_addstr(&namebuf, name);
        hashcpy(current_tree_sha1, tree_sha1);

        while (1) {
                int find_result;
                char *first_slash;
                char *remainder = NULL;

                if (!t.buffer) {
                        void *tree;
                        unsigned char root[20];
                        unsigned long size;
                        tree = read_object_with_reference(current_tree_sha1,
                                                          tree_type, &size,
                                                          root);
                        if (!tree)
                                goto done;

                        /* Push the freshly read tree onto the directory stack */
                        ALLOC_GROW(parents, parents_nr + 1, parents_alloc);
                        parents[parents_nr].tree = tree;
                        parents[parents_nr].size = size;
                        hashcpy(parents[parents_nr].sha1, root);
                        parents_nr++;

                        /* Nothing left to resolve: the tree itself is the answer */
                        if (namebuf.buf[0] == '\0') {
                                hashcpy(result, root);
                                retval = FOUND;
                                goto done;
                        }

                        /* An empty tree cannot contain the name */
                        if (!size)
                                goto done;

                        /* descend */
                        init_tree_desc(&t, tree, size);
                }

                /* Handle symlinks to e.g. a//b by removing leading slashes */
                while (namebuf.buf[0] == '/') {
                        strbuf_remove(&namebuf, 0, 1);
                }

                /*
                 * Split namebuf into a first component and a remainder.
                 * The slash is overwritten with NUL in place, so
                 * namebuf.buf reads as the first component only while
                 * namebuf.len still covers the whole string.
                 */
                if ((first_slash = strchr(namebuf.buf, '/'))) {
                        *first_slash = 0;
                        remainder = first_slash + 1;
                }

                if (!strcmp(namebuf.buf, "..")) {
                        struct dir_state *parent;
                        /*
                         * We could end up with .. in the namebuf if it
                         * appears in a symlink.
                         */

                        if (parents_nr == 1) {
                                /*
                                 * ".." at the root leaves the repository:
                                 * restore the slash and hand the whole
                                 * unresolved path back in result_path.
                                 */
                                if (remainder)
                                        *first_slash = '/';
                                strbuf_add(result_path, namebuf.buf,
                                           namebuf.len);
                                *mode = 0;
                                retval = FOUND;
                                goto done;
                        }
                        /* Pop the current directory and rewind to its parent */
                        parent = &parents[parents_nr - 1];
                        free(parent->tree);
                        parents_nr--;
                        parent = &parents[parents_nr - 1];
                        init_tree_desc(&t, parent->tree, parent->size);
                        /* Drop ".." and, if present, the byte that was the slash */
                        strbuf_remove(&namebuf, 0, remainder ? 3 : 2);
                        continue;
                }

                /* We could end up here via a symlink to dir/.. */
                if (namebuf.buf[0] == '\0') {
                        hashcpy(result, parents[parents_nr - 1].sha1);
                        retval = FOUND;
                        goto done;
                }

                /*
                 * Look up the first (or only) path component in the tree.
                 * On success current_tree_sha1 is overwritten with the
                 * object name of the entry that was found.
                 */
                find_result = find_tree_entry(&t, namebuf.buf,
                                              current_tree_sha1, mode);
                if (find_result) {
                        goto done;
                }

                if (S_ISDIR(*mode)) {
                        if (!remainder) {
                                hashcpy(result, current_tree_sha1);
                                retval = FOUND;
                                goto done;
                        }
                        /*
                         * Descend the tree: clearing t.buffer makes the
                         * next iteration read current_tree_sha1, and the
                         * consumed component plus its slash is dropped.
                         */
                        t.buffer = NULL;
                        strbuf_remove(&namebuf, 0,
                                      1 + first_slash - namebuf.buf);
                } else if (S_ISREG(*mode)) {
                        if (!remainder) {
                                hashcpy(result, current_tree_sha1);
                                retval = FOUND;
                        } else {
                                /* A regular file cannot have path components below it */
                                retval = NOT_DIR;
                        }
                        goto done;
                } else if (S_ISLNK(*mode)) {
                        /* Follow a symlink */
                        unsigned long link_len;
                        size_t len;
                        char *contents, *contents_start;
                        struct dir_state *parent;
                        enum object_type type;

                        if (follows_remaining-- == 0) {
                                /* Too many symlinks followed */
                                retval = SYMLINK_LOOP;
                                goto done;
                        }

                        /*
                         * At this point, we have followed at least
                         * one symlink, so on error we need to report this.
                         */
                        retval = DANGLING_SYMLINK;

                        contents = read_sha1_file(current_tree_sha1, &type,
                                                  &link_len);

                        if (!contents)
                                goto done;

                        /* An absolute target points outside the repository */
                        if (contents[0] == '/') {
                                strbuf_addstr(result_path, contents);
                                free(contents);
                                *mode = 0;
                                retval = FOUND;
                                goto done;
                        }

                        /* Length of the component that named the symlink */
                        if (remainder)
                                len = first_slash - namebuf.buf;
                        else
                                len = namebuf.len;

                        contents_start = contents;

                        /*
                         * Rewind to the start of the directory holding
                         * the link (the lookup above advanced t), then
                         * replace the link's name with its target.
                         */
                        parent = &parents[parents_nr - 1];
                        init_tree_desc(&t, parent->tree, parent->size);
                        strbuf_splice(&namebuf, 0, len,
                                      contents_start, link_len);
                        /* Put back the slash that was overwritten with NUL */
                        if (remainder)
                                namebuf.buf[link_len] = '/';
                        free(contents);
                }
        }
done:
        /* Release every tree still on the directory stack */
        for (i = 0; i < parents_nr; i++)
                free(parents[i].tree);
        free(parents);

        strbuf_release(&namebuf);
        return retval;
}
 693
 694static int match_entry(const struct pathspec_item *item,
 695                       const struct name_entry *entry, int pathlen,
 696                       const char *match, int matchlen,
 697                       enum interesting *never_interesting)
 698{
 699        int m = -1; /* signals that we haven't called strncmp() */
 700
 701        if (item->magic & PATHSPEC_ICASE)
 702                /*
 703                 * "Never interesting" trick requires exact
 704                 * matching. We could do something clever with inexact
 705                 * matching, but it's trickier (and not to forget that
 706                 * strcasecmp is locale-dependent, at least in
 707                 * glibc). Just disable it for now. It can't be worse
 708                 * than the wildcard's codepath of '[Tt][Hi][Is][Ss]'
 709                 * pattern.
 710                 */
 711                *never_interesting = entry_not_interesting;
 712        else if (*never_interesting != entry_not_interesting) {
 713                /*
 714                 * We have not seen any match that sorts later
 715                 * than the current path.
 716                 */
 717
 718                /*
 719                 * Does match sort strictly earlier than path
 720                 * with their common parts?
 721                 */
 722                m = strncmp(match, entry->path,
 723                            (matchlen < pathlen) ? matchlen : pathlen);
 724                if (m < 0)
 725                        return 0;
 726
 727                /*
 728                 * If we come here even once, that means there is at
 729                 * least one pathspec that would sort equal to or
 730                 * later than the path we are currently looking at.
 731                 * In other words, if we have never reached this point
 732                 * after iterating all pathspecs, it means all
 733                 * pathspecs are either outside of base, or inside the
 734                 * base but sorts strictly earlier than the current
 735                 * one.  In either case, they will never match the
 736                 * subsequent entries.  In such a case, we initialized
 737                 * the variable to -1 and that is what will be
 738                 * returned, allowing the caller to terminate early.
 739                 */
 740                *never_interesting = entry_not_interesting;
 741        }
 742
 743        if (pathlen > matchlen)
 744                return 0;
 745
 746        if (matchlen > pathlen) {
 747                if (match[pathlen] != '/')
 748                        return 0;
 749                if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode))
 750                        return 0;
 751        }
 752
 753        if (m == -1)
 754                /*
 755                 * we cheated and did not do strncmp(), so we do
 756                 * that here.
 757                 */
 758                m = ps_strncmp(item, match, entry->path, pathlen);
 759
 760        /*
 761         * If common part matched earlier then it is a hit,
 762         * because we rejected the case where path is not a
 763         * leading directory and is shorter than match.
 764         */
 765        if (!m)
 766                /*
 767                 * match_entry does not check if the prefix part is
 768                 * matched case-sensitively. If the entry is a
 769                 * directory and part of prefix, it'll be rematched
 770                 * eventually by basecmp with special treatment for
 771                 * the prefix.
 772                 */
 773                return 1;
 774
 775        return 0;
 776}
 777
 778/* :(icase)-aware string compare */
 779static int basecmp(const struct pathspec_item *item,
 780                   const char *base, const char *match, int len)
 781{
 782        if (item->magic & PATHSPEC_ICASE) {
 783                int ret, n = len > item->prefix ? item->prefix : len;
 784                ret = strncmp(base, match, n);
 785                if (ret)
 786                        return ret;
 787                base += n;
 788                match += n;
 789                len -= n;
 790        }
 791        return ps_strncmp(item, base, match, len);
 792}
 793
 794static int match_dir_prefix(const struct pathspec_item *item,
 795                            const char *base,
 796                            const char *match, int matchlen)
 797{
 798        if (basecmp(item, base, match, matchlen))
 799                return 0;
 800
 801        /*
 802         * If the base is a subdirectory of a path which
 803         * was specified, all of them are interesting.
 804         */
 805        if (!matchlen ||
 806            base[matchlen] == '/' ||
 807            match[matchlen - 1] == '/')
 808                return 1;
 809
 810        /* Just a random prefix match */
 811        return 0;
 812}
 813
 814/*
 815 * Perform matching on the leading non-wildcard part of
 816 * pathspec. item->nowildcard_len must be greater than zero. Return
 817 * non-zero if base is matched.
 818 */
 819static int match_wildcard_base(const struct pathspec_item *item,
 820                               const char *base, int baselen,
 821                               int *matched)
 822{
 823        const char *match = item->match;
 824        /* the wildcard part is not considered in this function */
 825        int matchlen = item->nowildcard_len;
 826
 827        if (baselen) {
 828                int dirlen;
 829                /*
 830                 * Return early if base is longer than the
 831                 * non-wildcard part but it does not match.
 832                 */
 833                if (baselen >= matchlen) {
 834                        *matched = matchlen;
 835                        return !basecmp(item, base, match, matchlen);
 836                }
 837
 838                dirlen = matchlen;
 839                while (dirlen && match[dirlen - 1] != '/')
 840                        dirlen--;
 841
 842                /*
 843                 * Return early if base is shorter than the
 844                 * non-wildcard part but it does not match. Note that
 845                 * base ends with '/' so we are sure it really matches
 846                 * directory
 847                 */
 848                if (basecmp(item, base, match, baselen))
 849                        return 0;
 850                *matched = baselen;
 851        } else
 852                *matched = 0;
 853        /*
 854         * we could have checked entry against the non-wildcard part
 855         * that is not in base and does similar never_interesting
 856         * optimization as in match_entry. For now just be happy with
 857         * base comparison.
 858         */
 859        return entry_interesting;
 860}
 861
 862/*
 863 * Is a tree entry interesting given the pathspec we have?
 864 *
 865 * Pre-condition: either baselen == base_offset (i.e. empty path)
 866 * or base[baselen-1] == '/' (i.e. with trailing slash).
 867 */
 868static enum interesting do_match(const struct name_entry *entry,
 869                                 struct strbuf *base, int base_offset,
 870                                 const struct pathspec *ps,
 871                                 int exclude)
 872{
 873        int i;
 874        int pathlen, baselen = base->len - base_offset;
 875        enum interesting never_interesting = ps->has_wildcard ?
 876                entry_not_interesting : all_entries_not_interesting;
 877
 878        GUARD_PATHSPEC(ps,
 879                       PATHSPEC_FROMTOP |
 880                       PATHSPEC_MAXDEPTH |
 881                       PATHSPEC_LITERAL |
 882                       PATHSPEC_GLOB |
 883                       PATHSPEC_ICASE |
 884                       PATHSPEC_EXCLUDE);
 885
 886        if (!ps->nr) {
 887                if (!ps->recursive ||
 888                    !(ps->magic & PATHSPEC_MAXDEPTH) ||
 889                    ps->max_depth == -1)
 890                        return all_entries_interesting;
 891                return within_depth(base->buf + base_offset, baselen,
 892                                    !!S_ISDIR(entry->mode),
 893                                    ps->max_depth) ?
 894                        entry_interesting : entry_not_interesting;
 895        }
 896
 897        pathlen = tree_entry_len(entry);
 898
 899        for (i = ps->nr - 1; i >= 0; i--) {
 900                const struct pathspec_item *item = ps->items+i;
 901                const char *match = item->match;
 902                const char *base_str = base->buf + base_offset;
 903                int matchlen = item->len, matched = 0;
 904
 905                if ((!exclude &&   item->magic & PATHSPEC_EXCLUDE) ||
 906                    ( exclude && !(item->magic & PATHSPEC_EXCLUDE)))
 907                        continue;
 908
 909                if (baselen >= matchlen) {
 910                        /* If it doesn't match, move along... */
 911                        if (!match_dir_prefix(item, base_str, match, matchlen))
 912                                goto match_wildcards;
 913
 914                        if (!ps->recursive ||
 915                            !(ps->magic & PATHSPEC_MAXDEPTH) ||
 916                            ps->max_depth == -1)
 917                                return all_entries_interesting;
 918
 919                        return within_depth(base_str + matchlen + 1,
 920                                            baselen - matchlen - 1,
 921                                            !!S_ISDIR(entry->mode),
 922                                            ps->max_depth) ?
 923                                entry_interesting : entry_not_interesting;
 924                }
 925
 926                /* Either there must be no base, or the base must match. */
 927                if (baselen == 0 || !basecmp(item, base_str, match, baselen)) {
 928                        if (match_entry(item, entry, pathlen,
 929                                        match + baselen, matchlen - baselen,
 930                                        &never_interesting))
 931                                return entry_interesting;
 932
 933                        if (item->nowildcard_len < item->len) {
 934                                if (!git_fnmatch(item, match + baselen, entry->path,
 935                                                 item->nowildcard_len - baselen))
 936                                        return entry_interesting;
 937
 938                                /*
 939                                 * Match all directories. We'll try to
 940                                 * match files later on.
 941                                 */
 942                                if (ps->recursive && S_ISDIR(entry->mode))
 943                                        return entry_interesting;
 944                        }
 945
 946                        continue;
 947                }
 948
 949match_wildcards:
 950                if (item->nowildcard_len == item->len)
 951                        continue;
 952
 953                if (item->nowildcard_len &&
 954                    !match_wildcard_base(item, base_str, baselen, &matched))
 955                        continue;
 956
 957                /*
 958                 * Concatenate base and entry->path into one and do
 959                 * fnmatch() on it.
 960                 *
 961                 * While we could avoid concatenation in certain cases
 962                 * [1], which saves a memcpy and potentially a
 963                 * realloc, it turns out not worth it. Measurement on
 964                 * linux-2.6 does not show any clear improvements,
 965                 * partly because of the nowildcard_len optimization
 966                 * in git_fnmatch(). Avoid micro-optimizations here.
 967                 *
 968                 * [1] if match_wildcard_base() says the base
 969                 * directory is already matched, we only need to match
 970                 * the rest, which is shorter so _in theory_ faster.
 971                 */
 972
 973                strbuf_add(base, entry->path, pathlen);
 974
 975                if (!git_fnmatch(item, match, base->buf + base_offset,
 976                                 item->nowildcard_len)) {
 977                        strbuf_setlen(base, base_offset + baselen);
 978                        return entry_interesting;
 979                }
 980                strbuf_setlen(base, base_offset + baselen);
 981
 982                /*
 983                 * Match all directories. We'll try to match files
 984                 * later on.
 985                 * max_depth is ignored but we may consider support it
 986                 * in future, see
 987                 * http://thread.gmane.org/gmane.comp.version-control.git/163757/focus=163840
 988                 */
 989                if (ps->recursive && S_ISDIR(entry->mode))
 990                        return entry_interesting;
 991        }
 992        return never_interesting; /* No matches */
 993}
 994
 995/*
 996 * Is a tree entry interesting given the pathspec we have?
 997 *
 998 * Pre-condition: either baselen == base_offset (i.e. empty path)
 999 * or base[baselen-1] == '/' (i.e. with trailing slash).
1000 */
1001enum interesting tree_entry_interesting(const struct name_entry *entry,
1002                                        struct strbuf *base, int base_offset,
1003                                        const struct pathspec *ps)
1004{
1005        enum interesting positive, negative;
1006        positive = do_match(entry, base, base_offset, ps, 0);
1007
1008        /*
1009         * case | entry | positive | negative | result
1010         * -----+-------+----------+----------+-------
1011         *   1  |  file |   -1     |  -1..2   |  -1
1012         *   2  |  file |    0     |  -1..2   |   0
1013         *   3  |  file |    1     |   -1     |   1
1014         *   4  |  file |    1     |    0     |   1
1015         *   5  |  file |    1     |    1     |   0
1016         *   6  |  file |    1     |    2     |   0
1017         *   7  |  file |    2     |   -1     |   2
1018         *   8  |  file |    2     |    0     |   2
1019         *   9  |  file |    2     |    1     |   0
1020         *  10  |  file |    2     |    2     |  -1
1021         * -----+-------+----------+----------+-------
1022         *  11  |  dir  |   -1     |  -1..2   |  -1
1023         *  12  |  dir  |    0     |  -1..2   |   0
1024         *  13  |  dir  |    1     |   -1     |   1
1025         *  14  |  dir  |    1     |    0     |   1
1026         *  15  |  dir  |    1     |    1     |   1 (*)
1027         *  16  |  dir  |    1     |    2     |   0
1028         *  17  |  dir  |    2     |   -1     |   2
1029         *  18  |  dir  |    2     |    0     |   2
1030         *  19  |  dir  |    2     |    1     |   1 (*)
1031         *  20  |  dir  |    2     |    2     |  -1
1032         *
1033         * (*) An exclude pattern interested in a directory does not
1034         * necessarily mean it will exclude all of the directory. In
1035         * wildcard case, it can't decide until looking at individual
1036         * files inside. So don't write such directories off yet.
1037         */
1038
1039        if (!(ps->magic & PATHSPEC_EXCLUDE) ||
1040            positive <= entry_not_interesting) /* #1, #2, #11, #12 */
1041                return positive;
1042
1043        negative = do_match(entry, base, base_offset, ps, 1);
1044
1045        /* #3, #4, #7, #8, #13, #14, #17, #18 */
1046        if (negative <= entry_not_interesting)
1047                return positive;
1048
1049        /* #15, #19 */
1050        if (S_ISDIR(entry->mode) &&
1051            positive >= entry_interesting &&
1052            negative == entry_interesting)
1053                return entry_interesting;
1054
1055        if ((positive == entry_interesting &&
1056             negative >= entry_interesting) || /* #5, #6, #16 */
1057            (positive == all_entries_interesting &&
1058             negative == entry_interesting)) /* #9 */
1059                return entry_not_interesting;
1060
1061        return all_entries_not_interesting; /* #10, #20 */
1062}