tree-walk.con commit Recommend git-filter-repo instead of git-filter-branch (9df53c5)
   1#include "cache.h"
   2#include "tree-walk.h"
   3#include "unpack-trees.h"
   4#include "dir.h"
   5#include "object-store.h"
   6#include "tree.h"
   7#include "pathspec.h"
   8
   9static const char *get_mode(const char *str, unsigned int *modep)
  10{
  11        unsigned char c;
  12        unsigned int mode = 0;
  13
  14        if (*str == ' ')
  15                return NULL;
  16
  17        while ((c = *str++) != ' ') {
  18                if (c < '0' || c > '7')
  19                        return NULL;
  20                mode = (mode << 3) + (c - '0');
  21        }
  22        *modep = mode;
  23        return str;
  24}
  25
  26static int decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size, struct strbuf *err)
  27{
  28        const char *path;
  29        unsigned int mode, len;
  30        const unsigned hashsz = the_hash_algo->rawsz;
  31
  32        if (size < hashsz + 3 || buf[size - (hashsz + 1)]) {
  33                strbuf_addstr(err, _("too-short tree object"));
  34                return -1;
  35        }
  36
  37        path = get_mode(buf, &mode);
  38        if (!path) {
  39                strbuf_addstr(err, _("malformed mode in tree entry"));
  40                return -1;
  41        }
  42        if (!*path) {
  43                strbuf_addstr(err, _("empty filename in tree entry"));
  44                return -1;
  45        }
  46        len = strlen(path) + 1;
  47
  48        /* Initialize the descriptor entry */
  49        desc->entry.path = path;
  50        desc->entry.mode = canon_mode(mode);
  51        desc->entry.pathlen = len - 1;
  52        hashcpy(desc->entry.oid.hash, (const unsigned char *)path + len);
  53
  54        return 0;
  55}
  56
  57static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, unsigned long size, struct strbuf *err)
  58{
  59        desc->buffer = buffer;
  60        desc->size = size;
  61        if (size)
  62                return decode_tree_entry(desc, buffer, size, err);
  63        return 0;
  64}
  65
  66void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
  67{
  68        struct strbuf err = STRBUF_INIT;
  69        if (init_tree_desc_internal(desc, buffer, size, &err))
  70                die("%s", err.buf);
  71        strbuf_release(&err);
  72}
  73
  74int init_tree_desc_gently(struct tree_desc *desc, const void *buffer, unsigned long size)
  75{
  76        struct strbuf err = STRBUF_INIT;
  77        int result = init_tree_desc_internal(desc, buffer, size, &err);
  78        if (result)
  79                error("%s", err.buf);
  80        strbuf_release(&err);
  81        return result;
  82}
  83
  84void *fill_tree_descriptor(struct repository *r,
  85                           struct tree_desc *desc,
  86                           const struct object_id *oid)
  87{
  88        unsigned long size = 0;
  89        void *buf = NULL;
  90
  91        if (oid) {
  92                buf = read_object_with_reference(r, oid, tree_type, &size, NULL);
  93                if (!buf)
  94                        die("unable to read tree %s", oid_to_hex(oid));
  95        }
  96        init_tree_desc(desc, buf, size);
  97        return buf;
  98}
  99
 100static void entry_clear(struct name_entry *a)
 101{
 102        memset(a, 0, sizeof(*a));
 103}
 104
 105static void entry_extract(struct tree_desc *t, struct name_entry *a)
 106{
 107        *a = t->entry;
 108}
 109
 110static int update_tree_entry_internal(struct tree_desc *desc, struct strbuf *err)
 111{
 112        const void *buf = desc->buffer;
 113        const unsigned char *end = (const unsigned char *)desc->entry.path + desc->entry.pathlen + 1 + the_hash_algo->rawsz;
 114        unsigned long size = desc->size;
 115        unsigned long len = end - (const unsigned char *)buf;
 116
 117        if (size < len)
 118                die(_("too-short tree file"));
 119        buf = end;
 120        size -= len;
 121        desc->buffer = buf;
 122        desc->size = size;
 123        if (size)
 124                return decode_tree_entry(desc, buf, size, err);
 125        return 0;
 126}
 127
 128void update_tree_entry(struct tree_desc *desc)
 129{
 130        struct strbuf err = STRBUF_INIT;
 131        if (update_tree_entry_internal(desc, &err))
 132                die("%s", err.buf);
 133        strbuf_release(&err);
 134}
 135
 136int update_tree_entry_gently(struct tree_desc *desc)
 137{
 138        struct strbuf err = STRBUF_INIT;
 139        if (update_tree_entry_internal(desc, &err)) {
 140                error("%s", err.buf);
 141                strbuf_release(&err);
 142                /* Stop processing this tree after error */
 143                desc->size = 0;
 144                return -1;
 145        }
 146        strbuf_release(&err);
 147        return 0;
 148}
 149
 150int tree_entry(struct tree_desc *desc, struct name_entry *entry)
 151{
 152        if (!desc->size)
 153                return 0;
 154
 155        *entry = desc->entry;
 156        update_tree_entry(desc);
 157        return 1;
 158}
 159
 160int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry)
 161{
 162        if (!desc->size)
 163                return 0;
 164
 165        *entry = desc->entry;
 166        if (update_tree_entry_gently(desc))
 167                return 0;
 168        return 1;
 169}
 170
 171void setup_traverse_info(struct traverse_info *info, const char *base)
 172{
 173        size_t pathlen = strlen(base);
 174        static struct traverse_info dummy;
 175
 176        memset(info, 0, sizeof(*info));
 177        if (pathlen && base[pathlen-1] == '/')
 178                pathlen--;
 179        info->pathlen = pathlen ? pathlen + 1 : 0;
 180        info->name = base;
 181        info->namelen = pathlen;
 182        if (pathlen)
 183                info->prev = &dummy;
 184}
 185
 186char *make_traverse_path(char *path, size_t pathlen,
 187                         const struct traverse_info *info,
 188                         const char *name, size_t namelen)
 189{
 190        /* Always points to the end of the name we're about to add */
 191        size_t pos = st_add(info->pathlen, namelen);
 192
 193        if (pos >= pathlen)
 194                BUG("too small buffer passed to make_traverse_path");
 195
 196        path[pos] = 0;
 197        for (;;) {
 198                if (pos < namelen)
 199                        BUG("traverse_info pathlen does not match strings");
 200                pos -= namelen;
 201                memcpy(path + pos, name, namelen);
 202
 203                if (!pos)
 204                        break;
 205                path[--pos] = '/';
 206
 207                if (!info)
 208                        BUG("traverse_info ran out of list items");
 209                name = info->name;
 210                namelen = info->namelen;
 211                info = info->prev;
 212        }
 213        return path;
 214}
 215
 216void strbuf_make_traverse_path(struct strbuf *out,
 217                               const struct traverse_info *info,
 218                               const char *name, size_t namelen)
 219{
 220        size_t len = traverse_path_len(info, namelen);
 221
 222        strbuf_grow(out, len);
 223        make_traverse_path(out->buf + out->len, out->alloc - out->len,
 224                           info, name, namelen);
 225        strbuf_setlen(out, out->len + len);
 226}
 227
 228struct tree_desc_skip {
 229        struct tree_desc_skip *prev;
 230        const void *ptr;
 231};
 232
 233struct tree_desc_x {
 234        struct tree_desc d;
 235        struct tree_desc_skip *skip;
 236};
 237
 238static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
 239{
 240        /*
 241         * The caller wants to pick *a* from a tree or nothing.
 242         * We are looking at *b* in a tree.
 243         *
 244         * (0) If a and b are the same name, we are trivially happy.
 245         *
 246         * There are three possibilities where *a* could be hiding
 247         * behind *b*.
 248         *
 249         * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
 250         *                                matter what.
 251         * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
 252         * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
 253         *
 254         * Otherwise we know *a* won't appear in the tree without
 255         * scanning further.
 256         */
 257
 258        int cmp = name_compare(a, a_len, b, b_len);
 259
 260        /* Most common case first -- reading sync'd trees */
 261        if (!cmp)
 262                return cmp;
 263
 264        if (0 < cmp) {
 265                /* a comes after b; it does not matter if it is case (3)
 266                if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
 267                        return 1;
 268                */
 269                return 1; /* keep looking */
 270        }
 271
 272        /* b comes after a; are we looking at case (2)? */
 273        if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
 274                return 1; /* keep looking */
 275
 276        return -1; /* a cannot appear in the tree */
 277}
 278
 279/*
 280 * From the extended tree_desc, extract the first name entry, while
 281 * paying attention to the candidate "first" name.  Most importantly,
 282 * when looking for an entry, if there are entries that sorts earlier
 283 * in the tree object representation than that name, skip them and
 284 * process the named entry first.  We will remember that we haven't
 285 * processed the first entry yet, and in the later call skip the
 286 * entry we processed early when update_extended_entry() is called.
 287 *
 288 * E.g. if the underlying tree object has these entries:
 289 *
 290 *    blob    "t-1"
 291 *    blob    "t-2"
 292 *    tree    "t"
 293 *    blob    "t=1"
 294 *
 295 * and the "first" asks for "t", remember that we still need to
 296 * process "t-1" and "t-2" but extract "t".  After processing the
 297 * entry "t" from this call, the caller will let us know by calling
 298 * update_extended_entry() that we can remember "t" has been processed
 299 * already.
 300 */
 301
 302static void extended_entry_extract(struct tree_desc_x *t,
 303                                   struct name_entry *a,
 304                                   const char *first,
 305                                   int first_len)
 306{
 307        const char *path;
 308        int len;
 309        struct tree_desc probe;
 310        struct tree_desc_skip *skip;
 311
 312        /*
 313         * Extract the first entry from the tree_desc, but skip the
 314         * ones that we already returned in earlier rounds.
 315         */
 316        while (1) {
 317                if (!t->d.size) {
 318                        entry_clear(a);
 319                        break; /* not found */
 320                }
 321                entry_extract(&t->d, a);
 322                for (skip = t->skip; skip; skip = skip->prev)
 323                        if (a->path == skip->ptr)
 324                                break; /* found */
 325                if (!skip)
 326                        break;
 327                /* We have processed this entry already. */
 328                update_tree_entry(&t->d);
 329        }
 330
 331        if (!first || !a->path)
 332                return;
 333
 334        /*
 335         * The caller wants "first" from this tree, or nothing.
 336         */
 337        path = a->path;
 338        len = tree_entry_len(a);
 339        switch (check_entry_match(first, first_len, path, len)) {
 340        case -1:
 341                entry_clear(a);
 342        case 0:
 343                return;
 344        default:
 345                break;
 346        }
 347
 348        /*
 349         * We need to look-ahead -- we suspect that a subtree whose
 350         * name is "first" may be hiding behind the current entry "path".
 351         */
 352        probe = t->d;
 353        while (probe.size) {
 354                entry_extract(&probe, a);
 355                path = a->path;
 356                len = tree_entry_len(a);
 357                switch (check_entry_match(first, first_len, path, len)) {
 358                case -1:
 359                        entry_clear(a);
 360                case 0:
 361                        return;
 362                default:
 363                        update_tree_entry(&probe);
 364                        break;
 365                }
 366                /* keep looking */
 367        }
 368        entry_clear(a);
 369}
 370
 371static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
 372{
 373        if (t->d.entry.path == a->path) {
 374                update_tree_entry(&t->d);
 375        } else {
 376                /* we have returned this entry early */
 377                struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
 378                skip->ptr = a->path;
 379                skip->prev = t->skip;
 380                t->skip = skip;
 381        }
 382}
 383
 384static void free_extended_entry(struct tree_desc_x *t)
 385{
 386        struct tree_desc_skip *p, *s;
 387
 388        for (s = t->skip; s; s = p) {
 389                p = s->prev;
 390                free(s);
 391        }
 392}
 393
 394static inline int prune_traversal(struct index_state *istate,
 395                                  struct name_entry *e,
 396                                  struct traverse_info *info,
 397                                  struct strbuf *base,
 398                                  int still_interesting)
 399{
 400        if (!info->pathspec || still_interesting == 2)
 401                return 2;
 402        if (still_interesting < 0)
 403                return still_interesting;
 404        return tree_entry_interesting(istate, e, base,
 405                                      0, info->pathspec);
 406}
 407
 408int traverse_trees(struct index_state *istate,
 409                   int n, struct tree_desc *t,
 410                   struct traverse_info *info)
 411{
 412        int error = 0;
 413        struct name_entry *entry = xmalloc(n*sizeof(*entry));
 414        int i;
 415        struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
 416        struct strbuf base = STRBUF_INIT;
 417        int interesting = 1;
 418        char *traverse_path;
 419
 420        for (i = 0; i < n; i++)
 421                tx[i].d = t[i];
 422
 423        if (info->prev) {
 424                strbuf_make_traverse_path(&base, info->prev,
 425                                          info->name, info->namelen);
 426                strbuf_addch(&base, '/');
 427                traverse_path = xstrndup(base.buf, base.len);
 428        } else {
 429                traverse_path = xstrndup(info->name, info->pathlen);
 430        }
 431        info->traverse_path = traverse_path;
 432        for (;;) {
 433                int trees_used;
 434                unsigned long mask, dirmask;
 435                const char *first = NULL;
 436                int first_len = 0;
 437                struct name_entry *e = NULL;
 438                int len;
 439
 440                for (i = 0; i < n; i++) {
 441                        e = entry + i;
 442                        extended_entry_extract(tx + i, e, NULL, 0);
 443                }
 444
 445                /*
 446                 * A tree may have "t-2" at the current location even
 447                 * though it may have "t" that is a subtree behind it,
 448                 * and another tree may return "t".  We want to grab
 449                 * all "t" from all trees to match in such a case.
 450                 */
 451                for (i = 0; i < n; i++) {
 452                        e = entry + i;
 453                        if (!e->path)
 454                                continue;
 455                        len = tree_entry_len(e);
 456                        if (!first) {
 457                                first = e->path;
 458                                first_len = len;
 459                                continue;
 460                        }
 461                        if (name_compare(e->path, len, first, first_len) < 0) {
 462                                first = e->path;
 463                                first_len = len;
 464                        }
 465                }
 466
 467                if (first) {
 468                        for (i = 0; i < n; i++) {
 469                                e = entry + i;
 470                                extended_entry_extract(tx + i, e, first, first_len);
 471                                /* Cull the ones that are not the earliest */
 472                                if (!e->path)
 473                                        continue;
 474                                len = tree_entry_len(e);
 475                                if (name_compare(e->path, len, first, first_len))
 476                                        entry_clear(e);
 477                        }
 478                }
 479
 480                /* Now we have in entry[i] the earliest name from the trees */
 481                mask = 0;
 482                dirmask = 0;
 483                for (i = 0; i < n; i++) {
 484                        if (!entry[i].path)
 485                                continue;
 486                        mask |= 1ul << i;
 487                        if (S_ISDIR(entry[i].mode))
 488                                dirmask |= 1ul << i;
 489                        e = &entry[i];
 490                }
 491                if (!mask)
 492                        break;
 493                interesting = prune_traversal(istate, e, info, &base, interesting);
 494                if (interesting < 0)
 495                        break;
 496                if (interesting) {
 497                        trees_used = info->fn(n, mask, dirmask, entry, info);
 498                        if (trees_used < 0) {
 499                                error = trees_used;
 500                                if (!info->show_all_errors)
 501                                        break;
 502                        }
 503                        mask &= trees_used;
 504                }
 505                for (i = 0; i < n; i++)
 506                        if (mask & (1ul << i))
 507                                update_extended_entry(tx + i, entry + i);
 508        }
 509        free(entry);
 510        for (i = 0; i < n; i++)
 511                free_extended_entry(tx + i);
 512        free(tx);
 513        free(traverse_path);
 514        info->traverse_path = NULL;
 515        strbuf_release(&base);
 516        return error;
 517}
 518
 519struct dir_state {
 520        void *tree;
 521        unsigned long size;
 522        struct object_id oid;
 523};
 524
 525static int find_tree_entry(struct repository *r, struct tree_desc *t,
 526                           const char *name, struct object_id *result,
 527                           unsigned short *mode)
 528{
 529        int namelen = strlen(name);
 530        while (t->size) {
 531                const char *entry;
 532                struct object_id oid;
 533                int entrylen, cmp;
 534
 535                oidcpy(&oid, tree_entry_extract(t, &entry, mode));
 536                entrylen = tree_entry_len(&t->entry);
 537                update_tree_entry(t);
 538                if (entrylen > namelen)
 539                        continue;
 540                cmp = memcmp(name, entry, entrylen);
 541                if (cmp > 0)
 542                        continue;
 543                if (cmp < 0)
 544                        break;
 545                if (entrylen == namelen) {
 546                        oidcpy(result, &oid);
 547                        return 0;
 548                }
 549                if (name[entrylen] != '/')
 550                        continue;
 551                if (!S_ISDIR(*mode))
 552                        break;
 553                if (++entrylen == namelen) {
 554                        oidcpy(result, &oid);
 555                        return 0;
 556                }
 557                return get_tree_entry(r, &oid, name + entrylen, result, mode);
 558        }
 559        return -1;
 560}
 561
 562int get_tree_entry(struct repository *r,
 563                   const struct object_id *tree_oid,
 564                   const char *name,
 565                   struct object_id *oid,
 566                   unsigned short *mode)
 567{
 568        int retval;
 569        void *tree;
 570        unsigned long size;
 571        struct object_id root;
 572
 573        tree = read_object_with_reference(r, tree_oid, tree_type, &size, &root);
 574        if (!tree)
 575                return -1;
 576
 577        if (name[0] == '\0') {
 578                oidcpy(oid, &root);
 579                free(tree);
 580                return 0;
 581        }
 582
 583        if (!size) {
 584                retval = -1;
 585        } else {
 586                struct tree_desc t;
 587                init_tree_desc(&t, tree, size);
 588                retval = find_tree_entry(r, &t, name, oid, mode);
 589        }
 590        free(tree);
 591        return retval;
 592}
 593
 594/*
 595 * This is Linux's built-in max for the number of symlinks to follow.
 596 * That limit, of course, does not affect git, but it's a reasonable
 597 * choice.
 598 */
 599#define GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS 40
 600
 601/**
 602 * Find a tree entry by following symlinks in tree_sha (which is
 603 * assumed to be the root of the repository).  In the event that a
 604 * symlink points outside the repository (e.g. a link to /foo or a
 605 * root-level link to ../foo), the portion of the link which is
 606 * outside the repository will be returned in result_path, and *mode
 607 * will be set to 0.  It is assumed that result_path is uninitialized.
 608 * If there are no symlinks, or the end result of the symlink chain
 609 * points to an object inside the repository, result will be filled in
 610 * with the sha1 of the found object, and *mode will hold the mode of
 611 * the object.
 612 *
 613 * See the code for enum get_oid_result for a description of
 614 * the return values.
 615 */
 616enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
 617                struct object_id *tree_oid, const char *name,
 618                struct object_id *result, struct strbuf *result_path,
 619                unsigned short *mode)
 620{
 621        int retval = MISSING_OBJECT;
 622        struct dir_state *parents = NULL;
 623        size_t parents_alloc = 0;
 624        size_t i, parents_nr = 0;
 625        struct object_id current_tree_oid;
 626        struct strbuf namebuf = STRBUF_INIT;
 627        struct tree_desc t;
 628        int follows_remaining = GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS;
 629
 630        init_tree_desc(&t, NULL, 0UL);
 631        strbuf_addstr(&namebuf, name);
 632        oidcpy(&current_tree_oid, tree_oid);
 633
 634        while (1) {
 635                int find_result;
 636                char *first_slash;
 637                char *remainder = NULL;
 638
 639                if (!t.buffer) {
 640                        void *tree;
 641                        struct object_id root;
 642                        unsigned long size;
 643                        tree = read_object_with_reference(r,
 644                                                          &current_tree_oid,
 645                                                          tree_type, &size,
 646                                                          &root);
 647                        if (!tree)
 648                                goto done;
 649
 650                        ALLOC_GROW(parents, parents_nr + 1, parents_alloc);
 651                        parents[parents_nr].tree = tree;
 652                        parents[parents_nr].size = size;
 653                        oidcpy(&parents[parents_nr].oid, &root);
 654                        parents_nr++;
 655
 656                        if (namebuf.buf[0] == '\0') {
 657                                oidcpy(result, &root);
 658                                retval = FOUND;
 659                                goto done;
 660                        }
 661
 662                        if (!size)
 663                                goto done;
 664
 665                        /* descend */
 666                        init_tree_desc(&t, tree, size);
 667                }
 668
 669                /* Handle symlinks to e.g. a//b by removing leading slashes */
 670                while (namebuf.buf[0] == '/') {
 671                        strbuf_remove(&namebuf, 0, 1);
 672                }
 673
 674                /* Split namebuf into a first component and a remainder */
 675                if ((first_slash = strchr(namebuf.buf, '/'))) {
 676                        *first_slash = 0;
 677                        remainder = first_slash + 1;
 678                }
 679
 680                if (!strcmp(namebuf.buf, "..")) {
 681                        struct dir_state *parent;
 682                        /*
 683                         * We could end up with .. in the namebuf if it
 684                         * appears in a symlink.
 685                         */
 686
 687                        if (parents_nr == 1) {
 688                                if (remainder)
 689                                        *first_slash = '/';
 690                                strbuf_add(result_path, namebuf.buf,
 691                                           namebuf.len);
 692                                *mode = 0;
 693                                retval = FOUND;
 694                                goto done;
 695                        }
 696                        parent = &parents[parents_nr - 1];
 697                        free(parent->tree);
 698                        parents_nr--;
 699                        parent = &parents[parents_nr - 1];
 700                        init_tree_desc(&t, parent->tree, parent->size);
 701                        strbuf_remove(&namebuf, 0, remainder ? 3 : 2);
 702                        continue;
 703                }
 704
 705                /* We could end up here via a symlink to dir/.. */
 706                if (namebuf.buf[0] == '\0') {
 707                        oidcpy(result, &parents[parents_nr - 1].oid);
 708                        retval = FOUND;
 709                        goto done;
 710                }
 711
 712                /* Look up the first (or only) path component in the tree. */
 713                find_result = find_tree_entry(r, &t, namebuf.buf,
 714                                              &current_tree_oid, mode);
 715                if (find_result) {
 716                        goto done;
 717                }
 718
 719                if (S_ISDIR(*mode)) {
 720                        if (!remainder) {
 721                                oidcpy(result, &current_tree_oid);
 722                                retval = FOUND;
 723                                goto done;
 724                        }
 725                        /* Descend the tree */
 726                        t.buffer = NULL;
 727                        strbuf_remove(&namebuf, 0,
 728                                      1 + first_slash - namebuf.buf);
 729                } else if (S_ISREG(*mode)) {
 730                        if (!remainder) {
 731                                oidcpy(result, &current_tree_oid);
 732                                retval = FOUND;
 733                        } else {
 734                                retval = NOT_DIR;
 735                        }
 736                        goto done;
 737                } else if (S_ISLNK(*mode)) {
 738                        /* Follow a symlink */
 739                        unsigned long link_len;
 740                        size_t len;
 741                        char *contents, *contents_start;
 742                        struct dir_state *parent;
 743                        enum object_type type;
 744
 745                        if (follows_remaining-- == 0) {
 746                                /* Too many symlinks followed */
 747                                retval = SYMLINK_LOOP;
 748                                goto done;
 749                        }
 750
 751                        /*
 752                         * At this point, we have followed at a least
 753                         * one symlink, so on error we need to report this.
 754                         */
 755                        retval = DANGLING_SYMLINK;
 756
 757                        contents = repo_read_object_file(r,
 758                                                    &current_tree_oid, &type,
 759                                                    &link_len);
 760
 761                        if (!contents)
 762                                goto done;
 763
 764                        if (contents[0] == '/') {
 765                                strbuf_addstr(result_path, contents);
 766                                free(contents);
 767                                *mode = 0;
 768                                retval = FOUND;
 769                                goto done;
 770                        }
 771
 772                        if (remainder)
 773                                len = first_slash - namebuf.buf;
 774                        else
 775                                len = namebuf.len;
 776
 777                        contents_start = contents;
 778
 779                        parent = &parents[parents_nr - 1];
 780                        init_tree_desc(&t, parent->tree, parent->size);
 781                        strbuf_splice(&namebuf, 0, len,
 782                                      contents_start, link_len);
 783                        if (remainder)
 784                                namebuf.buf[link_len] = '/';
 785                        free(contents);
 786                }
 787        }
 788done:
 789        for (i = 0; i < parents_nr; i++)
 790                free(parents[i].tree);
 791        free(parents);
 792
 793        strbuf_release(&namebuf);
 794        return retval;
 795}
 796
 797static int match_entry(const struct pathspec_item *item,
 798                       const struct name_entry *entry, int pathlen,
 799                       const char *match, int matchlen,
 800                       enum interesting *never_interesting)
 801{
 802        int m = -1; /* signals that we haven't called strncmp() */
 803
 804        if (item->magic & PATHSPEC_ICASE)
 805                /*
 806                 * "Never interesting" trick requires exact
 807                 * matching. We could do something clever with inexact
 808                 * matching, but it's trickier (and not to forget that
 809                 * strcasecmp is locale-dependent, at least in
 810                 * glibc). Just disable it for now. It can't be worse
 811                 * than the wildcard's codepath of '[Tt][Hi][Is][Ss]'
 812                 * pattern.
 813                 */
 814                *never_interesting = entry_not_interesting;
 815        else if (*never_interesting != entry_not_interesting) {
 816                /*
 817                 * We have not seen any match that sorts later
 818                 * than the current path.
 819                 */
 820
 821                /*
 822                 * Does match sort strictly earlier than path
 823                 * with their common parts?
 824                 */
 825                m = strncmp(match, entry->path,
 826                            (matchlen < pathlen) ? matchlen : pathlen);
 827                if (m < 0)
 828                        return 0;
 829
 830                /*
 831                 * If we come here even once, that means there is at
 832                 * least one pathspec that would sort equal to or
 833                 * later than the path we are currently looking at.
 834                 * In other words, if we have never reached this point
 835                 * after iterating all pathspecs, it means all
 836                 * pathspecs are either outside of base, or inside the
 837                 * base but sorts strictly earlier than the current
 838                 * one.  In either case, they will never match the
 839                 * subsequent entries.  In such a case, we initialized
 840                 * the variable to -1 and that is what will be
 841                 * returned, allowing the caller to terminate early.
 842                 */
 843                *never_interesting = entry_not_interesting;
 844        }
 845
 846        if (pathlen > matchlen)
 847                return 0;
 848
 849        if (matchlen > pathlen) {
 850                if (match[pathlen] != '/')
 851                        return 0;
 852                if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode))
 853                        return 0;
 854        }
 855
 856        if (m == -1)
 857                /*
 858                 * we cheated and did not do strncmp(), so we do
 859                 * that here.
 860                 */
 861                m = ps_strncmp(item, match, entry->path, pathlen);
 862
 863        /*
 864         * If common part matched earlier then it is a hit,
 865         * because we rejected the case where path is not a
 866         * leading directory and is shorter than match.
 867         */
 868        if (!m)
 869                /*
 870                 * match_entry does not check if the prefix part is
 871                 * matched case-sensitively. If the entry is a
 872                 * directory and part of prefix, it'll be rematched
 873                 * eventually by basecmp with special treatment for
 874                 * the prefix.
 875                 */
 876                return 1;
 877
 878        return 0;
 879}
 880
 881/* :(icase)-aware string compare */
 882static int basecmp(const struct pathspec_item *item,
 883                   const char *base, const char *match, int len)
 884{
 885        if (item->magic & PATHSPEC_ICASE) {
 886                int ret, n = len > item->prefix ? item->prefix : len;
 887                ret = strncmp(base, match, n);
 888                if (ret)
 889                        return ret;
 890                base += n;
 891                match += n;
 892                len -= n;
 893        }
 894        return ps_strncmp(item, base, match, len);
 895}
 896
 897static int match_dir_prefix(const struct pathspec_item *item,
 898                            const char *base,
 899                            const char *match, int matchlen)
 900{
 901        if (basecmp(item, base, match, matchlen))
 902                return 0;
 903
 904        /*
 905         * If the base is a subdirectory of a path which
 906         * was specified, all of them are interesting.
 907         */
 908        if (!matchlen ||
 909            base[matchlen] == '/' ||
 910            match[matchlen - 1] == '/')
 911                return 1;
 912
 913        /* Just a random prefix match */
 914        return 0;
 915}
 916
 917/*
 918 * Perform matching on the leading non-wildcard part of
 919 * pathspec. item->nowildcard_len must be greater than zero. Return
 920 * non-zero if base is matched.
 921 */
 922static int match_wildcard_base(const struct pathspec_item *item,
 923                               const char *base, int baselen,
 924                               int *matched)
 925{
 926        const char *match = item->match;
 927        /* the wildcard part is not considered in this function */
 928        int matchlen = item->nowildcard_len;
 929
 930        if (baselen) {
 931                int dirlen;
 932                /*
 933                 * Return early if base is longer than the
 934                 * non-wildcard part but it does not match.
 935                 */
 936                if (baselen >= matchlen) {
 937                        *matched = matchlen;
 938                        return !basecmp(item, base, match, matchlen);
 939                }
 940
 941                dirlen = matchlen;
 942                while (dirlen && match[dirlen - 1] != '/')
 943                        dirlen--;
 944
 945                /*
 946                 * Return early if base is shorter than the
 947                 * non-wildcard part but it does not match. Note that
 948                 * base ends with '/' so we are sure it really matches
 949                 * directory
 950                 */
 951                if (basecmp(item, base, match, baselen))
 952                        return 0;
 953                *matched = baselen;
 954        } else
 955                *matched = 0;
 956        /*
 957         * we could have checked entry against the non-wildcard part
 958         * that is not in base and does similar never_interesting
 959         * optimization as in match_entry. For now just be happy with
 960         * base comparison.
 961         */
 962        return entry_interesting;
 963}
 964
 965/*
 966 * Is a tree entry interesting given the pathspec we have?
 967 *
 968 * Pre-condition: either baselen == base_offset (i.e. empty path)
 969 * or base[baselen-1] == '/' (i.e. with trailing slash).
 970 */
 971static enum interesting do_match(struct index_state *istate,
 972                                 const struct name_entry *entry,
 973                                 struct strbuf *base, int base_offset,
 974                                 const struct pathspec *ps,
 975                                 int exclude)
 976{
 977        int i;
 978        int pathlen, baselen = base->len - base_offset;
 979        enum interesting never_interesting = ps->has_wildcard ?
 980                entry_not_interesting : all_entries_not_interesting;
 981
 982        GUARD_PATHSPEC(ps,
 983                       PATHSPEC_FROMTOP |
 984                       PATHSPEC_MAXDEPTH |
 985                       PATHSPEC_LITERAL |
 986                       PATHSPEC_GLOB |
 987                       PATHSPEC_ICASE |
 988                       PATHSPEC_EXCLUDE |
 989                       PATHSPEC_ATTR);
 990
 991        if (!ps->nr) {
 992                if (!ps->recursive ||
 993                    !(ps->magic & PATHSPEC_MAXDEPTH) ||
 994                    ps->max_depth == -1)
 995                        return all_entries_interesting;
 996                return within_depth(base->buf + base_offset, baselen,
 997                                    !!S_ISDIR(entry->mode),
 998                                    ps->max_depth) ?
 999                        entry_interesting : entry_not_interesting;
1000        }
1001
1002        pathlen = tree_entry_len(entry);
1003
1004        for (i = ps->nr - 1; i >= 0; i--) {
1005                const struct pathspec_item *item = ps->items+i;
1006                const char *match = item->match;
1007                const char *base_str = base->buf + base_offset;
1008                int matchlen = item->len, matched = 0;
1009
1010                if ((!exclude &&   item->magic & PATHSPEC_EXCLUDE) ||
1011                    ( exclude && !(item->magic & PATHSPEC_EXCLUDE)))
1012                        continue;
1013
1014                if (baselen >= matchlen) {
1015                        /* If it doesn't match, move along... */
1016                        if (!match_dir_prefix(item, base_str, match, matchlen))
1017                                goto match_wildcards;
1018
1019                        if (!ps->recursive ||
1020                            !(ps->magic & PATHSPEC_MAXDEPTH) ||
1021                            ps->max_depth == -1) {
1022                                if (!item->attr_match_nr)
1023                                        return all_entries_interesting;
1024                                else
1025                                        goto interesting;
1026                        }
1027
1028                        if (within_depth(base_str + matchlen + 1,
1029                                         baselen - matchlen - 1,
1030                                         !!S_ISDIR(entry->mode),
1031                                         ps->max_depth))
1032                                goto interesting;
1033                        else
1034                                return entry_not_interesting;
1035                }
1036
1037                /* Either there must be no base, or the base must match. */
1038                if (baselen == 0 || !basecmp(item, base_str, match, baselen)) {
1039                        if (match_entry(item, entry, pathlen,
1040                                        match + baselen, matchlen - baselen,
1041                                        &never_interesting))
1042                                goto interesting;
1043
1044                        if (item->nowildcard_len < item->len) {
1045                                if (!git_fnmatch(item, match + baselen, entry->path,
1046                                                 item->nowildcard_len - baselen))
1047                                        goto interesting;
1048
1049                                /*
1050                                 * Match all directories. We'll try to
1051                                 * match files later on.
1052                                 */
1053                                if (ps->recursive && S_ISDIR(entry->mode))
1054                                        return entry_interesting;
1055
1056                                /*
1057                                 * When matching against submodules with
1058                                 * wildcard characters, ensure that the entry
1059                                 * at least matches up to the first wild
1060                                 * character.  More accurate matching can then
1061                                 * be performed in the submodule itself.
1062                                 */
1063                                if (ps->recurse_submodules &&
1064                                    S_ISGITLINK(entry->mode) &&
1065                                    !ps_strncmp(item, match + baselen,
1066                                                entry->path,
1067                                                item->nowildcard_len - baselen))
1068                                        goto interesting;
1069                        }
1070
1071                        continue;
1072                }
1073
1074match_wildcards:
1075                if (item->nowildcard_len == item->len)
1076                        continue;
1077
1078                if (item->nowildcard_len &&
1079                    !match_wildcard_base(item, base_str, baselen, &matched))
1080                        continue;
1081
1082                /*
1083                 * Concatenate base and entry->path into one and do
1084                 * fnmatch() on it.
1085                 *
1086                 * While we could avoid concatenation in certain cases
1087                 * [1], which saves a memcpy and potentially a
1088                 * realloc, it turns out not worth it. Measurement on
1089                 * linux-2.6 does not show any clear improvements,
1090                 * partly because of the nowildcard_len optimization
1091                 * in git_fnmatch(). Avoid micro-optimizations here.
1092                 *
1093                 * [1] if match_wildcard_base() says the base
1094                 * directory is already matched, we only need to match
1095                 * the rest, which is shorter so _in theory_ faster.
1096                 */
1097
1098                strbuf_add(base, entry->path, pathlen);
1099
1100                if (!git_fnmatch(item, match, base->buf + base_offset,
1101                                 item->nowildcard_len)) {
1102                        strbuf_setlen(base, base_offset + baselen);
1103                        goto interesting;
1104                }
1105
1106                /*
1107                 * When matching against submodules with
1108                 * wildcard characters, ensure that the entry
1109                 * at least matches up to the first wild
1110                 * character.  More accurate matching can then
1111                 * be performed in the submodule itself.
1112                 */
1113                if (ps->recurse_submodules && S_ISGITLINK(entry->mode) &&
1114                    !ps_strncmp(item, match, base->buf + base_offset,
1115                                item->nowildcard_len)) {
1116                        strbuf_setlen(base, base_offset + baselen);
1117                        goto interesting;
1118                }
1119
1120                strbuf_setlen(base, base_offset + baselen);
1121
1122                /*
1123                 * Match all directories. We'll try to match files
1124                 * later on.
1125                 * max_depth is ignored but we may consider support it
1126                 * in future, see
1127                 * https://public-inbox.org/git/7vmxo5l2g4.fsf@alter.siamese.dyndns.org/
1128                 */
1129                if (ps->recursive && S_ISDIR(entry->mode))
1130                        return entry_interesting;
1131                continue;
1132interesting:
1133                if (item->attr_match_nr) {
1134                        int ret;
1135
1136                        /*
1137                         * Must not return all_entries_not_interesting
1138                         * prematurely. We do not know if all entries do not
1139                         * match some attributes with current attr API.
1140                         */
1141                        never_interesting = entry_not_interesting;
1142
1143                        /*
1144                         * Consider all directories interesting (because some
1145                         * of those files inside may match some attributes
1146                         * even though the parent dir does not)
1147                         *
1148                         * FIXME: attributes _can_ match directories and we
1149                         * can probably return all_entries_interesting or
1150                         * all_entries_not_interesting here if matched.
1151                         */
1152                        if (S_ISDIR(entry->mode))
1153                                return entry_interesting;
1154
1155                        strbuf_add(base, entry->path, pathlen);
1156                        ret = match_pathspec_attrs(istate, base->buf + base_offset,
1157                                                   base->len - base_offset, item);
1158                        strbuf_setlen(base, base_offset + baselen);
1159                        if (!ret)
1160                                continue;
1161                }
1162                return entry_interesting;
1163        }
1164        return never_interesting; /* No matches */
1165}
1166
1167/*
1168 * Is a tree entry interesting given the pathspec we have?
1169 *
1170 * Pre-condition: either baselen == base_offset (i.e. empty path)
1171 * or base[baselen-1] == '/' (i.e. with trailing slash).
1172 */
1173enum interesting tree_entry_interesting(struct index_state *istate,
1174                                        const struct name_entry *entry,
1175                                        struct strbuf *base, int base_offset,
1176                                        const struct pathspec *ps)
1177{
1178        enum interesting positive, negative;
1179        positive = do_match(istate, entry, base, base_offset, ps, 0);
1180
1181        /*
1182         * case | entry | positive | negative | result
1183         * -----+-------+----------+----------+-------
1184         *   1  |  file |   -1     |  -1..2   |  -1
1185         *   2  |  file |    0     |  -1..2   |   0
1186         *   3  |  file |    1     |   -1     |   1
1187         *   4  |  file |    1     |    0     |   1
1188         *   5  |  file |    1     |    1     |   0
1189         *   6  |  file |    1     |    2     |   0
1190         *   7  |  file |    2     |   -1     |   2
1191         *   8  |  file |    2     |    0     |   1
1192         *   9  |  file |    2     |    1     |   0
1193         *  10  |  file |    2     |    2     |  -1
1194         * -----+-------+----------+----------+-------
1195         *  11  |  dir  |   -1     |  -1..2   |  -1
1196         *  12  |  dir  |    0     |  -1..2   |   0
1197         *  13  |  dir  |    1     |   -1     |   1
1198         *  14  |  dir  |    1     |    0     |   1
1199         *  15  |  dir  |    1     |    1     |   1 (*)
1200         *  16  |  dir  |    1     |    2     |   0
1201         *  17  |  dir  |    2     |   -1     |   2
1202         *  18  |  dir  |    2     |    0     |   1
1203         *  19  |  dir  |    2     |    1     |   1 (*)
1204         *  20  |  dir  |    2     |    2     |  -1
1205         *
1206         * (*) An exclude pattern interested in a directory does not
1207         * necessarily mean it will exclude all of the directory. In
1208         * wildcard case, it can't decide until looking at individual
1209         * files inside. So don't write such directories off yet.
1210         */
1211
1212        if (!(ps->magic & PATHSPEC_EXCLUDE) ||
1213            positive <= entry_not_interesting) /* #1, #2, #11, #12 */
1214                return positive;
1215
1216        negative = do_match(istate, entry, base, base_offset, ps, 1);
1217
1218        /* #8, #18 */
1219        if (positive == all_entries_interesting &&
1220            negative == entry_not_interesting)
1221                return entry_interesting;
1222
1223        /* #3, #4, #7, #13, #14, #17 */
1224        if (negative <= entry_not_interesting)
1225                return positive;
1226
1227        /* #15, #19 */
1228        if (S_ISDIR(entry->mode) &&
1229            positive >= entry_interesting &&
1230            negative == entry_interesting)
1231                return entry_interesting;
1232
1233        if ((positive == entry_interesting &&
1234             negative >= entry_interesting) || /* #5, #6, #16 */
1235            (positive == all_entries_interesting &&
1236             negative == entry_interesting)) /* #9 */
1237                return entry_not_interesting;
1238
1239        return all_entries_not_interesting; /* #10, #20 */
1240}