tree-walk.con commit log_tree_diff: die when we fail to parse a commit (7059dcc)
   1#include "cache.h"
   2#include "tree-walk.h"
   3#include "unpack-trees.h"
   4#include "dir.h"
   5#include "tree.h"
   6
   7static const char *get_mode(const char *str, unsigned int *modep)
   8{
   9        unsigned char c;
  10        unsigned int mode = 0;
  11
  12        if (*str == ' ')
  13                return NULL;
  14
  15        while ((c = *str++) != ' ') {
  16                if (c < '0' || c > '7')
  17                        return NULL;
  18                mode = (mode << 3) + (c - '0');
  19        }
  20        *modep = mode;
  21        return str;
  22}
  23
  24static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
  25{
  26        const char *path;
  27        unsigned int mode, len;
  28
  29        if (size < 24 || buf[size - 21])
  30                die("corrupt tree file");
  31
  32        path = get_mode(buf, &mode);
  33        if (!path || !*path)
  34                die("corrupt tree file");
  35        len = strlen(path) + 1;
  36
  37        /* Initialize the descriptor entry */
  38        desc->entry.path = path;
  39        desc->entry.mode = mode;
  40        desc->entry.sha1 = (const unsigned char *)(path + len);
  41}
  42
  43void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
  44{
  45        desc->buffer = buffer;
  46        desc->size = size;
  47        if (size)
  48                decode_tree_entry(desc, buffer, size);
  49}
  50
  51void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
  52{
  53        unsigned long size = 0;
  54        void *buf = NULL;
  55
  56        if (sha1) {
  57                buf = read_object_with_reference(sha1, tree_type, &size, NULL);
  58                if (!buf)
  59                        die("unable to read tree %s", sha1_to_hex(sha1));
  60        }
  61        init_tree_desc(desc, buf, size);
  62        return buf;
  63}
  64
  65static void entry_clear(struct name_entry *a)
  66{
  67        memset(a, 0, sizeof(*a));
  68}
  69
  70static void entry_extract(struct tree_desc *t, struct name_entry *a)
  71{
  72        *a = t->entry;
  73}
  74
  75void update_tree_entry(struct tree_desc *desc)
  76{
  77        const void *buf = desc->buffer;
  78        const unsigned char *end = desc->entry.sha1 + 20;
  79        unsigned long size = desc->size;
  80        unsigned long len = end - (const unsigned char *)buf;
  81
  82        if (size < len)
  83                die("corrupt tree file");
  84        buf = end;
  85        size -= len;
  86        desc->buffer = buf;
  87        desc->size = size;
  88        if (size)
  89                decode_tree_entry(desc, buf, size);
  90}
  91
  92int tree_entry(struct tree_desc *desc, struct name_entry *entry)
  93{
  94        if (!desc->size)
  95                return 0;
  96
  97        *entry = desc->entry;
  98        update_tree_entry(desc);
  99        return 1;
 100}
 101
 102void setup_traverse_info(struct traverse_info *info, const char *base)
 103{
 104        int pathlen = strlen(base);
 105        static struct traverse_info dummy;
 106
 107        memset(info, 0, sizeof(*info));
 108        if (pathlen && base[pathlen-1] == '/')
 109                pathlen--;
 110        info->pathlen = pathlen ? pathlen + 1 : 0;
 111        info->name.path = base;
 112        info->name.sha1 = (void *)(base + pathlen + 1);
 113        if (pathlen)
 114                info->prev = &dummy;
 115}
 116
 117char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n)
 118{
 119        int len = tree_entry_len(n);
 120        int pathlen = info->pathlen;
 121
 122        path[pathlen + len] = 0;
 123        for (;;) {
 124                memcpy(path + pathlen, n->path, len);
 125                if (!pathlen)
 126                        break;
 127                path[--pathlen] = '/';
 128                n = &info->name;
 129                len = tree_entry_len(n);
 130                info = info->prev;
 131                pathlen -= len;
 132        }
 133        return path;
 134}
 135
 136struct tree_desc_skip {
 137        struct tree_desc_skip *prev;
 138        const void *ptr;
 139};
 140
 141struct tree_desc_x {
 142        struct tree_desc d;
 143        struct tree_desc_skip *skip;
 144};
 145
 146static int name_compare(const char *a, int a_len,
 147                        const char *b, int b_len)
 148{
 149        int len = (a_len < b_len) ? a_len : b_len;
 150        int cmp = memcmp(a, b, len);
 151        if (cmp)
 152                return cmp;
 153        return (a_len - b_len);
 154}
 155
 156static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
 157{
 158        /*
 159         * The caller wants to pick *a* from a tree or nothing.
 160         * We are looking at *b* in a tree.
 161         *
 162         * (0) If a and b are the same name, we are trivially happy.
 163         *
 164         * There are three possibilities where *a* could be hiding
 165         * behind *b*.
 166         *
 167         * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
 168         *                                matter what.
 169         * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
 170         * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
 171         *
 172         * Otherwise we know *a* won't appear in the tree without
 173         * scanning further.
 174         */
 175
 176        int cmp = name_compare(a, a_len, b, b_len);
 177
 178        /* Most common case first -- reading sync'd trees */
 179        if (!cmp)
 180                return cmp;
 181
 182        if (0 < cmp) {
 183                /* a comes after b; it does not matter if it is case (3)
 184                if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
 185                        return 1;
 186                */
 187                return 1; /* keep looking */
 188        }
 189
 190        /* b comes after a; are we looking at case (2)? */
 191        if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
 192                return 1; /* keep looking */
 193
 194        return -1; /* a cannot appear in the tree */
 195}
 196
 197/*
 198 * From the extended tree_desc, extract the first name entry, while
 199 * paying attention to the candidate "first" name.  Most importantly,
 200 * when looking for an entry, if there are entries that sorts earlier
 201 * in the tree object representation than that name, skip them and
 202 * process the named entry first.  We will remember that we haven't
 203 * processed the first entry yet, and in the later call skip the
 204 * entry we processed early when update_extended_entry() is called.
 205 *
 206 * E.g. if the underlying tree object has these entries:
 207 *
 208 *    blob    "t-1"
 209 *    blob    "t-2"
 210 *    tree    "t"
 211 *    blob    "t=1"
 212 *
 213 * and the "first" asks for "t", remember that we still need to
 214 * process "t-1" and "t-2" but extract "t".  After processing the
 215 * entry "t" from this call, the caller will let us know by calling
 216 * update_extended_entry() that we can remember "t" has been processed
 217 * already.
 218 */
 219
 220static void extended_entry_extract(struct tree_desc_x *t,
 221                                   struct name_entry *a,
 222                                   const char *first,
 223                                   int first_len)
 224{
 225        const char *path;
 226        int len;
 227        struct tree_desc probe;
 228        struct tree_desc_skip *skip;
 229
 230        /*
 231         * Extract the first entry from the tree_desc, but skip the
 232         * ones that we already returned in earlier rounds.
 233         */
 234        while (1) {
 235                if (!t->d.size) {
 236                        entry_clear(a);
 237                        break; /* not found */
 238                }
 239                entry_extract(&t->d, a);
 240                for (skip = t->skip; skip; skip = skip->prev)
 241                        if (a->path == skip->ptr)
 242                                break; /* found */
 243                if (!skip)
 244                        break;
 245                /* We have processed this entry already. */
 246                update_tree_entry(&t->d);
 247        }
 248
 249        if (!first || !a->path)
 250                return;
 251
 252        /*
 253         * The caller wants "first" from this tree, or nothing.
 254         */
 255        path = a->path;
 256        len = tree_entry_len(a);
 257        switch (check_entry_match(first, first_len, path, len)) {
 258        case -1:
 259                entry_clear(a);
 260        case 0:
 261                return;
 262        default:
 263                break;
 264        }
 265
 266        /*
 267         * We need to look-ahead -- we suspect that a subtree whose
 268         * name is "first" may be hiding behind the current entry "path".
 269         */
 270        probe = t->d;
 271        while (probe.size) {
 272                entry_extract(&probe, a);
 273                path = a->path;
 274                len = tree_entry_len(a);
 275                switch (check_entry_match(first, first_len, path, len)) {
 276                case -1:
 277                        entry_clear(a);
 278                case 0:
 279                        return;
 280                default:
 281                        update_tree_entry(&probe);
 282                        break;
 283                }
 284                /* keep looking */
 285        }
 286        entry_clear(a);
 287}
 288
 289static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
 290{
 291        if (t->d.entry.path == a->path) {
 292                update_tree_entry(&t->d);
 293        } else {
 294                /* we have returned this entry early */
 295                struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
 296                skip->ptr = a->path;
 297                skip->prev = t->skip;
 298                t->skip = skip;
 299        }
 300}
 301
 302static void free_extended_entry(struct tree_desc_x *t)
 303{
 304        struct tree_desc_skip *p, *s;
 305
 306        for (s = t->skip; s; s = p) {
 307                p = s->prev;
 308                free(s);
 309        }
 310}
 311
 312static inline int prune_traversal(struct name_entry *e,
 313                                  struct traverse_info *info,
 314                                  struct strbuf *base,
 315                                  int still_interesting)
 316{
 317        if (!info->pathspec || still_interesting == 2)
 318                return 2;
 319        if (still_interesting < 0)
 320                return still_interesting;
 321        return tree_entry_interesting(e, base, 0, info->pathspec);
 322}
 323
 324int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 325{
 326        int error = 0;
 327        struct name_entry *entry = xmalloc(n*sizeof(*entry));
 328        int i;
 329        struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
 330        struct strbuf base = STRBUF_INIT;
 331        int interesting = 1;
 332
 333        for (i = 0; i < n; i++)
 334                tx[i].d = t[i];
 335
 336        if (info->prev) {
 337                strbuf_grow(&base, info->pathlen);
 338                make_traverse_path(base.buf, info->prev, &info->name);
 339                base.buf[info->pathlen-1] = '/';
 340                strbuf_setlen(&base, info->pathlen);
 341        }
 342        for (;;) {
 343                int trees_used;
 344                unsigned long mask, dirmask;
 345                const char *first = NULL;
 346                int first_len = 0;
 347                struct name_entry *e = NULL;
 348                int len;
 349
 350                for (i = 0; i < n; i++) {
 351                        e = entry + i;
 352                        extended_entry_extract(tx + i, e, NULL, 0);
 353                }
 354
 355                /*
 356                 * A tree may have "t-2" at the current location even
 357                 * though it may have "t" that is a subtree behind it,
 358                 * and another tree may return "t".  We want to grab
 359                 * all "t" from all trees to match in such a case.
 360                 */
 361                for (i = 0; i < n; i++) {
 362                        e = entry + i;
 363                        if (!e->path)
 364                                continue;
 365                        len = tree_entry_len(e);
 366                        if (!first) {
 367                                first = e->path;
 368                                first_len = len;
 369                                continue;
 370                        }
 371                        if (name_compare(e->path, len, first, first_len) < 0) {
 372                                first = e->path;
 373                                first_len = len;
 374                        }
 375                }
 376
 377                if (first) {
 378                        for (i = 0; i < n; i++) {
 379                                e = entry + i;
 380                                extended_entry_extract(tx + i, e, first, first_len);
 381                                /* Cull the ones that are not the earliest */
 382                                if (!e->path)
 383                                        continue;
 384                                len = tree_entry_len(e);
 385                                if (name_compare(e->path, len, first, first_len))
 386                                        entry_clear(e);
 387                        }
 388                }
 389
 390                /* Now we have in entry[i] the earliest name from the trees */
 391                mask = 0;
 392                dirmask = 0;
 393                for (i = 0; i < n; i++) {
 394                        if (!entry[i].path)
 395                                continue;
 396                        mask |= 1ul << i;
 397                        if (S_ISDIR(entry[i].mode))
 398                                dirmask |= 1ul << i;
 399                        e = &entry[i];
 400                }
 401                if (!mask)
 402                        break;
 403                interesting = prune_traversal(e, info, &base, interesting);
 404                if (interesting < 0)
 405                        break;
 406                if (interesting) {
 407                        trees_used = info->fn(n, mask, dirmask, entry, info);
 408                        if (trees_used < 0) {
 409                                error = trees_used;
 410                                if (!info->show_all_errors)
 411                                        break;
 412                        }
 413                        mask &= trees_used;
 414                }
 415                for (i = 0; i < n; i++)
 416                        if (mask & (1ul << i))
 417                                update_extended_entry(tx + i, entry + i);
 418        }
 419        free(entry);
 420        for (i = 0; i < n; i++)
 421                free_extended_entry(tx + i);
 422        free(tx);
 423        strbuf_release(&base);
 424        return error;
 425}
 426
 427static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
 428{
 429        int namelen = strlen(name);
 430        while (t->size) {
 431                const char *entry;
 432                const unsigned char *sha1;
 433                int entrylen, cmp;
 434
 435                sha1 = tree_entry_extract(t, &entry, mode);
 436                entrylen = tree_entry_len(&t->entry);
 437                update_tree_entry(t);
 438                if (entrylen > namelen)
 439                        continue;
 440                cmp = memcmp(name, entry, entrylen);
 441                if (cmp > 0)
 442                        continue;
 443                if (cmp < 0)
 444                        break;
 445                if (entrylen == namelen) {
 446                        hashcpy(result, sha1);
 447                        return 0;
 448                }
 449                if (name[entrylen] != '/')
 450                        continue;
 451                if (!S_ISDIR(*mode))
 452                        break;
 453                if (++entrylen == namelen) {
 454                        hashcpy(result, sha1);
 455                        return 0;
 456                }
 457                return get_tree_entry(sha1, name + entrylen, result, mode);
 458        }
 459        return -1;
 460}
 461
 462int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned char *sha1, unsigned *mode)
 463{
 464        int retval;
 465        void *tree;
 466        unsigned long size;
 467        unsigned char root[20];
 468
 469        tree = read_object_with_reference(tree_sha1, tree_type, &size, root);
 470        if (!tree)
 471                return -1;
 472
 473        if (name[0] == '\0') {
 474                hashcpy(sha1, root);
 475                free(tree);
 476                return 0;
 477        }
 478
 479        if (!size) {
 480                retval = -1;
 481        } else {
 482                struct tree_desc t;
 483                init_tree_desc(&t, tree, size);
 484                retval = find_tree_entry(&t, name, sha1, mode);
 485        }
 486        free(tree);
 487        return retval;
 488}
 489
 490static int match_entry(const struct name_entry *entry, int pathlen,
 491                       const char *match, int matchlen,
 492                       enum interesting *never_interesting)
 493{
 494        int m = -1; /* signals that we haven't called strncmp() */
 495
 496        if (*never_interesting != entry_not_interesting) {
 497                /*
 498                 * We have not seen any match that sorts later
 499                 * than the current path.
 500                 */
 501
 502                /*
 503                 * Does match sort strictly earlier than path
 504                 * with their common parts?
 505                 */
 506                m = strncmp(match, entry->path,
 507                            (matchlen < pathlen) ? matchlen : pathlen);
 508                if (m < 0)
 509                        return 0;
 510
 511                /*
 512                 * If we come here even once, that means there is at
 513                 * least one pathspec that would sort equal to or
 514                 * later than the path we are currently looking at.
 515                 * In other words, if we have never reached this point
 516                 * after iterating all pathspecs, it means all
 517                 * pathspecs are either outside of base, or inside the
 518                 * base but sorts strictly earlier than the current
 519                 * one.  In either case, they will never match the
 520                 * subsequent entries.  In such a case, we initialized
 521                 * the variable to -1 and that is what will be
 522                 * returned, allowing the caller to terminate early.
 523                 */
 524                *never_interesting = entry_not_interesting;
 525        }
 526
 527        if (pathlen > matchlen)
 528                return 0;
 529
 530        if (matchlen > pathlen) {
 531                if (match[pathlen] != '/')
 532                        return 0;
 533                if (!S_ISDIR(entry->mode))
 534                        return 0;
 535        }
 536
 537        if (m == -1)
 538                /*
 539                 * we cheated and did not do strncmp(), so we do
 540                 * that here.
 541                 */
 542                m = strncmp(match, entry->path, pathlen);
 543
 544        /*
 545         * If common part matched earlier then it is a hit,
 546         * because we rejected the case where path is not a
 547         * leading directory and is shorter than match.
 548         */
 549        if (!m)
 550                return 1;
 551
 552        return 0;
 553}
 554
 555static int match_dir_prefix(const char *base,
 556                            const char *match, int matchlen)
 557{
 558        if (strncmp(base, match, matchlen))
 559                return 0;
 560
 561        /*
 562         * If the base is a subdirectory of a path which
 563         * was specified, all of them are interesting.
 564         */
 565        if (!matchlen ||
 566            base[matchlen] == '/' ||
 567            match[matchlen - 1] == '/')
 568                return 1;
 569
 570        /* Just a random prefix match */
 571        return 0;
 572}
 573
 574/*
 575 * Perform matching on the leading non-wildcard part of
 576 * pathspec. item->nowildcard_len must be greater than zero. Return
 577 * non-zero if base is matched.
 578 */
 579static int match_wildcard_base(const struct pathspec_item *item,
 580                               const char *base, int baselen,
 581                               int *matched)
 582{
 583        const char *match = item->match;
 584        /* the wildcard part is not considered in this function */
 585        int matchlen = item->nowildcard_len;
 586
 587        if (baselen) {
 588                int dirlen;
 589                /*
 590                 * Return early if base is longer than the
 591                 * non-wildcard part but it does not match.
 592                 */
 593                if (baselen >= matchlen) {
 594                        *matched = matchlen;
 595                        return !strncmp(base, match, matchlen);
 596                }
 597
 598                dirlen = matchlen;
 599                while (dirlen && match[dirlen - 1] != '/')
 600                        dirlen--;
 601
 602                /*
 603                 * Return early if base is shorter than the
 604                 * non-wildcard part but it does not match. Note that
 605                 * base ends with '/' so we are sure it really matches
 606                 * directory
 607                 */
 608                if (strncmp(base, match, baselen))
 609                        return 0;
 610                *matched = baselen;
 611        } else
 612                *matched = 0;
 613        /*
 614         * we could have checked entry against the non-wildcard part
 615         * that is not in base and does similar never_interesting
 616         * optimization as in match_entry. For now just be happy with
 617         * base comparison.
 618         */
 619        return entry_interesting;
 620}
 621
 622/*
 623 * Is a tree entry interesting given the pathspec we have?
 624 *
 625 * Pre-condition: either baselen == base_offset (i.e. empty path)
 626 * or base[baselen-1] == '/' (i.e. with trailing slash).
 627 */
 628enum interesting tree_entry_interesting(const struct name_entry *entry,
 629                                        struct strbuf *base, int base_offset,
 630                                        const struct pathspec *ps)
 631{
 632        int i;
 633        int pathlen, baselen = base->len - base_offset;
 634        enum interesting never_interesting = ps->has_wildcard ?
 635                entry_not_interesting : all_entries_not_interesting;
 636
 637        if (!ps->nr) {
 638                if (!ps->recursive || ps->max_depth == -1)
 639                        return all_entries_interesting;
 640                return within_depth(base->buf + base_offset, baselen,
 641                                    !!S_ISDIR(entry->mode),
 642                                    ps->max_depth) ?
 643                        entry_interesting : entry_not_interesting;
 644        }
 645
 646        pathlen = tree_entry_len(entry);
 647
 648        for (i = ps->nr - 1; i >= 0; i--) {
 649                const struct pathspec_item *item = ps->items+i;
 650                const char *match = item->match;
 651                const char *base_str = base->buf + base_offset;
 652                int matchlen = item->len, matched = 0;
 653
 654                if (baselen >= matchlen) {
 655                        /* If it doesn't match, move along... */
 656                        if (!match_dir_prefix(base_str, match, matchlen))
 657                                goto match_wildcards;
 658
 659                        if (!ps->recursive || ps->max_depth == -1)
 660                                return all_entries_interesting;
 661
 662                        return within_depth(base_str + matchlen + 1,
 663                                            baselen - matchlen - 1,
 664                                            !!S_ISDIR(entry->mode),
 665                                            ps->max_depth) ?
 666                                entry_interesting : entry_not_interesting;
 667                }
 668
 669                /* Either there must be no base, or the base must match. */
 670                if (baselen == 0 || !strncmp(base_str, match, baselen)) {
 671                        if (match_entry(entry, pathlen,
 672                                        match + baselen, matchlen - baselen,
 673                                        &never_interesting))
 674                                return entry_interesting;
 675
 676                        if (item->nowildcard_len < item->len) {
 677                                if (!git_fnmatch(match + baselen, entry->path,
 678                                                 item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
 679                                                 item->nowildcard_len - baselen))
 680                                        return entry_interesting;
 681
 682                                /*
 683                                 * Match all directories. We'll try to
 684                                 * match files later on.
 685                                 */
 686                                if (ps->recursive && S_ISDIR(entry->mode))
 687                                        return entry_interesting;
 688                        }
 689
 690                        continue;
 691                }
 692
 693match_wildcards:
 694                if (item->nowildcard_len == item->len)
 695                        continue;
 696
 697                if (item->nowildcard_len &&
 698                    !match_wildcard_base(item, base_str, baselen, &matched))
 699                        return entry_not_interesting;
 700
 701                /*
 702                 * Concatenate base and entry->path into one and do
 703                 * fnmatch() on it.
 704                 *
 705                 * While we could avoid concatenation in certain cases
 706                 * [1], which saves a memcpy and potentially a
 707                 * realloc, it turns out not worth it. Measurement on
 708                 * linux-2.6 does not show any clear improvements,
 709                 * partly because of the nowildcard_len optimization
 710                 * in git_fnmatch(). Avoid micro-optimizations here.
 711                 *
 712                 * [1] if match_wildcard_base() says the base
 713                 * directory is already matched, we only need to match
 714                 * the rest, which is shorter so _in theory_ faster.
 715                 */
 716
 717                strbuf_add(base, entry->path, pathlen);
 718
 719                if (!git_fnmatch(match, base->buf + base_offset,
 720                                 item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
 721                                 item->nowildcard_len)) {
 722                        strbuf_setlen(base, base_offset + baselen);
 723                        return entry_interesting;
 724                }
 725                strbuf_setlen(base, base_offset + baselen);
 726
 727                /*
 728                 * Match all directories. We'll try to match files
 729                 * later on.
 730                 * max_depth is ignored but we may consider support it
 731                 * in future, see
 732                 * http://thread.gmane.org/gmane.comp.version-control.git/163757/focus=163840
 733                 */
 734                if (ps->recursive && S_ISDIR(entry->mode))
 735                        return entry_interesting;
 736        }
 737        return never_interesting; /* No matches */
 738}