/* tree-walk.c on commit t7008: demonstrate behavior of grep with textconv (97f6a9c) */
   1#include "cache.h"
   2#include "tree-walk.h"
   3#include "unpack-trees.h"
   4#include "dir.h"
   5#include "tree.h"
   6
   7static const char *get_mode(const char *str, unsigned int *modep)
   8{
   9        unsigned char c;
  10        unsigned int mode = 0;
  11
  12        if (*str == ' ')
  13                return NULL;
  14
  15        while ((c = *str++) != ' ') {
  16                if (c < '0' || c > '7')
  17                        return NULL;
  18                mode = (mode << 3) + (c - '0');
  19        }
  20        *modep = mode;
  21        return str;
  22}
  23
  24static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
  25{
  26        const char *path;
  27        unsigned int mode, len;
  28
  29        if (size < 24 || buf[size - 21])
  30                die("corrupt tree file");
  31
  32        path = get_mode(buf, &mode);
  33        if (!path || !*path)
  34                die("corrupt tree file");
  35        len = strlen(path) + 1;
  36
  37        /* Initialize the descriptor entry */
  38        desc->entry.path = path;
  39        desc->entry.mode = mode;
  40        desc->entry.sha1 = (const unsigned char *)(path + len);
  41}
  42
  43void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
  44{
  45        desc->buffer = buffer;
  46        desc->size = size;
  47        if (size)
  48                decode_tree_entry(desc, buffer, size);
  49}
  50
  51void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
  52{
  53        unsigned long size = 0;
  54        void *buf = NULL;
  55
  56        if (sha1) {
  57                buf = read_object_with_reference(sha1, tree_type, &size, NULL);
  58                if (!buf)
  59                        die("unable to read tree %s", sha1_to_hex(sha1));
  60        }
  61        init_tree_desc(desc, buf, size);
  62        return buf;
  63}
  64
  65static void entry_clear(struct name_entry *a)
  66{
  67        memset(a, 0, sizeof(*a));
  68}
  69
  70static void entry_extract(struct tree_desc *t, struct name_entry *a)
  71{
  72        *a = t->entry;
  73}
  74
  75void update_tree_entry(struct tree_desc *desc)
  76{
  77        const void *buf = desc->buffer;
  78        const unsigned char *end = desc->entry.sha1 + 20;
  79        unsigned long size = desc->size;
  80        unsigned long len = end - (const unsigned char *)buf;
  81
  82        if (size < len)
  83                die("corrupt tree file");
  84        buf = end;
  85        size -= len;
  86        desc->buffer = buf;
  87        desc->size = size;
  88        if (size)
  89                decode_tree_entry(desc, buf, size);
  90}
  91
  92int tree_entry(struct tree_desc *desc, struct name_entry *entry)
  93{
  94        if (!desc->size)
  95                return 0;
  96
  97        *entry = desc->entry;
  98        update_tree_entry(desc);
  99        return 1;
 100}
 101
 102void setup_traverse_info(struct traverse_info *info, const char *base)
 103{
 104        int pathlen = strlen(base);
 105        static struct traverse_info dummy;
 106
 107        memset(info, 0, sizeof(*info));
 108        if (pathlen && base[pathlen-1] == '/')
 109                pathlen--;
 110        info->pathlen = pathlen ? pathlen + 1 : 0;
 111        info->name.path = base;
 112        info->name.sha1 = (void *)(base + pathlen + 1);
 113        if (pathlen)
 114                info->prev = &dummy;
 115}
 116
 117char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n)
 118{
 119        int len = tree_entry_len(n);
 120        int pathlen = info->pathlen;
 121
 122        path[pathlen + len] = 0;
 123        for (;;) {
 124                memcpy(path + pathlen, n->path, len);
 125                if (!pathlen)
 126                        break;
 127                path[--pathlen] = '/';
 128                n = &info->name;
 129                len = tree_entry_len(n);
 130                info = info->prev;
 131                pathlen -= len;
 132        }
 133        return path;
 134}
 135
 136struct tree_desc_skip {
 137        struct tree_desc_skip *prev;
 138        const void *ptr;
 139};
 140
 141struct tree_desc_x {
 142        struct tree_desc d;
 143        struct tree_desc_skip *skip;
 144};
 145
 146static int name_compare(const char *a, int a_len,
 147                        const char *b, int b_len)
 148{
 149        int len = (a_len < b_len) ? a_len : b_len;
 150        int cmp = memcmp(a, b, len);
 151        if (cmp)
 152                return cmp;
 153        return (a_len - b_len);
 154}
 155
 156static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
 157{
 158        /*
 159         * The caller wants to pick *a* from a tree or nothing.
 160         * We are looking at *b* in a tree.
 161         *
 162         * (0) If a and b are the same name, we are trivially happy.
 163         *
 164         * There are three possibilities where *a* could be hiding
 165         * behind *b*.
 166         *
 167         * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
 168         *                                matter what.
 169         * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
 170         * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
 171         *
 172         * Otherwise we know *a* won't appear in the tree without
 173         * scanning further.
 174         */
 175
 176        int cmp = name_compare(a, a_len, b, b_len);
 177
 178        /* Most common case first -- reading sync'd trees */
 179        if (!cmp)
 180                return cmp;
 181
 182        if (0 < cmp) {
 183                /* a comes after b; it does not matter if it is case (3)
 184                if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
 185                        return 1;
 186                */
 187                return 1; /* keep looking */
 188        }
 189
 190        /* b comes after a; are we looking at case (2)? */
 191        if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
 192                return 1; /* keep looking */
 193
 194        return -1; /* a cannot appear in the tree */
 195}
 196
 197/*
 198 * From the extended tree_desc, extract the first name entry, while
 199 * paying attention to the candidate "first" name.  Most importantly,
 200 * when looking for an entry, if there are entries that sorts earlier
 201 * in the tree object representation than that name, skip them and
 202 * process the named entry first.  We will remember that we haven't
 203 * processed the first entry yet, and in the later call skip the
 204 * entry we processed early when update_extended_entry() is called.
 205 *
 206 * E.g. if the underlying tree object has these entries:
 207 *
 208 *    blob    "t-1"
 209 *    blob    "t-2"
 210 *    tree    "t"
 211 *    blob    "t=1"
 212 *
 213 * and the "first" asks for "t", remember that we still need to
 214 * process "t-1" and "t-2" but extract "t".  After processing the
 215 * entry "t" from this call, the caller will let us know by calling
 216 * update_extended_entry() that we can remember "t" has been processed
 217 * already.
 218 */
 219
 220static void extended_entry_extract(struct tree_desc_x *t,
 221                                   struct name_entry *a,
 222                                   const char *first,
 223                                   int first_len)
 224{
 225        const char *path;
 226        int len;
 227        struct tree_desc probe;
 228        struct tree_desc_skip *skip;
 229
 230        /*
 231         * Extract the first entry from the tree_desc, but skip the
 232         * ones that we already returned in earlier rounds.
 233         */
 234        while (1) {
 235                if (!t->d.size) {
 236                        entry_clear(a);
 237                        break; /* not found */
 238                }
 239                entry_extract(&t->d, a);
 240                for (skip = t->skip; skip; skip = skip->prev)
 241                        if (a->path == skip->ptr)
 242                                break; /* found */
 243                if (!skip)
 244                        break;
 245                /* We have processed this entry already. */
 246                update_tree_entry(&t->d);
 247        }
 248
 249        if (!first || !a->path)
 250                return;
 251
 252        /*
 253         * The caller wants "first" from this tree, or nothing.
 254         */
 255        path = a->path;
 256        len = tree_entry_len(a);
 257        switch (check_entry_match(first, first_len, path, len)) {
 258        case -1:
 259                entry_clear(a);
 260        case 0:
 261                return;
 262        default:
 263                break;
 264        }
 265
 266        /*
 267         * We need to look-ahead -- we suspect that a subtree whose
 268         * name is "first" may be hiding behind the current entry "path".
 269         */
 270        probe = t->d;
 271        while (probe.size) {
 272                entry_extract(&probe, a);
 273                path = a->path;
 274                len = tree_entry_len(a);
 275                switch (check_entry_match(first, first_len, path, len)) {
 276                case -1:
 277                        entry_clear(a);
 278                case 0:
 279                        return;
 280                default:
 281                        update_tree_entry(&probe);
 282                        break;
 283                }
 284                /* keep looking */
 285        }
 286        entry_clear(a);
 287}
 288
 289static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
 290{
 291        if (t->d.entry.path == a->path) {
 292                update_tree_entry(&t->d);
 293        } else {
 294                /* we have returned this entry early */
 295                struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
 296                skip->ptr = a->path;
 297                skip->prev = t->skip;
 298                t->skip = skip;
 299        }
 300}
 301
 302static void free_extended_entry(struct tree_desc_x *t)
 303{
 304        struct tree_desc_skip *p, *s;
 305
 306        for (s = t->skip; s; s = p) {
 307                p = s->prev;
 308                free(s);
 309        }
 310}
 311
 312static inline int prune_traversal(struct name_entry *e,
 313                                  struct traverse_info *info,
 314                                  struct strbuf *base,
 315                                  int still_interesting)
 316{
 317        if (!info->pathspec || still_interesting == 2)
 318                return 2;
 319        if (still_interesting < 0)
 320                return still_interesting;
 321        return tree_entry_interesting(e, base, 0, info->pathspec);
 322}
 323
 324int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 325{
 326        int ret = 0;
 327        int error = 0;
 328        struct name_entry *entry = xmalloc(n*sizeof(*entry));
 329        int i;
 330        struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
 331        struct strbuf base = STRBUF_INIT;
 332        int interesting = 1;
 333
 334        for (i = 0; i < n; i++)
 335                tx[i].d = t[i];
 336
 337        if (info->prev) {
 338                strbuf_grow(&base, info->pathlen);
 339                make_traverse_path(base.buf, info->prev, &info->name);
 340                base.buf[info->pathlen-1] = '/';
 341                strbuf_setlen(&base, info->pathlen);
 342        }
 343        for (;;) {
 344                unsigned long mask, dirmask;
 345                const char *first = NULL;
 346                int first_len = 0;
 347                struct name_entry *e = NULL;
 348                int len;
 349
 350                for (i = 0; i < n; i++) {
 351                        e = entry + i;
 352                        extended_entry_extract(tx + i, e, NULL, 0);
 353                }
 354
 355                /*
 356                 * A tree may have "t-2" at the current location even
 357                 * though it may have "t" that is a subtree behind it,
 358                 * and another tree may return "t".  We want to grab
 359                 * all "t" from all trees to match in such a case.
 360                 */
 361                for (i = 0; i < n; i++) {
 362                        e = entry + i;
 363                        if (!e->path)
 364                                continue;
 365                        len = tree_entry_len(e);
 366                        if (!first) {
 367                                first = e->path;
 368                                first_len = len;
 369                                continue;
 370                        }
 371                        if (name_compare(e->path, len, first, first_len) < 0) {
 372                                first = e->path;
 373                                first_len = len;
 374                        }
 375                }
 376
 377                if (first) {
 378                        for (i = 0; i < n; i++) {
 379                                e = entry + i;
 380                                extended_entry_extract(tx + i, e, first, first_len);
 381                                /* Cull the ones that are not the earliest */
 382                                if (!e->path)
 383                                        continue;
 384                                len = tree_entry_len(e);
 385                                if (name_compare(e->path, len, first, first_len))
 386                                        entry_clear(e);
 387                        }
 388                }
 389
 390                /* Now we have in entry[i] the earliest name from the trees */
 391                mask = 0;
 392                dirmask = 0;
 393                for (i = 0; i < n; i++) {
 394                        if (!entry[i].path)
 395                                continue;
 396                        mask |= 1ul << i;
 397                        if (S_ISDIR(entry[i].mode))
 398                                dirmask |= 1ul << i;
 399                        e = &entry[i];
 400                }
 401                if (!mask)
 402                        break;
 403                interesting = prune_traversal(e, info, &base, interesting);
 404                if (interesting < 0)
 405                        break;
 406                if (interesting) {
 407                        ret = info->fn(n, mask, dirmask, entry, info);
 408                        if (ret < 0) {
 409                                error = ret;
 410                                if (!info->show_all_errors)
 411                                        break;
 412                        }
 413                        mask &= ret;
 414                }
 415                ret = 0;
 416                for (i = 0; i < n; i++)
 417                        if (mask & (1ul << i))
 418                                update_extended_entry(tx + i, entry + i);
 419        }
 420        free(entry);
 421        for (i = 0; i < n; i++)
 422                free_extended_entry(tx + i);
 423        free(tx);
 424        strbuf_release(&base);
 425        return error;
 426}
 427
 428static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
 429{
 430        int namelen = strlen(name);
 431        while (t->size) {
 432                const char *entry;
 433                const unsigned char *sha1;
 434                int entrylen, cmp;
 435
 436                sha1 = tree_entry_extract(t, &entry, mode);
 437                entrylen = tree_entry_len(&t->entry);
 438                update_tree_entry(t);
 439                if (entrylen > namelen)
 440                        continue;
 441                cmp = memcmp(name, entry, entrylen);
 442                if (cmp > 0)
 443                        continue;
 444                if (cmp < 0)
 445                        break;
 446                if (entrylen == namelen) {
 447                        hashcpy(result, sha1);
 448                        return 0;
 449                }
 450                if (name[entrylen] != '/')
 451                        continue;
 452                if (!S_ISDIR(*mode))
 453                        break;
 454                if (++entrylen == namelen) {
 455                        hashcpy(result, sha1);
 456                        return 0;
 457                }
 458                return get_tree_entry(sha1, name + entrylen, result, mode);
 459        }
 460        return -1;
 461}
 462
 463int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned char *sha1, unsigned *mode)
 464{
 465        int retval;
 466        void *tree;
 467        unsigned long size;
 468        unsigned char root[20];
 469
 470        tree = read_object_with_reference(tree_sha1, tree_type, &size, root);
 471        if (!tree)
 472                return -1;
 473
 474        if (name[0] == '\0') {
 475                hashcpy(sha1, root);
 476                free(tree);
 477                return 0;
 478        }
 479
 480        if (!size) {
 481                retval = -1;
 482        } else {
 483                struct tree_desc t;
 484                init_tree_desc(&t, tree, size);
 485                retval = find_tree_entry(&t, name, sha1, mode);
 486        }
 487        free(tree);
 488        return retval;
 489}
 490
 491static int match_entry(const struct name_entry *entry, int pathlen,
 492                       const char *match, int matchlen,
 493                       enum interesting *never_interesting)
 494{
 495        int m = -1; /* signals that we haven't called strncmp() */
 496
 497        if (*never_interesting != entry_not_interesting) {
 498                /*
 499                 * We have not seen any match that sorts later
 500                 * than the current path.
 501                 */
 502
 503                /*
 504                 * Does match sort strictly earlier than path
 505                 * with their common parts?
 506                 */
 507                m = strncmp(match, entry->path,
 508                            (matchlen < pathlen) ? matchlen : pathlen);
 509                if (m < 0)
 510                        return 0;
 511
 512                /*
 513                 * If we come here even once, that means there is at
 514                 * least one pathspec that would sort equal to or
 515                 * later than the path we are currently looking at.
 516                 * In other words, if we have never reached this point
 517                 * after iterating all pathspecs, it means all
 518                 * pathspecs are either outside of base, or inside the
 519                 * base but sorts strictly earlier than the current
 520                 * one.  In either case, they will never match the
 521                 * subsequent entries.  In such a case, we initialized
 522                 * the variable to -1 and that is what will be
 523                 * returned, allowing the caller to terminate early.
 524                 */
 525                *never_interesting = entry_not_interesting;
 526        }
 527
 528        if (pathlen > matchlen)
 529                return 0;
 530
 531        if (matchlen > pathlen) {
 532                if (match[pathlen] != '/')
 533                        return 0;
 534                if (!S_ISDIR(entry->mode))
 535                        return 0;
 536        }
 537
 538        if (m == -1)
 539                /*
 540                 * we cheated and did not do strncmp(), so we do
 541                 * that here.
 542                 */
 543                m = strncmp(match, entry->path, pathlen);
 544
 545        /*
 546         * If common part matched earlier then it is a hit,
 547         * because we rejected the case where path is not a
 548         * leading directory and is shorter than match.
 549         */
 550        if (!m)
 551                return 1;
 552
 553        return 0;
 554}
 555
 556static int match_dir_prefix(const char *base,
 557                            const char *match, int matchlen)
 558{
 559        if (strncmp(base, match, matchlen))
 560                return 0;
 561
 562        /*
 563         * If the base is a subdirectory of a path which
 564         * was specified, all of them are interesting.
 565         */
 566        if (!matchlen ||
 567            base[matchlen] == '/' ||
 568            match[matchlen - 1] == '/')
 569                return 1;
 570
 571        /* Just a random prefix match */
 572        return 0;
 573}
 574
 575/*
 576 * Perform matching on the leading non-wildcard part of
 577 * pathspec. item->nowildcard_len must be greater than zero. Return
 578 * non-zero if base is matched.
 579 */
 580static int match_wildcard_base(const struct pathspec_item *item,
 581                               const char *base, int baselen,
 582                               int *matched)
 583{
 584        const char *match = item->match;
 585        /* the wildcard part is not considered in this function */
 586        int matchlen = item->nowildcard_len;
 587
 588        if (baselen) {
 589                int dirlen;
 590                /*
 591                 * Return early if base is longer than the
 592                 * non-wildcard part but it does not match.
 593                 */
 594                if (baselen >= matchlen) {
 595                        *matched = matchlen;
 596                        return !strncmp(base, match, matchlen);
 597                }
 598
 599                dirlen = matchlen;
 600                while (dirlen && match[dirlen - 1] != '/')
 601                        dirlen--;
 602
 603                /*
 604                 * Return early if base is shorter than the
 605                 * non-wildcard part but it does not match. Note that
 606                 * base ends with '/' so we are sure it really matches
 607                 * directory
 608                 */
 609                if (strncmp(base, match, baselen))
 610                        return 0;
 611                *matched = baselen;
 612        } else
 613                *matched = 0;
 614        /*
 615         * we could have checked entry against the non-wildcard part
 616         * that is not in base and does similar never_interesting
 617         * optimization as in match_entry. For now just be happy with
 618         * base comparison.
 619         */
 620        return entry_interesting;
 621}
 622
 623/*
 624 * Is a tree entry interesting given the pathspec we have?
 625 *
 626 * Pre-condition: either baselen == base_offset (i.e. empty path)
 627 * or base[baselen-1] == '/' (i.e. with trailing slash).
 628 */
 629enum interesting tree_entry_interesting(const struct name_entry *entry,
 630                                        struct strbuf *base, int base_offset,
 631                                        const struct pathspec *ps)
 632{
 633        int i;
 634        int pathlen, baselen = base->len - base_offset;
 635        enum interesting never_interesting = ps->has_wildcard ?
 636                entry_not_interesting : all_entries_not_interesting;
 637
 638        if (!ps->nr) {
 639                if (!ps->recursive || ps->max_depth == -1)
 640                        return all_entries_interesting;
 641                return within_depth(base->buf + base_offset, baselen,
 642                                    !!S_ISDIR(entry->mode),
 643                                    ps->max_depth) ?
 644                        entry_interesting : entry_not_interesting;
 645        }
 646
 647        pathlen = tree_entry_len(entry);
 648
 649        for (i = ps->nr - 1; i >= 0; i--) {
 650                const struct pathspec_item *item = ps->items+i;
 651                const char *match = item->match;
 652                const char *base_str = base->buf + base_offset;
 653                int matchlen = item->len, matched = 0;
 654
 655                if (baselen >= matchlen) {
 656                        /* If it doesn't match, move along... */
 657                        if (!match_dir_prefix(base_str, match, matchlen))
 658                                goto match_wildcards;
 659
 660                        if (!ps->recursive || ps->max_depth == -1)
 661                                return all_entries_interesting;
 662
 663                        return within_depth(base_str + matchlen + 1,
 664                                            baselen - matchlen - 1,
 665                                            !!S_ISDIR(entry->mode),
 666                                            ps->max_depth) ?
 667                                entry_interesting : entry_not_interesting;
 668                }
 669
 670                /* Either there must be no base, or the base must match. */
 671                if (baselen == 0 || !strncmp(base_str, match, baselen)) {
 672                        if (match_entry(entry, pathlen,
 673                                        match + baselen, matchlen - baselen,
 674                                        &never_interesting))
 675                                return entry_interesting;
 676
 677                        if (item->nowildcard_len < item->len) {
 678                                if (!git_fnmatch(match + baselen, entry->path,
 679                                                 item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
 680                                                 item->nowildcard_len - baselen))
 681                                        return entry_interesting;
 682
 683                                /*
 684                                 * Match all directories. We'll try to
 685                                 * match files later on.
 686                                 */
 687                                if (ps->recursive && S_ISDIR(entry->mode))
 688                                        return entry_interesting;
 689                        }
 690
 691                        continue;
 692                }
 693
 694match_wildcards:
 695                if (item->nowildcard_len == item->len)
 696                        continue;
 697
 698                if (item->nowildcard_len &&
 699                    !match_wildcard_base(item, base_str, baselen, &matched))
 700                        return entry_not_interesting;
 701
 702                /*
 703                 * Concatenate base and entry->path into one and do
 704                 * fnmatch() on it.
 705                 *
 706                 * While we could avoid concatenation in certain cases
 707                 * [1], which saves a memcpy and potentially a
 708                 * realloc, it turns out not worth it. Measurement on
 709                 * linux-2.6 does not show any clear improvements,
 710                 * partly because of the nowildcard_len optimization
 711                 * in git_fnmatch(). Avoid micro-optimizations here.
 712                 *
 713                 * [1] if match_wildcard_base() says the base
 714                 * directory is already matched, we only need to match
 715                 * the rest, which is shorter so _in theory_ faster.
 716                 */
 717
 718                strbuf_add(base, entry->path, pathlen);
 719
 720                if (!git_fnmatch(match, base->buf + base_offset,
 721                                 item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
 722                                 item->nowildcard_len)) {
 723                        strbuf_setlen(base, base_offset + baselen);
 724                        return entry_interesting;
 725                }
 726                strbuf_setlen(base, base_offset + baselen);
 727
 728                /*
 729                 * Match all directories. We'll try to match files
 730                 * later on.
 731                 * max_depth is ignored but we may consider support it
 732                 * in future, see
 733                 * http://thread.gmane.org/gmane.comp.version-control.git/163757/focus=163840
 734                 */
 735                if (ps->recursive && S_ISDIR(entry->mode))
 736                        return entry_interesting;
 737        }
 738        return never_interesting; /* No matches */
 739}