tree-walk.con commit docs: mention "-k" for both forms of "git mv" (cfe21f0)
   1#include "cache.h"
   2#include "tree-walk.h"
   3#include "unpack-trees.h"
   4#include "dir.h"
   5#include "tree.h"
   6
   /*
    * Parse the octal file mode that begins a tree entry.
    *
    * Digits are consumed from "str" up to the first space.  On success the
    * value is stored in "*modep" and the returned pointer addresses the
    * character right after that space.  NULL is returned, and "*modep" is
    * left alone, when the mode is empty or when anything other than
    * '0'..'7' (including a terminating NUL) shows up before the space.
    */
   static const char *get_mode(const char *str, unsigned int *modep)
   {
           unsigned int value = 0;
           const char *p = str;

           if (*p == ' ')
                   return NULL;

           for (;;) {
                   unsigned char ch = *p++;

                   if (ch == ' ')
                           break;
                   if (ch < '0' || ch > '7')
                           return NULL;
                   value = (value << 3) + (ch - '0');
           }

           *modep = value;
           return p;
   }
  23
  24static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
  25{
  26        const char *path;
  27        unsigned int mode, len;
  28
  29        if (size < 24 || buf[size - 21])
  30                die("corrupt tree file");
  31
  32        path = get_mode(buf, &mode);
  33        if (!path || !*path)
  34                die("corrupt tree file");
  35        len = strlen(path) + 1;
  36
  37        /* Initialize the descriptor entry */
  38        desc->entry.path = path;
  39        desc->entry.mode = mode;
  40        desc->entry.sha1 = (const unsigned char *)(path + len);
  41}
  42
  43void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
  44{
  45        desc->buffer = buffer;
  46        desc->size = size;
  47        if (size)
  48                decode_tree_entry(desc, buffer, size);
  49}
  50
  51void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
  52{
  53        unsigned long size = 0;
  54        void *buf = NULL;
  55
  56        if (sha1) {
  57                buf = read_object_with_reference(sha1, tree_type, &size, NULL);
  58                if (!buf)
  59                        die("unable to read tree %s", sha1_to_hex(sha1));
  60        }
  61        init_tree_desc(desc, buf, size);
  62        return buf;
  63}
  64
  65static void entry_clear(struct name_entry *a)
  66{
  67        memset(a, 0, sizeof(*a));
  68}
  69
  70static void entry_extract(struct tree_desc *t, struct name_entry *a)
  71{
  72        *a = t->entry;
  73}
  74
  75void update_tree_entry(struct tree_desc *desc)
  76{
  77        const void *buf = desc->buffer;
  78        const unsigned char *end = desc->entry.sha1 + 20;
  79        unsigned long size = desc->size;
  80        unsigned long len = end - (const unsigned char *)buf;
  81
  82        if (size < len)
  83                die("corrupt tree file");
  84        buf = end;
  85        size -= len;
  86        desc->buffer = buf;
  87        desc->size = size;
  88        if (size)
  89                decode_tree_entry(desc, buf, size);
  90}
  91
  92int tree_entry(struct tree_desc *desc, struct name_entry *entry)
  93{
  94        if (!desc->size)
  95                return 0;
  96
  97        *entry = desc->entry;
  98        update_tree_entry(desc);
  99        return 1;
 100}
 101
 102void setup_traverse_info(struct traverse_info *info, const char *base)
 103{
 104        int pathlen = strlen(base);
 105        static struct traverse_info dummy;
 106
 107        memset(info, 0, sizeof(*info));
 108        if (pathlen && base[pathlen-1] == '/')
 109                pathlen--;
 110        info->pathlen = pathlen ? pathlen + 1 : 0;
 111        info->name.path = base;
 112        info->name.sha1 = (void *)(base + pathlen + 1);
 113        if (pathlen)
 114                info->prev = &dummy;
 115}
 116
 117char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n)
 118{
 119        int len = tree_entry_len(n->path, n->sha1);
 120        int pathlen = info->pathlen;
 121
 122        path[pathlen + len] = 0;
 123        for (;;) {
 124                memcpy(path + pathlen, n->path, len);
 125                if (!pathlen)
 126                        break;
 127                path[--pathlen] = '/';
 128                n = &info->name;
 129                len = tree_entry_len(n->path, n->sha1);
 130                info = info->prev;
 131                pathlen -= len;
 132        }
 133        return path;
 134}
 135
 /*
  * One node in a list of entries that were handed out ahead of their
  * position in the tree.  update_extended_entry() pushes a node and
  * extended_entry_extract() skips any entry whose path pointer is listed.
  */
 136struct tree_desc_skip {
 137        struct tree_desc_skip *prev; /* node pushed before this one, NULL at the end */
 138        const void *ptr; /* path pointer of the entry already handed out */
 139};
 140
 /*
  * A tree descriptor extended with the list of entries that
  * traverse_trees() has already processed out of order.
  */
 141struct tree_desc_x {
 142        struct tree_desc d; /* copy of the caller's descriptor, advanced during the walk */
 143        struct tree_desc_skip *skip; /* head of the skip list, NULL when empty */
 144};
 145
 /*
  * Order two counted names bytewise.
  *
  * The shared leading part is compared with memcmp(); if it differs that
  * result is returned, otherwise the difference of the lengths decides
  * (so a proper prefix sorts before the longer name).
  */
 static int name_compare(const char *a, int a_len,
                         const char *b, int b_len)
 {
         int common = a_len;
         int diff;

         if (b_len < common)
                 common = b_len;

         diff = memcmp(a, b, common);
         return diff ? diff : a_len - b_len;
 }
 155
 /*
  * The caller wants entry *a* from a tree (or nothing) and is currently
  * looking at entry *b* in that tree.  Decide what that tells us:
  *
  *    0  *a* and *b* are the same name;
  *    1  *a* may still appear later in the tree, keep scanning;
  *   -1  *a* cannot appear in the tree any more.
  *
  * *a* can be hiding behind *b* in three situations:
  *
  * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
  *                                matter what.
  * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
  * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
  */
 static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
 {
         int order = name_compare(a, a_len, b, b_len);

         /* Reading trees that are in sync is the common case. */
         if (order == 0)
                 return 0;

         /*
          * *a* sorts after *b*: keep looking.  Case (3) needs no special
          * treatment here because the answer is the same either way.
          */
         if (order > 0)
                 return 1;

         /*
          * *b* sorts after *a*.  Only case (2) leaves hope: *b* extends
          * *a* with a byte that sorts before '/'.
          */
         if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
                 return 1;

         return -1;
 }
 196
 197/*
 198 * From the extended tree_desc, extract the first name entry, while
 199 * paying attention to the candidate "first" name.  Most importantly,
 200 * when looking for an entry, if there are entries that sorts earlier
 201 * in the tree object representation than that name, skip them and
 202 * process the named entry first.  We will remember that we haven't
 203 * processed the first entry yet, and in the later call skip the
 204 * entry we processed early when update_extended_entry() is called.
 205 *
 206 * E.g. if the underlying tree object has these entries:
 207 *
 208 *    blob    "t-1"
 209 *    blob    "t-2"
 210 *    tree    "t"
 211 *    blob    "t=1"
 212 *
 213 * and the "first" asks for "t", remember that we still need to
 214 * process "t-1" and "t-2" but extract "t".  After processing the
 215 * entry "t" from this call, the caller will let us know by calling
 216 * update_extended_entry() that we can remember "t" has been processed
 217 * already.
 218 */
 219
 220static void extended_entry_extract(struct tree_desc_x *t,
 221                                   struct name_entry *a,
 222                                   const char *first,
 223                                   int first_len)
 224{
 225        const char *path;
 226        int len;
 227        struct tree_desc probe;
 228        struct tree_desc_skip *skip;
 229
 230        /*
 231         * Extract the first entry from the tree_desc, but skip the
 232         * ones that we already returned in earlier rounds.
 233         */
 234        while (1) {
 235                if (!t->d.size) {
 236                        entry_clear(a);
 237                        break; /* not found */
 238                }
 239                entry_extract(&t->d, a);
 240                for (skip = t->skip; skip; skip = skip->prev)
 241                        if (a->path == skip->ptr)
 242                                break; /* found */
 243                if (!skip)
 244                        break;
 245                /* We have processed this entry already. */
 246                update_tree_entry(&t->d);
 247        }
 248
 249        if (!first || !a->path)
 250                return;
 251
 252        /*
 253         * The caller wants "first" from this tree, or nothing.
 254         */
 255        path = a->path;
 256        len = tree_entry_len(a->path, a->sha1);
 257        switch (check_entry_match(first, first_len, path, len)) {
 258        case -1:
 259                entry_clear(a);
 260        case 0:
 261                return;
 262        default:
 263                break;
 264        }
 265
 266        /*
 267         * We need to look-ahead -- we suspect that a subtree whose
 268         * name is "first" may be hiding behind the current entry "path".
 269         */
 270        probe = t->d;
 271        while (probe.size) {
 272                entry_extract(&probe, a);
 273                path = a->path;
 274                len = tree_entry_len(a->path, a->sha1);
 275                switch (check_entry_match(first, first_len, path, len)) {
 276                case -1:
 277                        entry_clear(a);
 278                case 0:
 279                        return;
 280                default:
 281                        update_tree_entry(&probe);
 282                        break;
 283                }
 284                /* keep looking */
 285        }
 286        entry_clear(a);
 287}
 288
 289static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
 290{
 291        if (t->d.entry.path == a->path) {
 292                update_tree_entry(&t->d);
 293        } else {
 294                /* we have returned this entry early */
 295                struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
 296                skip->ptr = a->path;
 297                skip->prev = t->skip;
 298                t->skip = skip;
 299        }
 300}
 301
 302static void free_extended_entry(struct tree_desc_x *t)
 303{
 304        struct tree_desc_skip *p, *s;
 305
 306        for (s = t->skip; s; s = p) {
 307                p = s->prev;
 308                free(s);
 309        }
 310}
 311
 312int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 313{
 314        int ret = 0;
 315        int error = 0;
 316        struct name_entry *entry = xmalloc(n*sizeof(*entry));
 317        int i;
 318        struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
 319
 320        for (i = 0; i < n; i++)
 321                tx[i].d = t[i];
 322
 323        for (;;) {
 324                unsigned long mask, dirmask;
 325                const char *first = NULL;
 326                int first_len = 0;
 327                struct name_entry *e;
 328                int len;
 329
 330                for (i = 0; i < n; i++) {
 331                        e = entry + i;
 332                        extended_entry_extract(tx + i, e, NULL, 0);
 333                }
 334
 335                /*
 336                 * A tree may have "t-2" at the current location even
 337                 * though it may have "t" that is a subtree behind it,
 338                 * and another tree may return "t".  We want to grab
 339                 * all "t" from all trees to match in such a case.
 340                 */
 341                for (i = 0; i < n; i++) {
 342                        e = entry + i;
 343                        if (!e->path)
 344                                continue;
 345                        len = tree_entry_len(e->path, e->sha1);
 346                        if (!first) {
 347                                first = e->path;
 348                                first_len = len;
 349                                continue;
 350                        }
 351                        if (name_compare(e->path, len, first, first_len) < 0) {
 352                                first = e->path;
 353                                first_len = len;
 354                        }
 355                }
 356
 357                if (first) {
 358                        for (i = 0; i < n; i++) {
 359                                e = entry + i;
 360                                extended_entry_extract(tx + i, e, first, first_len);
 361                                /* Cull the ones that are not the earliest */
 362                                if (!e->path)
 363                                        continue;
 364                                len = tree_entry_len(e->path, e->sha1);
 365                                if (name_compare(e->path, len, first, first_len))
 366                                        entry_clear(e);
 367                        }
 368                }
 369
 370                /* Now we have in entry[i] the earliest name from the trees */
 371                mask = 0;
 372                dirmask = 0;
 373                for (i = 0; i < n; i++) {
 374                        if (!entry[i].path)
 375                                continue;
 376                        mask |= 1ul << i;
 377                        if (S_ISDIR(entry[i].mode))
 378                                dirmask |= 1ul << i;
 379                }
 380                if (!mask)
 381                        break;
 382                ret = info->fn(n, mask, dirmask, entry, info);
 383                if (ret < 0) {
 384                        error = ret;
 385                        if (!info->show_all_errors)
 386                                break;
 387                }
 388                mask &= ret;
 389                ret = 0;
 390                for (i = 0; i < n; i++)
 391                        if (mask & (1ul << i))
 392                                update_extended_entry(tx + i, entry + i);
 393        }
 394        free(entry);
 395        for (i = 0; i < n; i++)
 396                free_extended_entry(tx + i);
 397        free(tx);
 398        return error;
 399}
 400
 401static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
 402{
 403        int namelen = strlen(name);
 404        while (t->size) {
 405                const char *entry;
 406                const unsigned char *sha1;
 407                int entrylen, cmp;
 408
 409                sha1 = tree_entry_extract(t, &entry, mode);
 410                update_tree_entry(t);
 411                entrylen = tree_entry_len(entry, sha1);
 412                if (entrylen > namelen)
 413                        continue;
 414                cmp = memcmp(name, entry, entrylen);
 415                if (cmp > 0)
 416                        continue;
 417                if (cmp < 0)
 418                        break;
 419                if (entrylen == namelen) {
 420                        hashcpy(result, sha1);
 421                        return 0;
 422                }
 423                if (name[entrylen] != '/')
 424                        continue;
 425                if (!S_ISDIR(*mode))
 426                        break;
 427                if (++entrylen == namelen) {
 428                        hashcpy(result, sha1);
 429                        return 0;
 430                }
 431                return get_tree_entry(sha1, name + entrylen, result, mode);
 432        }
 433        return -1;
 434}
 435
 436int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned char *sha1, unsigned *mode)
 437{
 438        int retval;
 439        void *tree;
 440        unsigned long size;
 441        struct tree_desc t;
 442        unsigned char root[20];
 443
 444        tree = read_object_with_reference(tree_sha1, tree_type, &size, root);
 445        if (!tree)
 446                return -1;
 447
 448        if (name[0] == '\0') {
 449                hashcpy(sha1, root);
 450                free(tree);
 451                return 0;
 452        }
 453
 454        init_tree_desc(&t, tree, size);
 455        retval = find_tree_entry(&t, name, sha1, mode);
 456        free(tree);
 457        return retval;
 458}
 459
 460static int match_entry(const struct name_entry *entry, int pathlen,
 461                       const char *match, int matchlen,
 462                       int *never_interesting)
 463{
 464        int m = -1; /* signals that we haven't called strncmp() */
 465
 466        if (*never_interesting) {
 467                /*
 468                 * We have not seen any match that sorts later
 469                 * than the current path.
 470                 */
 471
 472                /*
 473                 * Does match sort strictly earlier than path
 474                 * with their common parts?
 475                 */
 476                m = strncmp(match, entry->path,
 477                            (matchlen < pathlen) ? matchlen : pathlen);
 478                if (m < 0)
 479                        return 0;
 480
 481                /*
 482                 * If we come here even once, that means there is at
 483                 * least one pathspec that would sort equal to or
 484                 * later than the path we are currently looking at.
 485                 * In other words, if we have never reached this point
 486                 * after iterating all pathspecs, it means all
 487                 * pathspecs are either outside of base, or inside the
 488                 * base but sorts strictly earlier than the current
 489                 * one.  In either case, they will never match the
 490                 * subsequent entries.  In such a case, we initialized
 491                 * the variable to -1 and that is what will be
 492                 * returned, allowing the caller to terminate early.
 493                 */
 494                *never_interesting = 0;
 495        }
 496
 497        if (pathlen > matchlen)
 498                return 0;
 499
 500        if (matchlen > pathlen) {
 501                if (match[pathlen] != '/')
 502                        return 0;
 503                if (!S_ISDIR(entry->mode))
 504                        return 0;
 505        }
 506
 507        if (m == -1)
 508                /*
 509                 * we cheated and did not do strncmp(), so we do
 510                 * that here.
 511                 */
 512                m = strncmp(match, entry->path, pathlen);
 513
 514        /*
 515         * If common part matched earlier then it is a hit,
 516         * because we rejected the case where path is not a
 517         * leading directory and is shorter than match.
 518         */
 519        if (!m)
 520                return 1;
 521
 522        return 0;
 523}
 524
 /*
  * Is "base" inside the directory named by the pathspec "match"?
  *
  * Returns 1 when the first "matchlen" bytes agree and the match ends on
  * a directory boundary: "match" is empty, "base" continues with '/', or
  * "match" itself ends in '/'.  Returns 0 otherwise, including for a mere
  * string-prefix coincidence.  "baselen" is not consulted; "base" must
  * be readable at index "matchlen".
  */
 static int match_dir_prefix(const char *base, int baselen,
                             const char *match, int matchlen)
 {
         if (strncmp(base, match, matchlen) != 0)
                 return 0;

         /* An empty pathspec covers everything. */
         if (matchlen == 0)
                 return 1;

         /* Anything else is only interesting on a directory boundary. */
         return base[matchlen] == '/' || match[matchlen - 1] == '/';
 }
 543
 544/*
 545 * Is a tree entry interesting given the pathspec we have?
 546 *
 547 * Pre-condition: either baselen == base_offset (i.e. empty path)
 548 * or base[baselen-1] == '/' (i.e. with trailing slash).
 549 *
 550 * Return:
 551 *  - 2 for "yes, and all subsequent entries will be"
 552 *  - 1 for yes
 553 *  - zero for no
 554 *  - negative for "no, and no subsequent entries will be either"
 555 */
 556int tree_entry_interesting(const struct name_entry *entry,
 557                           struct strbuf *base, int base_offset,
 558                           const struct pathspec *ps)
 559{
 560        int i;
 561        int pathlen, baselen = base->len - base_offset;
 562        int never_interesting = ps->has_wildcard ? 0 : -1;
 563
 564        if (!ps->nr) {
 565                if (!ps->recursive || ps->max_depth == -1)
 566                        return 2;
 567                return !!within_depth(base->buf + base_offset, baselen,
 568                                      !!S_ISDIR(entry->mode),
 569                                      ps->max_depth);
 570        }
 571
 572        pathlen = tree_entry_len(entry->path, entry->sha1);
 573
 574        for (i = ps->nr - 1; i >= 0; i--) {
 575                const struct pathspec_item *item = ps->items+i;
 576                const char *match = item->match;
 577                const char *base_str = base->buf + base_offset;
 578                int matchlen = item->len;
 579
 580                if (baselen >= matchlen) {
 581                        /* If it doesn't match, move along... */
 582                        if (!match_dir_prefix(base_str, baselen, match, matchlen))
 583                                goto match_wildcards;
 584
 585                        if (!ps->recursive || ps->max_depth == -1)
 586                                return 2;
 587
 588                        return !!within_depth(base_str + matchlen + 1,
 589                                              baselen - matchlen - 1,
 590                                              !!S_ISDIR(entry->mode),
 591                                              ps->max_depth);
 592                }
 593
 594                /* Does the base match? */
 595                if (!strncmp(base_str, match, baselen)) {
 596                        if (match_entry(entry, pathlen,
 597                                        match + baselen, matchlen - baselen,
 598                                        &never_interesting))
 599                                return 1;
 600
 601                        if (ps->items[i].use_wildcard) {
 602                                if (!fnmatch(match + baselen, entry->path, 0))
 603                                        return 1;
 604
 605                                /*
 606                                 * Match all directories. We'll try to
 607                                 * match files later on.
 608                                 */
 609                                if (ps->recursive && S_ISDIR(entry->mode))
 610                                        return 1;
 611                        }
 612
 613                        continue;
 614                }
 615
 616match_wildcards:
 617                if (!ps->items[i].use_wildcard)
 618                        continue;
 619
 620                /*
 621                 * Concatenate base and entry->path into one and do
 622                 * fnmatch() on it.
 623                 */
 624
 625                strbuf_add(base, entry->path, pathlen);
 626
 627                if (!fnmatch(match, base->buf + base_offset, 0)) {
 628                        strbuf_setlen(base, base_offset + baselen);
 629                        return 1;
 630                }
 631                strbuf_setlen(base, base_offset + baselen);
 632
 633                /*
 634                 * Match all directories. We'll try to match files
 635                 * later on.
 636                 */
 637                if (ps->recursive && S_ISDIR(entry->mode))
 638                        return 1;
 639        }
 640        return never_interesting; /* No matches */
 641}