builtin-rev-list.c on commit builtin-fsck: reports missing parent commits (4516338)
   1#include "cache.h"
   2#include "refs.h"
   3#include "tag.h"
   4#include "commit.h"
   5#include "tree.h"
   6#include "blob.h"
   7#include "tree-walk.h"
   8#include "diff.h"
   9#include "revision.h"
  10#include "list-objects.h"
  11#include "builtin.h"
  12#include "log-tree.h"
  13
  14/* bits #0-15 in revision.h */
  15
  16#define COUNTED         (1u<<16)
  17
  18static const char rev_list_usage[] =
  19"git-rev-list [OPTION] <commit-id>... [ -- paths... ]\n"
  20"  limiting output:\n"
  21"    --max-count=nr\n"
  22"    --max-age=epoch\n"
  23"    --min-age=epoch\n"
  24"    --sparse\n"
  25"    --no-merges\n"
  26"    --remove-empty\n"
  27"    --all\n"
  28"    --stdin\n"
  29"    --quiet\n"
  30"  ordering output:\n"
  31"    --topo-order\n"
  32"    --date-order\n"
  33"  formatting output:\n"
  34"    --parents\n"
  35"    --objects | --objects-edge\n"
  36"    --unpacked\n"
  37"    --header | --pretty\n"
  38"    --abbrev=nr | --no-abbrev\n"
  39"    --abbrev-commit\n"
  40"    --left-right\n"
  41"  special purpose:\n"
  42"    --bisect\n"
  43"    --bisect-vars\n"
  44"    --bisect-all"
  45;
  46
  47static struct rev_info revs;
  48
  49static int bisect_list;
  50static int show_timestamp;
  51static int hdr_termination;
  52static const char *header_prefix;
  53
  54static void finish_commit(struct commit *commit);
  55static void show_commit(struct commit *commit)
  56{
  57        if (show_timestamp)
  58                printf("%lu ", commit->date);
  59        if (header_prefix)
  60                fputs(header_prefix, stdout);
  61        if (commit->object.flags & BOUNDARY)
  62                putchar('-');
  63        else if (revs.left_right) {
  64                if (commit->object.flags & SYMMETRIC_LEFT)
  65                        putchar('<');
  66                else
  67                        putchar('>');
  68        }
  69        if (revs.abbrev_commit && revs.abbrev)
  70                fputs(find_unique_abbrev(commit->object.sha1, revs.abbrev),
  71                      stdout);
  72        else
  73                fputs(sha1_to_hex(commit->object.sha1), stdout);
  74        if (revs.parents) {
  75                struct commit_list *parents = commit->parents;
  76                while (parents) {
  77                        printf(" %s", sha1_to_hex(parents->item->object.sha1));
  78                        parents = parents->next;
  79                }
  80        }
  81        show_decorations(commit);
  82        if (revs.commit_format == CMIT_FMT_ONELINE)
  83                putchar(' ');
  84        else
  85                putchar('\n');
  86
  87        if (revs.verbose_header) {
  88                struct strbuf buf;
  89                strbuf_init(&buf, 0);
  90                pretty_print_commit(revs.commit_format, commit,
  91                                    &buf, revs.abbrev, NULL, NULL,
  92                                    revs.date_mode, 0);
  93                if (buf.len)
  94                        printf("%s%c", buf.buf, hdr_termination);
  95                strbuf_release(&buf);
  96        }
  97        maybe_flush_or_die(stdout, "stdout");
  98        finish_commit(commit);
  99}
 100
 101static void finish_commit(struct commit *commit)
 102{
 103        if (commit->parents) {
 104                free_commit_list(commit->parents);
 105                commit->parents = NULL;
 106        }
 107        free(commit->buffer);
 108        commit->buffer = NULL;
 109}
 110
 111static void finish_object(struct object_array_entry *p)
 112{
 113        if (p->item->type == OBJ_BLOB && !has_sha1_file(p->item->sha1))
 114                die("missing blob object '%s'", sha1_to_hex(p->item->sha1));
 115}
 116
 117static void show_object(struct object_array_entry *p)
 118{
 119        /* An object with name "foo\n0000000..." can be used to
 120         * confuse downstream git-pack-objects very badly.
 121         */
 122        const char *ep = strchr(p->name, '\n');
 123
 124        finish_object(p);
 125        if (ep) {
 126                printf("%s %.*s\n", sha1_to_hex(p->item->sha1),
 127                       (int) (ep - p->name),
 128                       p->name);
 129        }
 130        else
 131                printf("%s %s\n", sha1_to_hex(p->item->sha1), p->name);
 132}
 133
 134static void show_edge(struct commit *commit)
 135{
 136        printf("-%s\n", sha1_to_hex(commit->object.sha1));
 137}
 138
 139/*
 140 * This is a truly stupid algorithm, but it's only
 141 * used for bisection, and we just don't care enough.
 142 *
 143 * We care just barely enough to avoid recursing for
 144 * non-merge entries.
 145 */
 146static int count_distance(struct commit_list *entry)
 147{
 148        int nr = 0;
 149
 150        while (entry) {
 151                struct commit *commit = entry->item;
 152                struct commit_list *p;
 153
 154                if (commit->object.flags & (UNINTERESTING | COUNTED))
 155                        break;
 156                if (!(commit->object.flags & TREESAME))
 157                        nr++;
 158                commit->object.flags |= COUNTED;
 159                p = commit->parents;
 160                entry = p;
 161                if (p) {
 162                        p = p->next;
 163                        while (p) {
 164                                nr += count_distance(p);
 165                                p = p->next;
 166                        }
 167                }
 168        }
 169
 170        return nr;
 171}
 172
 173static void clear_distance(struct commit_list *list)
 174{
 175        while (list) {
 176                struct commit *commit = list->item;
 177                commit->object.flags &= ~COUNTED;
 178                list = list->next;
 179        }
 180}
 181
 182#define DEBUG_BISECT 0
 183
 184static inline int weight(struct commit_list *elem)
 185{
 186        return *((int*)(elem->item->util));
 187}
 188
 189static inline void weight_set(struct commit_list *elem, int weight)
 190{
 191        *((int*)(elem->item->util)) = weight;
 192}
 193
 194static int count_interesting_parents(struct commit *commit)
 195{
 196        struct commit_list *p;
 197        int count;
 198
 199        for (count = 0, p = commit->parents; p; p = p->next) {
 200                if (p->item->object.flags & UNINTERESTING)
 201                        continue;
 202                count++;
 203        }
 204        return count;
 205}
 206
 207static inline int halfway(struct commit_list *p, int nr)
 208{
 209        /*
 210         * Don't short-cut something we are not going to return!
 211         */
 212        if (p->item->object.flags & TREESAME)
 213                return 0;
 214        if (DEBUG_BISECT)
 215                return 0;
 216        /*
 217         * 2 and 3 are halfway of 5.
 218         * 3 is halfway of 6 but 2 and 4 are not.
 219         */
 220        switch (2 * weight(p) - nr) {
 221        case -1: case 0: case 1:
 222                return 1;
 223        default:
 224                return 0;
 225        }
 226}
 227
 228#if !DEBUG_BISECT
 229#define show_list(a,b,c,d) do { ; } while (0)
 230#else
 231static void show_list(const char *debug, int counted, int nr,
 232                      struct commit_list *list)
 233{
 234        struct commit_list *p;
 235
 236        fprintf(stderr, "%s (%d/%d)\n", debug, counted, nr);
 237
 238        for (p = list; p; p = p->next) {
 239                struct commit_list *pp;
 240                struct commit *commit = p->item;
 241                unsigned flags = commit->object.flags;
 242                enum object_type type;
 243                unsigned long size;
 244                char *buf = read_sha1_file(commit->object.sha1, &type, &size);
 245                char *ep, *sp;
 246
 247                fprintf(stderr, "%c%c%c ",
 248                        (flags & TREESAME) ? ' ' : 'T',
 249                        (flags & UNINTERESTING) ? 'U' : ' ',
 250                        (flags & COUNTED) ? 'C' : ' ');
 251                if (commit->util)
 252                        fprintf(stderr, "%3d", weight(p));
 253                else
 254                        fprintf(stderr, "---");
 255                fprintf(stderr, " %.*s", 8, sha1_to_hex(commit->object.sha1));
 256                for (pp = commit->parents; pp; pp = pp->next)
 257                        fprintf(stderr, " %.*s", 8,
 258                                sha1_to_hex(pp->item->object.sha1));
 259
 260                sp = strstr(buf, "\n\n");
 261                if (sp) {
 262                        sp += 2;
 263                        for (ep = sp; *ep && *ep != '\n'; ep++)
 264                                ;
 265                        fprintf(stderr, " %.*s", (int)(ep - sp), sp);
 266                }
 267                fprintf(stderr, "\n");
 268        }
 269}
 270#endif /* DEBUG_BISECT */
 271
 272static struct commit_list *best_bisection(struct commit_list *list, int nr)
 273{
 274        struct commit_list *p, *best;
 275        int best_distance = -1;
 276
 277        best = list;
 278        for (p = list; p; p = p->next) {
 279                int distance;
 280                unsigned flags = p->item->object.flags;
 281
 282                if (flags & TREESAME)
 283                        continue;
 284                distance = weight(p);
 285                if (nr - distance < distance)
 286                        distance = nr - distance;
 287                if (distance > best_distance) {
 288                        best = p;
 289                        best_distance = distance;
 290                }
 291        }
 292
 293        return best;
 294}
 295
 296struct commit_dist {
 297        struct commit *commit;
 298        int distance;
 299};
 300
 301static int compare_commit_dist(const void *a_, const void *b_)
 302{
 303        struct commit_dist *a, *b;
 304
 305        a = (struct commit_dist *)a_;
 306        b = (struct commit_dist *)b_;
 307        if (a->distance != b->distance)
 308                return b->distance - a->distance; /* desc sort */
 309        return hashcmp(a->commit->object.sha1, b->commit->object.sha1);
 310}
 311
 312static struct commit_list *best_bisection_sorted(struct commit_list *list, int nr)
 313{
 314        struct commit_list *p;
 315        struct commit_dist *array = xcalloc(nr, sizeof(*array));
 316        int cnt, i;
 317
 318        for (p = list, cnt = 0; p; p = p->next) {
 319                int distance;
 320                unsigned flags = p->item->object.flags;
 321
 322                if (flags & TREESAME)
 323                        continue;
 324                distance = weight(p);
 325                if (nr - distance < distance)
 326                        distance = nr - distance;
 327                array[cnt].commit = p->item;
 328                array[cnt].distance = distance;
 329                cnt++;
 330        }
 331        qsort(array, cnt, sizeof(*array), compare_commit_dist);
 332        for (p = list, i = 0; i < cnt; i++) {
 333                struct name_decoration *r = xmalloc(sizeof(*r) + 100);
 334                struct object *obj = &(array[i].commit->object);
 335
 336                sprintf(r->name, "dist=%d", array[i].distance);
 337                r->next = add_decoration(&name_decoration, obj, r);
 338                p->item = array[i].commit;
 339                p = p->next;
 340        }
 341        if (p)
 342                p->next = NULL;
 343        free(array);
 344        return list;
 345}
 346
 347/*
 348 * zero or positive weight is the number of interesting commits it can
 349 * reach, including itself.  Especially, weight = 0 means it does not
 350 * reach any tree-changing commits (e.g. just above uninteresting one
 351 * but traversal is with pathspec).
 352 *
 353 * weight = -1 means it has one parent and its distance is yet to
 354 * be computed.
 355 *
 356 * weight = -2 means it has more than one parent and its distance is
 357 * unknown.  After running count_distance() first, they will get zero
 358 * or positive distance.
 359 */
 360static struct commit_list *do_find_bisection(struct commit_list *list,
 361                                             int nr, int *weights,
 362                                             int find_all)
 363{
 364        int n, counted;
 365        struct commit_list *p;
 366
 367        counted = 0;
 368
 369        for (n = 0, p = list; p; p = p->next) {
 370                struct commit *commit = p->item;
 371                unsigned flags = commit->object.flags;
 372
 373                p->item->util = &weights[n++];
 374                switch (count_interesting_parents(commit)) {
 375                case 0:
 376                        if (!(flags & TREESAME)) {
 377                                weight_set(p, 1);
 378                                counted++;
 379                                show_list("bisection 2 count one",
 380                                          counted, nr, list);
 381                        }
 382                        /*
 383                         * otherwise, it is known not to reach any
 384                         * tree-changing commit and gets weight 0.
 385                         */
 386                        break;
 387                case 1:
 388                        weight_set(p, -1);
 389                        break;
 390                default:
 391                        weight_set(p, -2);
 392                        break;
 393                }
 394        }
 395
 396        show_list("bisection 2 initialize", counted, nr, list);
 397
 398        /*
 399         * If you have only one parent in the resulting set
 400         * then you can reach one commit more than that parent
 401         * can reach.  So we do not have to run the expensive
 402         * count_distance() for single strand of pearls.
 403         *
 404         * However, if you have more than one parents, you cannot
 405         * just add their distance and one for yourself, since
 406         * they usually reach the same ancestor and you would
 407         * end up counting them twice that way.
 408         *
 409         * So we will first count distance of merges the usual
 410         * way, and then fill the blanks using cheaper algorithm.
 411         */
 412        for (p = list; p; p = p->next) {
 413                if (p->item->object.flags & UNINTERESTING)
 414                        continue;
 415                if (weight(p) != -2)
 416                        continue;
 417                weight_set(p, count_distance(p));
 418                clear_distance(list);
 419
 420                /* Does it happen to be at exactly half-way? */
 421                if (!find_all && halfway(p, nr))
 422                        return p;
 423                counted++;
 424        }
 425
 426        show_list("bisection 2 count_distance", counted, nr, list);
 427
 428        while (counted < nr) {
 429                for (p = list; p; p = p->next) {
 430                        struct commit_list *q;
 431                        unsigned flags = p->item->object.flags;
 432
 433                        if (0 <= weight(p))
 434                                continue;
 435                        for (q = p->item->parents; q; q = q->next) {
 436                                if (q->item->object.flags & UNINTERESTING)
 437                                        continue;
 438                                if (0 <= weight(q))
 439                                        break;
 440                        }
 441                        if (!q)
 442                                continue;
 443
 444                        /*
 445                         * weight for p is unknown but q is known.
 446                         * add one for p itself if p is to be counted,
 447                         * otherwise inherit it from q directly.
 448                         */
 449                        if (!(flags & TREESAME)) {
 450                                weight_set(p, weight(q)+1);
 451                                counted++;
 452                                show_list("bisection 2 count one",
 453                                          counted, nr, list);
 454                        }
 455                        else
 456                                weight_set(p, weight(q));
 457
 458                        /* Does it happen to be at exactly half-way? */
 459                        if (!find_all && halfway(p, nr))
 460                                return p;
 461                }
 462        }
 463
 464        show_list("bisection 2 counted all", counted, nr, list);
 465
 466        if (!find_all)
 467                return best_bisection(list, nr);
 468        else
 469                return best_bisection_sorted(list, nr);
 470}
 471
 472static struct commit_list *find_bisection(struct commit_list *list,
 473                                          int *reaches, int *all,
 474                                          int find_all)
 475{
 476        int nr, on_list;
 477        struct commit_list *p, *best, *next, *last;
 478        int *weights;
 479
 480        show_list("bisection 2 entry", 0, 0, list);
 481
 482        /*
 483         * Count the number of total and tree-changing items on the
 484         * list, while reversing the list.
 485         */
 486        for (nr = on_list = 0, last = NULL, p = list;
 487             p;
 488             p = next) {
 489                unsigned flags = p->item->object.flags;
 490
 491                next = p->next;
 492                if (flags & UNINTERESTING)
 493                        continue;
 494                p->next = last;
 495                last = p;
 496                if (!(flags & TREESAME))
 497                        nr++;
 498                on_list++;
 499        }
 500        list = last;
 501        show_list("bisection 2 sorted", 0, nr, list);
 502
 503        *all = nr;
 504        weights = xcalloc(on_list, sizeof(*weights));
 505
 506        /* Do the real work of finding bisection commit. */
 507        best = do_find_bisection(list, nr, weights, find_all);
 508        if (best) {
 509                if (!find_all)
 510                        best->next = NULL;
 511                *reaches = weight(best);
 512        }
 513        free(weights);
 514        return best;
 515}
 516
 517static void read_revisions_from_stdin(struct rev_info *revs)
 518{
 519        char line[1000];
 520
 521        while (fgets(line, sizeof(line), stdin) != NULL) {
 522                int len = strlen(line);
 523                if (len && line[len - 1] == '\n')
 524                        line[--len] = 0;
 525                if (!len)
 526                        break;
 527                if (line[0] == '-')
 528                        die("options not supported in --stdin mode");
 529                if (handle_revision_arg(line, revs, 0, 1))
 530                        die("bad revision '%s'", line);
 531        }
 532}
 533
 534int cmd_rev_list(int argc, const char **argv, const char *prefix)
 535{
 536        struct commit_list *list;
 537        int i;
 538        int read_from_stdin = 0;
 539        int bisect_show_vars = 0;
 540        int bisect_find_all = 0;
 541        int quiet = 0;
 542
 543        git_config(git_default_config);
 544        init_revisions(&revs, prefix);
 545        revs.abbrev = 0;
 546        revs.commit_format = CMIT_FMT_UNSPECIFIED;
 547        argc = setup_revisions(argc, argv, &revs, NULL);
 548
 549        for (i = 1 ; i < argc; i++) {
 550                const char *arg = argv[i];
 551
 552                if (!strcmp(arg, "--header")) {
 553                        revs.verbose_header = 1;
 554                        continue;
 555                }
 556                if (!strcmp(arg, "--timestamp")) {
 557                        show_timestamp = 1;
 558                        continue;
 559                }
 560                if (!strcmp(arg, "--bisect")) {
 561                        bisect_list = 1;
 562                        continue;
 563                }
 564                if (!strcmp(arg, "--bisect-all")) {
 565                        bisect_list = 1;
 566                        bisect_find_all = 1;
 567                        continue;
 568                }
 569                if (!strcmp(arg, "--bisect-vars")) {
 570                        bisect_list = 1;
 571                        bisect_show_vars = 1;
 572                        continue;
 573                }
 574                if (!strcmp(arg, "--stdin")) {
 575                        if (read_from_stdin++)
 576                                die("--stdin given twice?");
 577                        read_revisions_from_stdin(&revs);
 578                        continue;
 579                }
 580                if (!strcmp(arg, "--quiet")) {
 581                        quiet = 1;
 582                        continue;
 583                }
 584                usage(rev_list_usage);
 585
 586        }
 587        if (revs.commit_format != CMIT_FMT_UNSPECIFIED) {
 588                /* The command line has a --pretty  */
 589                hdr_termination = '\n';
 590                if (revs.commit_format == CMIT_FMT_ONELINE)
 591                        header_prefix = "";
 592                else
 593                        header_prefix = "commit ";
 594        }
 595        else if (revs.verbose_header)
 596                /* Only --header was specified */
 597                revs.commit_format = CMIT_FMT_RAW;
 598
 599        list = revs.commits;
 600
 601        if ((!list &&
 602             (!(revs.tag_objects||revs.tree_objects||revs.blob_objects) &&
 603              !revs.pending.nr)) ||
 604            revs.diff)
 605                usage(rev_list_usage);
 606
 607        save_commit_buffer = revs.verbose_header || revs.grep_filter;
 608        if (bisect_list)
 609                revs.limited = 1;
 610
 611        prepare_revision_walk(&revs);
 612        if (revs.tree_objects)
 613                mark_edges_uninteresting(revs.commits, &revs, show_edge);
 614
 615        if (bisect_list) {
 616                int reaches = reaches, all = all;
 617
 618                revs.commits = find_bisection(revs.commits, &reaches, &all,
 619                                              bisect_find_all);
 620                if (bisect_show_vars) {
 621                        int cnt;
 622                        char hex[41];
 623                        if (!revs.commits)
 624                                return 1;
 625                        /*
 626                         * revs.commits can reach "reaches" commits among
 627                         * "all" commits.  If it is good, then there are
 628                         * (all-reaches) commits left to be bisected.
 629                         * On the other hand, if it is bad, then the set
 630                         * to bisect is "reaches".
 631                         * A bisect set of size N has (N-1) commits further
 632                         * to test, as we already know one bad one.
 633                         */
 634                        cnt = all - reaches;
 635                        if (cnt < reaches)
 636                                cnt = reaches;
 637                        strcpy(hex, sha1_to_hex(revs.commits->item->object.sha1));
 638
 639                        if (bisect_find_all) {
 640                                traverse_commit_list(&revs, show_commit, show_object);
 641                                printf("------\n");
 642                        }
 643
 644                        printf("bisect_rev=%s\n"
 645                               "bisect_nr=%d\n"
 646                               "bisect_good=%d\n"
 647                               "bisect_bad=%d\n"
 648                               "bisect_all=%d\n",
 649                               hex,
 650                               cnt - 1,
 651                               all - reaches - 1,
 652                               reaches - 1,
 653                               all);
 654                        return 0;
 655                }
 656        }
 657
 658        traverse_commit_list(&revs,
 659                quiet ? finish_commit : show_commit,
 660                quiet ? finish_object : show_object);
 661
 662        return 0;
 663}