builtin-rev-list.con commit sha1-lookup: make selection of 'middle' less aggressive (12ecb01)
   1#include "cache.h"
   2#include "refs.h"
   3#include "tag.h"
   4#include "commit.h"
   5#include "tree.h"
   6#include "blob.h"
   7#include "tree-walk.h"
   8#include "diff.h"
   9#include "revision.h"
  10#include "list-objects.h"
  11#include "builtin.h"
  12#include "log-tree.h"
  13
  14/* bits #0-15 in revision.h */
  15
  16#define COUNTED         (1u<<16)
  17
  18static const char rev_list_usage[] =
  19"git-rev-list [OPTION] <commit-id>... [ -- paths... ]\n"
  20"  limiting output:\n"
  21"    --max-count=nr\n"
  22"    --max-age=epoch\n"
  23"    --min-age=epoch\n"
  24"    --sparse\n"
  25"    --no-merges\n"
  26"    --remove-empty\n"
  27"    --all\n"
  28"    --branches\n"
  29"    --tags\n"
  30"    --remotes\n"
  31"    --stdin\n"
  32"    --quiet\n"
  33"  ordering output:\n"
  34"    --topo-order\n"
  35"    --date-order\n"
  36"    --reverse\n"
  37"  formatting output:\n"
  38"    --parents\n"
  39"    --objects | --objects-edge\n"
  40"    --unpacked\n"
  41"    --header | --pretty\n"
  42"    --abbrev=nr | --no-abbrev\n"
  43"    --abbrev-commit\n"
  44"    --left-right\n"
  45"  special purpose:\n"
  46"    --bisect\n"
  47"    --bisect-vars\n"
  48"    --bisect-all"
  49;
  50
  51static struct rev_info revs;
  52
  53static int bisect_list;
  54static int show_timestamp;
  55static int hdr_termination;
  56static const char *header_prefix;
  57
  58static void finish_commit(struct commit *commit);
  59static void show_commit(struct commit *commit)
  60{
  61        if (show_timestamp)
  62                printf("%lu ", commit->date);
  63        if (header_prefix)
  64                fputs(header_prefix, stdout);
  65        if (commit->object.flags & BOUNDARY)
  66                putchar('-');
  67        else if (commit->object.flags & UNINTERESTING)
  68                putchar('^');
  69        else if (revs.left_right) {
  70                if (commit->object.flags & SYMMETRIC_LEFT)
  71                        putchar('<');
  72                else
  73                        putchar('>');
  74        }
  75        if (revs.abbrev_commit && revs.abbrev)
  76                fputs(find_unique_abbrev(commit->object.sha1, revs.abbrev),
  77                      stdout);
  78        else
  79                fputs(sha1_to_hex(commit->object.sha1), stdout);
  80        if (revs.parents) {
  81                struct commit_list *parents = commit->parents;
  82                while (parents) {
  83                        printf(" %s", sha1_to_hex(parents->item->object.sha1));
  84                        parents = parents->next;
  85                }
  86        }
  87        show_decorations(commit);
  88        if (revs.commit_format == CMIT_FMT_ONELINE)
  89                putchar(' ');
  90        else
  91                putchar('\n');
  92
  93        if (revs.verbose_header && commit->buffer) {
  94                struct strbuf buf;
  95                strbuf_init(&buf, 0);
  96                pretty_print_commit(revs.commit_format, commit,
  97                                    &buf, revs.abbrev, NULL, NULL,
  98                                    revs.date_mode, 0);
  99                if (buf.len)
 100                        printf("%s%c", buf.buf, hdr_termination);
 101                strbuf_release(&buf);
 102        }
 103        maybe_flush_or_die(stdout, "stdout");
 104        finish_commit(commit);
 105}
 106
 107static void finish_commit(struct commit *commit)
 108{
 109        if (commit->parents) {
 110                free_commit_list(commit->parents);
 111                commit->parents = NULL;
 112        }
 113        free(commit->buffer);
 114        commit->buffer = NULL;
 115}
 116
 117static void finish_object(struct object_array_entry *p)
 118{
 119        if (p->item->type == OBJ_BLOB && !has_sha1_file(p->item->sha1))
 120                die("missing blob object '%s'", sha1_to_hex(p->item->sha1));
 121}
 122
 123static void show_object(struct object_array_entry *p)
 124{
 125        /* An object with name "foo\n0000000..." can be used to
 126         * confuse downstream git-pack-objects very badly.
 127         */
 128        const char *ep = strchr(p->name, '\n');
 129
 130        finish_object(p);
 131        if (ep) {
 132                printf("%s %.*s\n", sha1_to_hex(p->item->sha1),
 133                       (int) (ep - p->name),
 134                       p->name);
 135        }
 136        else
 137                printf("%s %s\n", sha1_to_hex(p->item->sha1), p->name);
 138}
 139
 140static void show_edge(struct commit *commit)
 141{
 142        printf("-%s\n", sha1_to_hex(commit->object.sha1));
 143}
 144
 145/*
 146 * This is a truly stupid algorithm, but it's only
 147 * used for bisection, and we just don't care enough.
 148 *
 149 * We care just barely enough to avoid recursing for
 150 * non-merge entries.
 151 */
 152static int count_distance(struct commit_list *entry)
 153{
 154        int nr = 0;
 155
 156        while (entry) {
 157                struct commit *commit = entry->item;
 158                struct commit_list *p;
 159
 160                if (commit->object.flags & (UNINTERESTING | COUNTED))
 161                        break;
 162                if (!(commit->object.flags & TREESAME))
 163                        nr++;
 164                commit->object.flags |= COUNTED;
 165                p = commit->parents;
 166                entry = p;
 167                if (p) {
 168                        p = p->next;
 169                        while (p) {
 170                                nr += count_distance(p);
 171                                p = p->next;
 172                        }
 173                }
 174        }
 175
 176        return nr;
 177}
 178
 179static void clear_distance(struct commit_list *list)
 180{
 181        while (list) {
 182                struct commit *commit = list->item;
 183                commit->object.flags &= ~COUNTED;
 184                list = list->next;
 185        }
 186}
 187
 188#define DEBUG_BISECT 0
 189
 190static inline int weight(struct commit_list *elem)
 191{
 192        return *((int*)(elem->item->util));
 193}
 194
 195static inline void weight_set(struct commit_list *elem, int weight)
 196{
 197        *((int*)(elem->item->util)) = weight;
 198}
 199
 200static int count_interesting_parents(struct commit *commit)
 201{
 202        struct commit_list *p;
 203        int count;
 204
 205        for (count = 0, p = commit->parents; p; p = p->next) {
 206                if (p->item->object.flags & UNINTERESTING)
 207                        continue;
 208                count++;
 209        }
 210        return count;
 211}
 212
 213static inline int halfway(struct commit_list *p, int nr)
 214{
 215        /*
 216         * Don't short-cut something we are not going to return!
 217         */
 218        if (p->item->object.flags & TREESAME)
 219                return 0;
 220        if (DEBUG_BISECT)
 221                return 0;
 222        /*
 223         * 2 and 3 are halfway of 5.
 224         * 3 is halfway of 6 but 2 and 4 are not.
 225         */
 226        switch (2 * weight(p) - nr) {
 227        case -1: case 0: case 1:
 228                return 1;
 229        default:
 230                return 0;
 231        }
 232}
 233
 234#if !DEBUG_BISECT
 235#define show_list(a,b,c,d) do { ; } while (0)
 236#else
 237static void show_list(const char *debug, int counted, int nr,
 238                      struct commit_list *list)
 239{
 240        struct commit_list *p;
 241
 242        fprintf(stderr, "%s (%d/%d)\n", debug, counted, nr);
 243
 244        for (p = list; p; p = p->next) {
 245                struct commit_list *pp;
 246                struct commit *commit = p->item;
 247                unsigned flags = commit->object.flags;
 248                enum object_type type;
 249                unsigned long size;
 250                char *buf = read_sha1_file(commit->object.sha1, &type, &size);
 251                char *ep, *sp;
 252
 253                fprintf(stderr, "%c%c%c ",
 254                        (flags & TREESAME) ? ' ' : 'T',
 255                        (flags & UNINTERESTING) ? 'U' : ' ',
 256                        (flags & COUNTED) ? 'C' : ' ');
 257                if (commit->util)
 258                        fprintf(stderr, "%3d", weight(p));
 259                else
 260                        fprintf(stderr, "---");
 261                fprintf(stderr, " %.*s", 8, sha1_to_hex(commit->object.sha1));
 262                for (pp = commit->parents; pp; pp = pp->next)
 263                        fprintf(stderr, " %.*s", 8,
 264                                sha1_to_hex(pp->item->object.sha1));
 265
 266                sp = strstr(buf, "\n\n");
 267                if (sp) {
 268                        sp += 2;
 269                        for (ep = sp; *ep && *ep != '\n'; ep++)
 270                                ;
 271                        fprintf(stderr, " %.*s", (int)(ep - sp), sp);
 272                }
 273                fprintf(stderr, "\n");
 274        }
 275}
 276#endif /* DEBUG_BISECT */
 277
 278static struct commit_list *best_bisection(struct commit_list *list, int nr)
 279{
 280        struct commit_list *p, *best;
 281        int best_distance = -1;
 282
 283        best = list;
 284        for (p = list; p; p = p->next) {
 285                int distance;
 286                unsigned flags = p->item->object.flags;
 287
 288                if (flags & TREESAME)
 289                        continue;
 290                distance = weight(p);
 291                if (nr - distance < distance)
 292                        distance = nr - distance;
 293                if (distance > best_distance) {
 294                        best = p;
 295                        best_distance = distance;
 296                }
 297        }
 298
 299        return best;
 300}
 301
 302struct commit_dist {
 303        struct commit *commit;
 304        int distance;
 305};
 306
 307static int compare_commit_dist(const void *a_, const void *b_)
 308{
 309        struct commit_dist *a, *b;
 310
 311        a = (struct commit_dist *)a_;
 312        b = (struct commit_dist *)b_;
 313        if (a->distance != b->distance)
 314                return b->distance - a->distance; /* desc sort */
 315        return hashcmp(a->commit->object.sha1, b->commit->object.sha1);
 316}
 317
 318static struct commit_list *best_bisection_sorted(struct commit_list *list, int nr)
 319{
 320        struct commit_list *p;
 321        struct commit_dist *array = xcalloc(nr, sizeof(*array));
 322        int cnt, i;
 323
 324        for (p = list, cnt = 0; p; p = p->next) {
 325                int distance;
 326                unsigned flags = p->item->object.flags;
 327
 328                if (flags & TREESAME)
 329                        continue;
 330                distance = weight(p);
 331                if (nr - distance < distance)
 332                        distance = nr - distance;
 333                array[cnt].commit = p->item;
 334                array[cnt].distance = distance;
 335                cnt++;
 336        }
 337        qsort(array, cnt, sizeof(*array), compare_commit_dist);
 338        for (p = list, i = 0; i < cnt; i++) {
 339                struct name_decoration *r = xmalloc(sizeof(*r) + 100);
 340                struct object *obj = &(array[i].commit->object);
 341
 342                sprintf(r->name, "dist=%d", array[i].distance);
 343                r->next = add_decoration(&name_decoration, obj, r);
 344                p->item = array[i].commit;
 345                p = p->next;
 346        }
 347        if (p)
 348                p->next = NULL;
 349        free(array);
 350        return list;
 351}
 352
 353/*
 354 * zero or positive weight is the number of interesting commits it can
 355 * reach, including itself.  Especially, weight = 0 means it does not
 356 * reach any tree-changing commits (e.g. just above uninteresting one
 357 * but traversal is with pathspec).
 358 *
 359 * weight = -1 means it has one parent and its distance is yet to
 360 * be computed.
 361 *
 362 * weight = -2 means it has more than one parent and its distance is
 363 * unknown.  After running count_distance() first, they will get zero
 364 * or positive distance.
 365 */
 366static struct commit_list *do_find_bisection(struct commit_list *list,
 367                                             int nr, int *weights,
 368                                             int find_all)
 369{
 370        int n, counted;
 371        struct commit_list *p;
 372
 373        counted = 0;
 374
 375        for (n = 0, p = list; p; p = p->next) {
 376                struct commit *commit = p->item;
 377                unsigned flags = commit->object.flags;
 378
 379                p->item->util = &weights[n++];
 380                switch (count_interesting_parents(commit)) {
 381                case 0:
 382                        if (!(flags & TREESAME)) {
 383                                weight_set(p, 1);
 384                                counted++;
 385                                show_list("bisection 2 count one",
 386                                          counted, nr, list);
 387                        }
 388                        /*
 389                         * otherwise, it is known not to reach any
 390                         * tree-changing commit and gets weight 0.
 391                         */
 392                        break;
 393                case 1:
 394                        weight_set(p, -1);
 395                        break;
 396                default:
 397                        weight_set(p, -2);
 398                        break;
 399                }
 400        }
 401
 402        show_list("bisection 2 initialize", counted, nr, list);
 403
 404        /*
 405         * If you have only one parent in the resulting set
 406         * then you can reach one commit more than that parent
 407         * can reach.  So we do not have to run the expensive
 408         * count_distance() for single strand of pearls.
 409         *
 410         * However, if you have more than one parents, you cannot
 411         * just add their distance and one for yourself, since
 412         * they usually reach the same ancestor and you would
 413         * end up counting them twice that way.
 414         *
 415         * So we will first count distance of merges the usual
 416         * way, and then fill the blanks using cheaper algorithm.
 417         */
 418        for (p = list; p; p = p->next) {
 419                if (p->item->object.flags & UNINTERESTING)
 420                        continue;
 421                if (weight(p) != -2)
 422                        continue;
 423                weight_set(p, count_distance(p));
 424                clear_distance(list);
 425
 426                /* Does it happen to be at exactly half-way? */
 427                if (!find_all && halfway(p, nr))
 428                        return p;
 429                counted++;
 430        }
 431
 432        show_list("bisection 2 count_distance", counted, nr, list);
 433
 434        while (counted < nr) {
 435                for (p = list; p; p = p->next) {
 436                        struct commit_list *q;
 437                        unsigned flags = p->item->object.flags;
 438
 439                        if (0 <= weight(p))
 440                                continue;
 441                        for (q = p->item->parents; q; q = q->next) {
 442                                if (q->item->object.flags & UNINTERESTING)
 443                                        continue;
 444                                if (0 <= weight(q))
 445                                        break;
 446                        }
 447                        if (!q)
 448                                continue;
 449
 450                        /*
 451                         * weight for p is unknown but q is known.
 452                         * add one for p itself if p is to be counted,
 453                         * otherwise inherit it from q directly.
 454                         */
 455                        if (!(flags & TREESAME)) {
 456                                weight_set(p, weight(q)+1);
 457                                counted++;
 458                                show_list("bisection 2 count one",
 459                                          counted, nr, list);
 460                        }
 461                        else
 462                                weight_set(p, weight(q));
 463
 464                        /* Does it happen to be at exactly half-way? */
 465                        if (!find_all && halfway(p, nr))
 466                                return p;
 467                }
 468        }
 469
 470        show_list("bisection 2 counted all", counted, nr, list);
 471
 472        if (!find_all)
 473                return best_bisection(list, nr);
 474        else
 475                return best_bisection_sorted(list, nr);
 476}
 477
 478static struct commit_list *find_bisection(struct commit_list *list,
 479                                          int *reaches, int *all,
 480                                          int find_all)
 481{
 482        int nr, on_list;
 483        struct commit_list *p, *best, *next, *last;
 484        int *weights;
 485
 486        show_list("bisection 2 entry", 0, 0, list);
 487
 488        /*
 489         * Count the number of total and tree-changing items on the
 490         * list, while reversing the list.
 491         */
 492        for (nr = on_list = 0, last = NULL, p = list;
 493             p;
 494             p = next) {
 495                unsigned flags = p->item->object.flags;
 496
 497                next = p->next;
 498                if (flags & UNINTERESTING)
 499                        continue;
 500                p->next = last;
 501                last = p;
 502                if (!(flags & TREESAME))
 503                        nr++;
 504                on_list++;
 505        }
 506        list = last;
 507        show_list("bisection 2 sorted", 0, nr, list);
 508
 509        *all = nr;
 510        weights = xcalloc(on_list, sizeof(*weights));
 511
 512        /* Do the real work of finding bisection commit. */
 513        best = do_find_bisection(list, nr, weights, find_all);
 514        if (best) {
 515                if (!find_all)
 516                        best->next = NULL;
 517                *reaches = weight(best);
 518        }
 519        free(weights);
 520        return best;
 521}
 522
 523static void read_revisions_from_stdin(struct rev_info *revs)
 524{
 525        char line[1000];
 526
 527        while (fgets(line, sizeof(line), stdin) != NULL) {
 528                int len = strlen(line);
 529                if (len && line[len - 1] == '\n')
 530                        line[--len] = 0;
 531                if (!len)
 532                        break;
 533                if (line[0] == '-')
 534                        die("options not supported in --stdin mode");
 535                if (handle_revision_arg(line, revs, 0, 1))
 536                        die("bad revision '%s'", line);
 537        }
 538}
 539
 540int cmd_rev_list(int argc, const char **argv, const char *prefix)
 541{
 542        struct commit_list *list;
 543        int i;
 544        int read_from_stdin = 0;
 545        int bisect_show_vars = 0;
 546        int bisect_find_all = 0;
 547        int quiet = 0;
 548
 549        git_config(git_default_config);
 550        init_revisions(&revs, prefix);
 551        revs.abbrev = 0;
 552        revs.commit_format = CMIT_FMT_UNSPECIFIED;
 553        argc = setup_revisions(argc, argv, &revs, NULL);
 554
 555        for (i = 1 ; i < argc; i++) {
 556                const char *arg = argv[i];
 557
 558                if (!strcmp(arg, "--header")) {
 559                        revs.verbose_header = 1;
 560                        continue;
 561                }
 562                if (!strcmp(arg, "--timestamp")) {
 563                        show_timestamp = 1;
 564                        continue;
 565                }
 566                if (!strcmp(arg, "--bisect")) {
 567                        bisect_list = 1;
 568                        continue;
 569                }
 570                if (!strcmp(arg, "--bisect-all")) {
 571                        bisect_list = 1;
 572                        bisect_find_all = 1;
 573                        continue;
 574                }
 575                if (!strcmp(arg, "--bisect-vars")) {
 576                        bisect_list = 1;
 577                        bisect_show_vars = 1;
 578                        continue;
 579                }
 580                if (!strcmp(arg, "--stdin")) {
 581                        if (read_from_stdin++)
 582                                die("--stdin given twice?");
 583                        read_revisions_from_stdin(&revs);
 584                        continue;
 585                }
 586                if (!strcmp(arg, "--quiet")) {
 587                        quiet = 1;
 588                        continue;
 589                }
 590                usage(rev_list_usage);
 591
 592        }
 593        if (revs.commit_format != CMIT_FMT_UNSPECIFIED) {
 594                /* The command line has a --pretty  */
 595                hdr_termination = '\n';
 596                if (revs.commit_format == CMIT_FMT_ONELINE)
 597                        header_prefix = "";
 598                else
 599                        header_prefix = "commit ";
 600        }
 601        else if (revs.verbose_header)
 602                /* Only --header was specified */
 603                revs.commit_format = CMIT_FMT_RAW;
 604
 605        list = revs.commits;
 606
 607        if ((!list &&
 608             (!(revs.tag_objects||revs.tree_objects||revs.blob_objects) &&
 609              !revs.pending.nr)) ||
 610            revs.diff)
 611                usage(rev_list_usage);
 612
 613        save_commit_buffer = revs.verbose_header || revs.grep_filter;
 614        if (bisect_list)
 615                revs.limited = 1;
 616
 617        if (prepare_revision_walk(&revs))
 618                die("revision walk setup failed");
 619        if (revs.tree_objects)
 620                mark_edges_uninteresting(revs.commits, &revs, show_edge);
 621
 622        if (bisect_list) {
 623                int reaches = reaches, all = all;
 624
 625                revs.commits = find_bisection(revs.commits, &reaches, &all,
 626                                              bisect_find_all);
 627                if (bisect_show_vars) {
 628                        int cnt;
 629                        char hex[41];
 630                        if (!revs.commits)
 631                                return 1;
 632                        /*
 633                         * revs.commits can reach "reaches" commits among
 634                         * "all" commits.  If it is good, then there are
 635                         * (all-reaches) commits left to be bisected.
 636                         * On the other hand, if it is bad, then the set
 637                         * to bisect is "reaches".
 638                         * A bisect set of size N has (N-1) commits further
 639                         * to test, as we already know one bad one.
 640                         */
 641                        cnt = all - reaches;
 642                        if (cnt < reaches)
 643                                cnt = reaches;
 644                        strcpy(hex, sha1_to_hex(revs.commits->item->object.sha1));
 645
 646                        if (bisect_find_all) {
 647                                traverse_commit_list(&revs, show_commit, show_object);
 648                                printf("------\n");
 649                        }
 650
 651                        printf("bisect_rev=%s\n"
 652                               "bisect_nr=%d\n"
 653                               "bisect_good=%d\n"
 654                               "bisect_bad=%d\n"
 655                               "bisect_all=%d\n",
 656                               hex,
 657                               cnt - 1,
 658                               all - reaches - 1,
 659                               reaches - 1,
 660                               all);
 661                        return 0;
 662                }
 663        }
 664
 665        traverse_commit_list(&revs,
 666                quiet ? finish_commit : show_commit,
 667                quiet ? finish_object : show_object);
 668
 669        return 0;
 670}