builtin-rev-list.con commit gitweb: speed up project listing on large work trees by limiting find depth (ca5e949)
   1#include "cache.h"
   2#include "refs.h"
   3#include "tag.h"
   4#include "commit.h"
   5#include "tree.h"
   6#include "blob.h"
   7#include "tree-walk.h"
   8#include "diff.h"
   9#include "revision.h"
  10#include "list-objects.h"
  11#include "builtin.h"
  12
  13/* bits #0-15 in revision.h */
  14
  15#define COUNTED         (1u<<16)
  16
  17static const char rev_list_usage[] =
  18"git-rev-list [OPTION] <commit-id>... [ -- paths... ]\n"
  19"  limiting output:\n"
  20"    --max-count=nr\n"
  21"    --max-age=epoch\n"
  22"    --min-age=epoch\n"
  23"    --sparse\n"
  24"    --no-merges\n"
  25"    --remove-empty\n"
  26"    --all\n"
  27"    --stdin\n"
  28"  ordering output:\n"
  29"    --topo-order\n"
  30"    --date-order\n"
  31"  formatting output:\n"
  32"    --parents\n"
  33"    --objects | --objects-edge\n"
  34"    --unpacked\n"
  35"    --header | --pretty\n"
  36"    --abbrev=nr | --no-abbrev\n"
  37"    --abbrev-commit\n"
  38"    --left-right\n"
  39"  special purpose:\n"
  40"    --bisect\n"
  41"    --bisect-vars"
  42;
  43
  44static struct rev_info revs;
  45
  46static int bisect_list;
  47static int show_timestamp;
  48static int hdr_termination;
  49static const char *header_prefix;
  50
  51static void show_commit(struct commit *commit)
  52{
  53        if (show_timestamp)
  54                printf("%lu ", commit->date);
  55        if (header_prefix)
  56                fputs(header_prefix, stdout);
  57        if (commit->object.flags & BOUNDARY)
  58                putchar('-');
  59        else if (revs.left_right) {
  60                if (commit->object.flags & SYMMETRIC_LEFT)
  61                        putchar('<');
  62                else
  63                        putchar('>');
  64        }
  65        if (revs.abbrev_commit && revs.abbrev)
  66                fputs(find_unique_abbrev(commit->object.sha1, revs.abbrev),
  67                      stdout);
  68        else
  69                fputs(sha1_to_hex(commit->object.sha1), stdout);
  70        if (revs.parents) {
  71                struct commit_list *parents = commit->parents;
  72                while (parents) {
  73                        printf(" %s", sha1_to_hex(parents->item->object.sha1));
  74                        parents = parents->next;
  75                }
  76        }
  77        if (revs.commit_format == CMIT_FMT_ONELINE)
  78                putchar(' ');
  79        else
  80                putchar('\n');
  81
  82        if (revs.verbose_header) {
  83                struct strbuf buf;
  84                strbuf_init(&buf, 0);
  85                pretty_print_commit(revs.commit_format, commit,
  86                                        &buf, revs.abbrev, NULL, NULL, revs.date_mode);
  87                if (buf.len)
  88                        printf("%s%c", buf.buf, hdr_termination);
  89                strbuf_release(&buf);
  90        }
  91        maybe_flush_or_die(stdout, "stdout");
  92        if (commit->parents) {
  93                free_commit_list(commit->parents);
  94                commit->parents = NULL;
  95        }
  96        free(commit->buffer);
  97        commit->buffer = NULL;
  98}
  99
 100static void show_object(struct object_array_entry *p)
 101{
 102        /* An object with name "foo\n0000000..." can be used to
 103         * confuse downstream git-pack-objects very badly.
 104         */
 105        const char *ep = strchr(p->name, '\n');
 106
 107        if (p->item->type == OBJ_BLOB && !has_sha1_file(p->item->sha1))
 108                die("missing blob object '%s'", sha1_to_hex(p->item->sha1));
 109
 110        if (ep) {
 111                printf("%s %.*s\n", sha1_to_hex(p->item->sha1),
 112                       (int) (ep - p->name),
 113                       p->name);
 114        }
 115        else
 116                printf("%s %s\n", sha1_to_hex(p->item->sha1), p->name);
 117}
 118
 119static void show_edge(struct commit *commit)
 120{
 121        printf("-%s\n", sha1_to_hex(commit->object.sha1));
 122}
 123
 124/*
 125 * This is a truly stupid algorithm, but it's only
 126 * used for bisection, and we just don't care enough.
 127 *
 128 * We care just barely enough to avoid recursing for
 129 * non-merge entries.
 130 */
 131static int count_distance(struct commit_list *entry)
 132{
 133        int nr = 0;
 134
 135        while (entry) {
 136                struct commit *commit = entry->item;
 137                struct commit_list *p;
 138
 139                if (commit->object.flags & (UNINTERESTING | COUNTED))
 140                        break;
 141                if (!revs.prune_fn || (commit->object.flags & TREECHANGE))
 142                        nr++;
 143                commit->object.flags |= COUNTED;
 144                p = commit->parents;
 145                entry = p;
 146                if (p) {
 147                        p = p->next;
 148                        while (p) {
 149                                nr += count_distance(p);
 150                                p = p->next;
 151                        }
 152                }
 153        }
 154
 155        return nr;
 156}
 157
 158static void clear_distance(struct commit_list *list)
 159{
 160        while (list) {
 161                struct commit *commit = list->item;
 162                commit->object.flags &= ~COUNTED;
 163                list = list->next;
 164        }
 165}
 166
 167#define DEBUG_BISECT 0
 168
 169static inline int weight(struct commit_list *elem)
 170{
 171        return *((int*)(elem->item->util));
 172}
 173
 174static inline void weight_set(struct commit_list *elem, int weight)
 175{
 176        *((int*)(elem->item->util)) = weight;
 177}
 178
 179static int count_interesting_parents(struct commit *commit)
 180{
 181        struct commit_list *p;
 182        int count;
 183
 184        for (count = 0, p = commit->parents; p; p = p->next) {
 185                if (p->item->object.flags & UNINTERESTING)
 186                        continue;
 187                count++;
 188        }
 189        return count;
 190}
 191
 192static inline int halfway(struct commit_list *p, int nr)
 193{
 194        /*
 195         * Don't short-cut something we are not going to return!
 196         */
 197        if (revs.prune_fn && !(p->item->object.flags & TREECHANGE))
 198                return 0;
 199        if (DEBUG_BISECT)
 200                return 0;
 201        /*
 202         * 2 and 3 are halfway of 5.
 203         * 3 is halfway of 6 but 2 and 4 are not.
 204         */
 205        switch (2 * weight(p) - nr) {
 206        case -1: case 0: case 1:
 207                return 1;
 208        default:
 209                return 0;
 210        }
 211}
 212
 213#if !DEBUG_BISECT
 214#define show_list(a,b,c,d) do { ; } while (0)
 215#else
 216static void show_list(const char *debug, int counted, int nr,
 217                      struct commit_list *list)
 218{
 219        struct commit_list *p;
 220
 221        fprintf(stderr, "%s (%d/%d)\n", debug, counted, nr);
 222
 223        for (p = list; p; p = p->next) {
 224                struct commit_list *pp;
 225                struct commit *commit = p->item;
 226                unsigned flags = commit->object.flags;
 227                enum object_type type;
 228                unsigned long size;
 229                char *buf = read_sha1_file(commit->object.sha1, &type, &size);
 230                char *ep, *sp;
 231
 232                fprintf(stderr, "%c%c%c ",
 233                        (flags & TREECHANGE) ? 'T' : ' ',
 234                        (flags & UNINTERESTING) ? 'U' : ' ',
 235                        (flags & COUNTED) ? 'C' : ' ');
 236                if (commit->util)
 237                        fprintf(stderr, "%3d", weight(p));
 238                else
 239                        fprintf(stderr, "---");
 240                fprintf(stderr, " %.*s", 8, sha1_to_hex(commit->object.sha1));
 241                for (pp = commit->parents; pp; pp = pp->next)
 242                        fprintf(stderr, " %.*s", 8,
 243                                sha1_to_hex(pp->item->object.sha1));
 244
 245                sp = strstr(buf, "\n\n");
 246                if (sp) {
 247                        sp += 2;
 248                        for (ep = sp; *ep && *ep != '\n'; ep++)
 249                                ;
 250                        fprintf(stderr, " %.*s", (int)(ep - sp), sp);
 251                }
 252                fprintf(stderr, "\n");
 253        }
 254}
 255#endif /* DEBUG_BISECT */
 256
 257static struct commit_list *best_bisection(struct commit_list *list, int nr)
 258{
 259        struct commit_list *p, *best;
 260        int best_distance = -1;
 261
 262        best = list;
 263        for (p = list; p; p = p->next) {
 264                int distance;
 265                unsigned flags = p->item->object.flags;
 266
 267                if (revs.prune_fn && !(flags & TREECHANGE))
 268                        continue;
 269                distance = weight(p);
 270                if (nr - distance < distance)
 271                        distance = nr - distance;
 272                if (distance > best_distance) {
 273                        best = p;
 274                        best_distance = distance;
 275                }
 276        }
 277
 278        return best;
 279}
 280
 281/*
 282 * zero or positive weight is the number of interesting commits it can
 283 * reach, including itself.  Especially, weight = 0 means it does not
 284 * reach any tree-changing commits (e.g. just above uninteresting one
 285 * but traversal is with pathspec).
 286 *
 287 * weight = -1 means it has one parent and its distance is yet to
 288 * be computed.
 289 *
 290 * weight = -2 means it has more than one parent and its distance is
 291 * unknown.  After running count_distance() first, they will get zero
 292 * or positive distance.
 293 */
 294static struct commit_list *do_find_bisection(struct commit_list *list,
 295                                             int nr, int *weights)
 296{
 297        int n, counted;
 298        struct commit_list *p;
 299
 300        counted = 0;
 301
 302        for (n = 0, p = list; p; p = p->next) {
 303                struct commit *commit = p->item;
 304                unsigned flags = commit->object.flags;
 305
 306                p->item->util = &weights[n++];
 307                switch (count_interesting_parents(commit)) {
 308                case 0:
 309                        if (!revs.prune_fn || (flags & TREECHANGE)) {
 310                                weight_set(p, 1);
 311                                counted++;
 312                                show_list("bisection 2 count one",
 313                                          counted, nr, list);
 314                        }
 315                        /*
 316                         * otherwise, it is known not to reach any
 317                         * tree-changing commit and gets weight 0.
 318                         */
 319                        break;
 320                case 1:
 321                        weight_set(p, -1);
 322                        break;
 323                default:
 324                        weight_set(p, -2);
 325                        break;
 326                }
 327        }
 328
 329        show_list("bisection 2 initialize", counted, nr, list);
 330
 331        /*
 332         * If you have only one parent in the resulting set
 333         * then you can reach one commit more than that parent
 334         * can reach.  So we do not have to run the expensive
 335         * count_distance() for single strand of pearls.
 336         *
 337         * However, if you have more than one parents, you cannot
 338         * just add their distance and one for yourself, since
 339         * they usually reach the same ancestor and you would
 340         * end up counting them twice that way.
 341         *
 342         * So we will first count distance of merges the usual
 343         * way, and then fill the blanks using cheaper algorithm.
 344         */
 345        for (p = list; p; p = p->next) {
 346                if (p->item->object.flags & UNINTERESTING)
 347                        continue;
 348                if (weight(p) != -2)
 349                        continue;
 350                weight_set(p, count_distance(p));
 351                clear_distance(list);
 352
 353                /* Does it happen to be at exactly half-way? */
 354                if (halfway(p, nr))
 355                        return p;
 356                counted++;
 357        }
 358
 359        show_list("bisection 2 count_distance", counted, nr, list);
 360
 361        while (counted < nr) {
 362                for (p = list; p; p = p->next) {
 363                        struct commit_list *q;
 364                        unsigned flags = p->item->object.flags;
 365
 366                        if (0 <= weight(p))
 367                                continue;
 368                        for (q = p->item->parents; q; q = q->next) {
 369                                if (q->item->object.flags & UNINTERESTING)
 370                                        continue;
 371                                if (0 <= weight(q))
 372                                        break;
 373                        }
 374                        if (!q)
 375                                continue;
 376
 377                        /*
 378                         * weight for p is unknown but q is known.
 379                         * add one for p itself if p is to be counted,
 380                         * otherwise inherit it from q directly.
 381                         */
 382                        if (!revs.prune_fn || (flags & TREECHANGE)) {
 383                                weight_set(p, weight(q)+1);
 384                                counted++;
 385                                show_list("bisection 2 count one",
 386                                          counted, nr, list);
 387                        }
 388                        else
 389                                weight_set(p, weight(q));
 390
 391                        /* Does it happen to be at exactly half-way? */
 392                        if (halfway(p, nr))
 393                                return p;
 394                }
 395        }
 396
 397        show_list("bisection 2 counted all", counted, nr, list);
 398
 399        /* Then find the best one */
 400        return best_bisection(list, nr);
 401}
 402
 403static struct commit_list *find_bisection(struct commit_list *list,
 404                                          int *reaches, int *all)
 405{
 406        int nr, on_list;
 407        struct commit_list *p, *best, *next, *last;
 408        int *weights;
 409
 410        show_list("bisection 2 entry", 0, 0, list);
 411
 412        /*
 413         * Count the number of total and tree-changing items on the
 414         * list, while reversing the list.
 415         */
 416        for (nr = on_list = 0, last = NULL, p = list;
 417             p;
 418             p = next) {
 419                unsigned flags = p->item->object.flags;
 420
 421                next = p->next;
 422                if (flags & UNINTERESTING)
 423                        continue;
 424                p->next = last;
 425                last = p;
 426                if (!revs.prune_fn || (flags & TREECHANGE))
 427                        nr++;
 428                on_list++;
 429        }
 430        list = last;
 431        show_list("bisection 2 sorted", 0, nr, list);
 432
 433        *all = nr;
 434        weights = xcalloc(on_list, sizeof(*weights));
 435
 436        /* Do the real work of finding bisection commit. */
 437        best = do_find_bisection(list, nr, weights);
 438
 439        if (best) {
 440                best->next = NULL;
 441                *reaches = weight(best);
 442        }
 443        free(weights);
 444
 445        return best;
 446}
 447
 448static void read_revisions_from_stdin(struct rev_info *revs)
 449{
 450        char line[1000];
 451
 452        while (fgets(line, sizeof(line), stdin) != NULL) {
 453                int len = strlen(line);
 454                if (line[len - 1] == '\n')
 455                        line[--len] = 0;
 456                if (!len)
 457                        break;
 458                if (line[0] == '-')
 459                        die("options not supported in --stdin mode");
 460                if (handle_revision_arg(line, revs, 0, 1))
 461                        die("bad revision '%s'", line);
 462        }
 463}
 464
 465int cmd_rev_list(int argc, const char **argv, const char *prefix)
 466{
 467        struct commit_list *list;
 468        int i;
 469        int read_from_stdin = 0;
 470        int bisect_show_vars = 0;
 471
 472        git_config(git_default_config);
 473        init_revisions(&revs, prefix);
 474        revs.abbrev = 0;
 475        revs.commit_format = CMIT_FMT_UNSPECIFIED;
 476        argc = setup_revisions(argc, argv, &revs, NULL);
 477
 478        for (i = 1 ; i < argc; i++) {
 479                const char *arg = argv[i];
 480
 481                if (!strcmp(arg, "--header")) {
 482                        revs.verbose_header = 1;
 483                        continue;
 484                }
 485                if (!strcmp(arg, "--timestamp")) {
 486                        show_timestamp = 1;
 487                        continue;
 488                }
 489                if (!strcmp(arg, "--bisect")) {
 490                        bisect_list = 1;
 491                        continue;
 492                }
 493                if (!strcmp(arg, "--bisect-vars")) {
 494                        bisect_list = 1;
 495                        bisect_show_vars = 1;
 496                        continue;
 497                }
 498                if (!strcmp(arg, "--stdin")) {
 499                        if (read_from_stdin++)
 500                                die("--stdin given twice?");
 501                        read_revisions_from_stdin(&revs);
 502                        continue;
 503                }
 504                usage(rev_list_usage);
 505
 506        }
 507        if (revs.commit_format != CMIT_FMT_UNSPECIFIED) {
 508                /* The command line has a --pretty  */
 509                hdr_termination = '\n';
 510                if (revs.commit_format == CMIT_FMT_ONELINE)
 511                        header_prefix = "";
 512                else
 513                        header_prefix = "commit ";
 514        }
 515        else if (revs.verbose_header)
 516                /* Only --header was specified */
 517                revs.commit_format = CMIT_FMT_RAW;
 518
 519        list = revs.commits;
 520
 521        if ((!list &&
 522             (!(revs.tag_objects||revs.tree_objects||revs.blob_objects) &&
 523              !revs.pending.nr)) ||
 524            revs.diff)
 525                usage(rev_list_usage);
 526
 527        save_commit_buffer = revs.verbose_header || revs.grep_filter;
 528        track_object_refs = 0;
 529        if (bisect_list)
 530                revs.limited = 1;
 531
 532        prepare_revision_walk(&revs);
 533        if (revs.tree_objects)
 534                mark_edges_uninteresting(revs.commits, &revs, show_edge);
 535
 536        if (bisect_list) {
 537                int reaches = reaches, all = all;
 538
 539                revs.commits = find_bisection(revs.commits, &reaches, &all);
 540                if (bisect_show_vars) {
 541                        int cnt;
 542                        if (!revs.commits)
 543                                return 1;
 544                        /*
 545                         * revs.commits can reach "reaches" commits among
 546                         * "all" commits.  If it is good, then there are
 547                         * (all-reaches) commits left to be bisected.
 548                         * On the other hand, if it is bad, then the set
 549                         * to bisect is "reaches".
 550                         * A bisect set of size N has (N-1) commits further
 551                         * to test, as we already know one bad one.
 552                         */
 553                        cnt = all-reaches;
 554                        if (cnt < reaches)
 555                                cnt = reaches;
 556                        printf("bisect_rev=%s\n"
 557                               "bisect_nr=%d\n"
 558                               "bisect_good=%d\n"
 559                               "bisect_bad=%d\n"
 560                               "bisect_all=%d\n",
 561                               sha1_to_hex(revs.commits->item->object.sha1),
 562                               cnt - 1,
 563                               all - reaches - 1,
 564                               reaches - 1,
 565                               all);
 566                        return 0;
 567                }
 568        }
 569
 570        traverse_commit_list(&revs, show_commit, show_object);
 571
 572        return 0;
 573}