builtin-blame.con commit gitweb: Allow arbitrary strings to be dug with pickaxe (4229aa5)
   1/*
   2 * Pickaxe
   3 *
   4 * Copyright (c) 2006, Junio C Hamano
   5 */
   6
   7#include "cache.h"
   8#include "builtin.h"
   9#include "blob.h"
  10#include "commit.h"
  11#include "tag.h"
  12#include "tree-walk.h"
  13#include "diff.h"
  14#include "diffcore.h"
  15#include "revision.h"
  16#include "quote.h"
  17#include "xdiff-interface.h"
  18#include "cache-tree.h"
  19#include "path-list.h"
  20#include "mailmap.h"
  21
  22static char blame_usage[] =
  23"git-blame [-c] [-b] [-l] [--root] [-t] [-f] [-n] [-s] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [--contents <filename>] [--incremental] [commit] [--] file\n"
  24"  -c                  Use the same output mode as git-annotate (Default: off)\n"
  25"  -b                  Show blank SHA-1 for boundary commits (Default: off)\n"
  26"  -l                  Show long commit SHA1 (Default: off)\n"
  27"  --root              Do not treat root commits as boundaries (Default: off)\n"
  28"  -t                  Show raw timestamp (Default: off)\n"
  29"  -f, --show-name     Show original filename (Default: auto)\n"
  30"  -n, --show-number   Show original linenumber (Default: off)\n"
  31"  -s                  Suppress author name and timestamp (Default: off)\n"
  32"  -p, --porcelain     Show in a format designed for machine consumption\n"
  33"  -L n,m              Process only line range n,m, counting from 1\n"
  34"  -M, -C              Find line movements within and across files\n"
  35"  --incremental       Show blame entries as we find them, incrementally\n"
  36"  --contents file     Use <file>'s contents as the final image\n"
  37"  -S revs-file        Use revisions from revs-file instead of calling git-rev-list\n";
  38
  39static int longest_file;
  40static int longest_author;
  41static int max_orig_digits;
  42static int max_digits;
  43static int max_score_digits;
  44static int show_root;
  45static int blank_boundary;
  46static int incremental;
  47static int cmd_is_annotate;
  48static struct path_list mailmap;
  49
  50#ifndef DEBUG
  51#define DEBUG 0
  52#endif
  53
  54/* stats */
  55static int num_read_blob;
  56static int num_get_patch;
  57static int num_commits;
  58
  59#define PICKAXE_BLAME_MOVE              01
  60#define PICKAXE_BLAME_COPY              02
  61#define PICKAXE_BLAME_COPY_HARDER       04
  62#define PICKAXE_BLAME_COPY_HARDEST      010
  63
  64/*
  65 * blame for a blame_entry with score lower than these thresholds
  66 * is not passed to the parent using move/copy logic.
  67 */
  68static unsigned blame_move_score;
  69static unsigned blame_copy_score;
  70#define BLAME_DEFAULT_MOVE_SCORE        20
  71#define BLAME_DEFAULT_COPY_SCORE        40
  72
  73/* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
  74#define METAINFO_SHOWN          (1u<<12)
  75#define MORE_THAN_ONE_PATH      (1u<<13)
  76
  77/*
  78 * One blob in a commit that is being suspected
  79 */
  80struct origin {
  81        int refcnt;
  82        struct commit *commit;
  83        mmfile_t file;
  84        unsigned char blob_sha1[20];
  85        char path[FLEX_ARRAY];
  86};
  87
  88/*
  89 * Given an origin, prepare mmfile_t structure to be used by the
  90 * diff machinery
  91 */
  92static char *fill_origin_blob(struct origin *o, mmfile_t *file)
  93{
  94        if (!o->file.ptr) {
  95                enum object_type type;
  96                num_read_blob++;
  97                file->ptr = read_sha1_file(o->blob_sha1, &type,
  98                                           (unsigned long *)(&(file->size)));
  99                o->file = *file;
 100        }
 101        else
 102                *file = o->file;
 103        return file->ptr;
 104}
 105
 106/*
 107 * Origin is refcounted and usually we keep the blob contents to be
 108 * reused.
 109 */
 110static inline struct origin *origin_incref(struct origin *o)
 111{
 112        if (o)
 113                o->refcnt++;
 114        return o;
 115}
 116
 117static void origin_decref(struct origin *o)
 118{
 119        if (o && --o->refcnt <= 0) {
 120                if (o->file.ptr)
 121                        free(o->file.ptr);
 122                memset(o, 0, sizeof(*o));
 123                free(o);
 124        }
 125}
 126
 127/*
 128 * Each group of lines is described by a blame_entry; it can be split
 129 * as we pass blame to the parents.  They form a linked list in the
 130 * scoreboard structure, sorted by the target line number.
 131 */
 132struct blame_entry {
 133        struct blame_entry *prev;
 134        struct blame_entry *next;
 135
 136        /* the first line of this group in the final image;
 137         * internally all line numbers are 0 based.
 138         */
 139        int lno;
 140
 141        /* how many lines this group has */
 142        int num_lines;
 143
 144        /* the commit that introduced this group into the final image */
 145        struct origin *suspect;
 146
 147        /* true if the suspect is truly guilty; false while we have not
 148         * checked if the group came from one of its parents.
 149         */
 150        char guilty;
 151
 152        /* the line number of the first line of this group in the
 153         * suspect's file; internally all line numbers are 0 based.
 154         */
 155        int s_lno;
 156
 157        /* how significant this entry is -- cached to avoid
 158         * scanning the lines over and over.
 159         */
 160        unsigned score;
 161};
 162
 163/*
 164 * The current state of the blame assignment.
 165 */
 166struct scoreboard {
 167        /* the final commit (i.e. where we started digging from) */
 168        struct commit *final;
 169
 170        const char *path;
 171
 172        /*
 173         * The contents in the final image.
 174         * Used by many functions to obtain contents of the nth line,
 175         * indexed with scoreboard.lineno[blame_entry.lno].
 176         */
 177        const char *final_buf;
 178        unsigned long final_buf_size;
 179
 180        /* linked list of blames */
 181        struct blame_entry *ent;
 182
 183        /* look-up a line in the final buffer */
 184        int num_lines;
 185        int *lineno;
 186};
 187
 188static inline int same_suspect(struct origin *a, struct origin *b)
 189{
 190        if (a == b)
 191                return 1;
 192        if (a->commit != b->commit)
 193                return 0;
 194        return !strcmp(a->path, b->path);
 195}
 196
 197static void sanity_check_refcnt(struct scoreboard *);
 198
 199/*
 200 * If two blame entries that are next to each other came from
 201 * contiguous lines in the same origin (i.e. <commit, path> pair),
 202 * merge them together.
 203 */
 204static void coalesce(struct scoreboard *sb)
 205{
 206        struct blame_entry *ent, *next;
 207
 208        for (ent = sb->ent; ent && (next = ent->next); ent = next) {
 209                if (same_suspect(ent->suspect, next->suspect) &&
 210                    ent->guilty == next->guilty &&
 211                    ent->s_lno + ent->num_lines == next->s_lno) {
 212                        ent->num_lines += next->num_lines;
 213                        ent->next = next->next;
 214                        if (ent->next)
 215                                ent->next->prev = ent;
 216                        origin_decref(next->suspect);
 217                        free(next);
 218                        ent->score = 0;
 219                        next = ent; /* again */
 220                }
 221        }
 222
 223        if (DEBUG) /* sanity */
 224                sanity_check_refcnt(sb);
 225}
 226
 227/*
 228 * Given a commit and a path in it, create a new origin structure.
 229 * The callers that add blame to the scoreboard should use
 230 * get_origin() to obtain shared, refcounted copy instead of calling
 231 * this function directly.
 232 */
 233static struct origin *make_origin(struct commit *commit, const char *path)
 234{
 235        struct origin *o;
 236        o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
 237        o->commit = commit;
 238        o->refcnt = 1;
 239        strcpy(o->path, path);
 240        return o;
 241}
 242
 243/*
 244 * Locate an existing origin or create a new one.
 245 */
 246static struct origin *get_origin(struct scoreboard *sb,
 247                                 struct commit *commit,
 248                                 const char *path)
 249{
 250        struct blame_entry *e;
 251
 252        for (e = sb->ent; e; e = e->next) {
 253                if (e->suspect->commit == commit &&
 254                    !strcmp(e->suspect->path, path))
 255                        return origin_incref(e->suspect);
 256        }
 257        return make_origin(commit, path);
 258}
 259
 260/*
 261 * Fill the blob_sha1 field of an origin if it hasn't, so that later
 262 * call to fill_origin_blob() can use it to locate the data.  blob_sha1
 263 * for an origin is also used to pass the blame for the entire file to
 264 * the parent to detect the case where a child's blob is identical to
 265 * that of its parent's.
 266 */
 267static int fill_blob_sha1(struct origin *origin)
 268{
 269        unsigned mode;
 270
 271        if (!is_null_sha1(origin->blob_sha1))
 272                return 0;
 273        if (get_tree_entry(origin->commit->object.sha1,
 274                           origin->path,
 275                           origin->blob_sha1, &mode))
 276                goto error_out;
 277        if (sha1_object_info(origin->blob_sha1, NULL) != OBJ_BLOB)
 278                goto error_out;
 279        return 0;
 280 error_out:
 281        hashclr(origin->blob_sha1);
 282        return -1;
 283}
 284
 285/*
 286 * We have an origin -- check if the same path exists in the
 287 * parent and return an origin structure to represent it.
 288 */
 289static struct origin *find_origin(struct scoreboard *sb,
 290                                  struct commit *parent,
 291                                  struct origin *origin)
 292{
 293        struct origin *porigin = NULL;
 294        struct diff_options diff_opts;
 295        const char *paths[2];
 296
 297        if (parent->util) {
 298                /*
 299                 * Each commit object can cache one origin in that
 300                 * commit.  This is a freestanding copy of origin and
 301                 * not refcounted.
 302                 */
 303                struct origin *cached = parent->util;
 304                if (!strcmp(cached->path, origin->path)) {
 305                        /*
 306                         * The same path between origin and its parent
 307                         * without renaming -- the most common case.
 308                         */
 309                        porigin = get_origin(sb, parent, cached->path);
 310
 311                        /*
 312                         * If the origin was newly created (i.e. get_origin
 313                         * would call make_origin if none is found in the
 314                         * scoreboard), it does not know the blob_sha1,
 315                         * so copy it.  Otherwise porigin was in the
 316                         * scoreboard and already knows blob_sha1.
 317                         */
 318                        if (porigin->refcnt == 1)
 319                                hashcpy(porigin->blob_sha1, cached->blob_sha1);
 320                        return porigin;
 321                }
 322                /* otherwise it was not very useful; free it */
 323                free(parent->util);
 324                parent->util = NULL;
 325        }
 326
 327        /* See if the origin->path is different between parent
 328         * and origin first.  Most of the time they are the
 329         * same and diff-tree is fairly efficient about this.
 330         */
 331        diff_setup(&diff_opts);
 332        diff_opts.recursive = 1;
 333        diff_opts.detect_rename = 0;
 334        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 335        paths[0] = origin->path;
 336        paths[1] = NULL;
 337
 338        diff_tree_setup_paths(paths, &diff_opts);
 339        if (diff_setup_done(&diff_opts) < 0)
 340                die("diff-setup");
 341
 342        if (is_null_sha1(origin->commit->object.sha1))
 343                do_diff_cache(parent->tree->object.sha1, &diff_opts);
 344        else
 345                diff_tree_sha1(parent->tree->object.sha1,
 346                               origin->commit->tree->object.sha1,
 347                               "", &diff_opts);
 348        diffcore_std(&diff_opts);
 349
 350        /* It is either one entry that says "modified", or "created",
 351         * or nothing.
 352         */
 353        if (!diff_queued_diff.nr) {
 354                /* The path is the same as parent */
 355                porigin = get_origin(sb, parent, origin->path);
 356                hashcpy(porigin->blob_sha1, origin->blob_sha1);
 357        }
 358        else if (diff_queued_diff.nr != 1)
 359                die("internal error in blame::find_origin");
 360        else {
 361                struct diff_filepair *p = diff_queued_diff.queue[0];
 362                switch (p->status) {
 363                default:
 364                        die("internal error in blame::find_origin (%c)",
 365                            p->status);
 366                case 'M':
 367                        porigin = get_origin(sb, parent, origin->path);
 368                        hashcpy(porigin->blob_sha1, p->one->sha1);
 369                        break;
 370                case 'A':
 371                case 'T':
 372                        /* Did not exist in parent, or type changed */
 373                        break;
 374                }
 375        }
 376        diff_flush(&diff_opts);
 377        if (porigin) {
 378                /*
 379                 * Create a freestanding copy that is not part of
 380                 * the refcounted origin found in the scoreboard, and
 381                 * cache it in the commit.
 382                 */
 383                struct origin *cached;
 384
 385                cached = make_origin(porigin->commit, porigin->path);
 386                hashcpy(cached->blob_sha1, porigin->blob_sha1);
 387                parent->util = cached;
 388        }
 389        return porigin;
 390}
 391
 392/*
 393 * We have an origin -- find the path that corresponds to it in its
 394 * parent and return an origin structure to represent it.
 395 */
 396static struct origin *find_rename(struct scoreboard *sb,
 397                                  struct commit *parent,
 398                                  struct origin *origin)
 399{
 400        struct origin *porigin = NULL;
 401        struct diff_options diff_opts;
 402        int i;
 403        const char *paths[2];
 404
 405        diff_setup(&diff_opts);
 406        diff_opts.recursive = 1;
 407        diff_opts.detect_rename = DIFF_DETECT_RENAME;
 408        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 409        diff_opts.single_follow = origin->path;
 410        paths[0] = NULL;
 411        diff_tree_setup_paths(paths, &diff_opts);
 412        if (diff_setup_done(&diff_opts) < 0)
 413                die("diff-setup");
 414
 415        if (is_null_sha1(origin->commit->object.sha1))
 416                do_diff_cache(parent->tree->object.sha1, &diff_opts);
 417        else
 418                diff_tree_sha1(parent->tree->object.sha1,
 419                               origin->commit->tree->object.sha1,
 420                               "", &diff_opts);
 421        diffcore_std(&diff_opts);
 422
 423        for (i = 0; i < diff_queued_diff.nr; i++) {
 424                struct diff_filepair *p = diff_queued_diff.queue[i];
 425                if ((p->status == 'R' || p->status == 'C') &&
 426                    !strcmp(p->two->path, origin->path)) {
 427                        porigin = get_origin(sb, parent, p->one->path);
 428                        hashcpy(porigin->blob_sha1, p->one->sha1);
 429                        break;
 430                }
 431        }
 432        diff_flush(&diff_opts);
 433        return porigin;
 434}
 435
 436/*
 437 * Parsing of patch chunks...
 438 */
 439struct chunk {
 440        /* line number in postimage; up to but not including this
 441         * line is the same as preimage
 442         */
 443        int same;
 444
 445        /* preimage line number after this chunk */
 446        int p_next;
 447
 448        /* postimage line number after this chunk */
 449        int t_next;
 450};
 451
 452struct patch {
 453        struct chunk *chunks;
 454        int num;
 455};
 456
 457struct blame_diff_state {
 458        struct xdiff_emit_state xm;
 459        struct patch *ret;
 460        unsigned hunk_post_context;
 461        unsigned hunk_in_pre_context : 1;
 462};
 463
 464static void process_u_diff(void *state_, char *line, unsigned long len)
 465{
 466        struct blame_diff_state *state = state_;
 467        struct chunk *chunk;
 468        int off1, off2, len1, len2, num;
 469
 470        num = state->ret->num;
 471        if (len < 4 || line[0] != '@' || line[1] != '@') {
 472                if (state->hunk_in_pre_context && line[0] == ' ')
 473                        state->ret->chunks[num - 1].same++;
 474                else {
 475                        state->hunk_in_pre_context = 0;
 476                        if (line[0] == ' ')
 477                                state->hunk_post_context++;
 478                        else
 479                                state->hunk_post_context = 0;
 480                }
 481                return;
 482        }
 483
 484        if (num && state->hunk_post_context) {
 485                chunk = &state->ret->chunks[num - 1];
 486                chunk->p_next -= state->hunk_post_context;
 487                chunk->t_next -= state->hunk_post_context;
 488        }
 489        state->ret->num = ++num;
 490        state->ret->chunks = xrealloc(state->ret->chunks,
 491                                      sizeof(struct chunk) * num);
 492        chunk = &state->ret->chunks[num - 1];
 493        if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
 494                state->ret->num--;
 495                return;
 496        }
 497
 498        /* Line numbers in patch output are one based. */
 499        off1--;
 500        off2--;
 501
 502        chunk->same = len2 ? off2 : (off2 + 1);
 503
 504        chunk->p_next = off1 + (len1 ? len1 : 1);
 505        chunk->t_next = chunk->same + len2;
 506        state->hunk_in_pre_context = 1;
 507        state->hunk_post_context = 0;
 508}
 509
 510static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
 511                                    int context)
 512{
 513        struct blame_diff_state state;
 514        xpparam_t xpp;
 515        xdemitconf_t xecfg;
 516        xdemitcb_t ecb;
 517
 518        xpp.flags = XDF_NEED_MINIMAL;
 519        xecfg.ctxlen = context;
 520        xecfg.flags = 0;
 521        ecb.outf = xdiff_outf;
 522        ecb.priv = &state;
 523        memset(&state, 0, sizeof(state));
 524        state.xm.consume = process_u_diff;
 525        state.ret = xmalloc(sizeof(struct patch));
 526        state.ret->chunks = NULL;
 527        state.ret->num = 0;
 528
 529        xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);
 530
 531        if (state.ret->num) {
 532                struct chunk *chunk;
 533                chunk = &state.ret->chunks[state.ret->num - 1];
 534                chunk->p_next -= state.hunk_post_context;
 535                chunk->t_next -= state.hunk_post_context;
 536        }
 537        return state.ret;
 538}
 539
 540/*
 541 * Run diff between two origins and grab the patch output, so that
 542 * we can pass blame for lines origin is currently suspected for
 543 * to its parent.
 544 */
 545static struct patch *get_patch(struct origin *parent, struct origin *origin)
 546{
 547        mmfile_t file_p, file_o;
 548        struct patch *patch;
 549
 550        fill_origin_blob(parent, &file_p);
 551        fill_origin_blob(origin, &file_o);
 552        if (!file_p.ptr || !file_o.ptr)
 553                return NULL;
 554        patch = compare_buffer(&file_p, &file_o, 0);
 555        num_get_patch++;
 556        return patch;
 557}
 558
 559static void free_patch(struct patch *p)
 560{
 561        free(p->chunks);
 562        free(p);
 563}
 564
 565/*
 566 * Link in a new blame entry to the scoreboard.  Entries that cover the
 567 * same line range have been removed from the scoreboard previously.
 568 */
 569static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
 570{
 571        struct blame_entry *ent, *prev = NULL;
 572
 573        origin_incref(e->suspect);
 574
 575        for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
 576                prev = ent;
 577
 578        /* prev, if not NULL, is the last one that is below e */
 579        e->prev = prev;
 580        if (prev) {
 581                e->next = prev->next;
 582                prev->next = e;
 583        }
 584        else {
 585                e->next = sb->ent;
 586                sb->ent = e;
 587        }
 588        if (e->next)
 589                e->next->prev = e;
 590}
 591
 592/*
 593 * src typically is on-stack; we want to copy the information in it to
 594 * an malloced blame_entry that is already on the linked list of the
 595 * scoreboard.  The origin of dst loses a refcnt while the origin of src
 596 * gains one.
 597 */
 598static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
 599{
 600        struct blame_entry *p, *n;
 601
 602        p = dst->prev;
 603        n = dst->next;
 604        origin_incref(src->suspect);
 605        origin_decref(dst->suspect);
 606        memcpy(dst, src, sizeof(*src));
 607        dst->prev = p;
 608        dst->next = n;
 609        dst->score = 0;
 610}
 611
 612static const char *nth_line(struct scoreboard *sb, int lno)
 613{
 614        return sb->final_buf + sb->lineno[lno];
 615}
 616
 617/*
 618 * It is known that lines between tlno to same came from parent, and e
 619 * has an overlap with that range.  it also is known that parent's
 620 * line plno corresponds to e's line tlno.
 621 *
 622 *                <---- e ----->
 623 *                   <------>
 624 *                   <------------>
 625 *             <------------>
 626 *             <------------------>
 627 *
 628 * Split e into potentially three parts; before this chunk, the chunk
 629 * to be blamed for the parent, and after that portion.
 630 */
 631static void split_overlap(struct blame_entry *split,
 632                          struct blame_entry *e,
 633                          int tlno, int plno, int same,
 634                          struct origin *parent)
 635{
 636        int chunk_end_lno;
 637        memset(split, 0, sizeof(struct blame_entry [3]));
 638
 639        if (e->s_lno < tlno) {
 640                /* there is a pre-chunk part not blamed on parent */
 641                split[0].suspect = origin_incref(e->suspect);
 642                split[0].lno = e->lno;
 643                split[0].s_lno = e->s_lno;
 644                split[0].num_lines = tlno - e->s_lno;
 645                split[1].lno = e->lno + tlno - e->s_lno;
 646                split[1].s_lno = plno;
 647        }
 648        else {
 649                split[1].lno = e->lno;
 650                split[1].s_lno = plno + (e->s_lno - tlno);
 651        }
 652
 653        if (same < e->s_lno + e->num_lines) {
 654                /* there is a post-chunk part not blamed on parent */
 655                split[2].suspect = origin_incref(e->suspect);
 656                split[2].lno = e->lno + (same - e->s_lno);
 657                split[2].s_lno = e->s_lno + (same - e->s_lno);
 658                split[2].num_lines = e->s_lno + e->num_lines - same;
 659                chunk_end_lno = split[2].lno;
 660        }
 661        else
 662                chunk_end_lno = e->lno + e->num_lines;
 663        split[1].num_lines = chunk_end_lno - split[1].lno;
 664
 665        /*
 666         * if it turns out there is nothing to blame the parent for,
 667         * forget about the splitting.  !split[1].suspect signals this.
 668         */
 669        if (split[1].num_lines < 1)
 670                return;
 671        split[1].suspect = origin_incref(parent);
 672}
 673
 674/*
 675 * split_overlap() divided an existing blame e into up to three parts
 676 * in split.  Adjust the linked list of blames in the scoreboard to
 677 * reflect the split.
 678 */
 679static void split_blame(struct scoreboard *sb,
 680                        struct blame_entry *split,
 681                        struct blame_entry *e)
 682{
 683        struct blame_entry *new_entry;
 684
 685        if (split[0].suspect && split[2].suspect) {
 686                /* The first part (reuse storage for the existing entry e) */
 687                dup_entry(e, &split[0]);
 688
 689                /* The last part -- me */
 690                new_entry = xmalloc(sizeof(*new_entry));
 691                memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
 692                add_blame_entry(sb, new_entry);
 693
 694                /* ... and the middle part -- parent */
 695                new_entry = xmalloc(sizeof(*new_entry));
 696                memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
 697                add_blame_entry(sb, new_entry);
 698        }
 699        else if (!split[0].suspect && !split[2].suspect)
 700                /*
 701                 * The parent covers the entire area; reuse storage for
 702                 * e and replace it with the parent.
 703                 */
 704                dup_entry(e, &split[1]);
 705        else if (split[0].suspect) {
 706                /* me and then parent */
 707                dup_entry(e, &split[0]);
 708
 709                new_entry = xmalloc(sizeof(*new_entry));
 710                memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
 711                add_blame_entry(sb, new_entry);
 712        }
 713        else {
 714                /* parent and then me */
 715                dup_entry(e, &split[1]);
 716
 717                new_entry = xmalloc(sizeof(*new_entry));
 718                memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
 719                add_blame_entry(sb, new_entry);
 720        }
 721
 722        if (DEBUG) { /* sanity */
 723                struct blame_entry *ent;
 724                int lno = sb->ent->lno, corrupt = 0;
 725
 726                for (ent = sb->ent; ent; ent = ent->next) {
 727                        if (lno != ent->lno)
 728                                corrupt = 1;
 729                        if (ent->s_lno < 0)
 730                                corrupt = 1;
 731                        lno += ent->num_lines;
 732                }
 733                if (corrupt) {
 734                        lno = sb->ent->lno;
 735                        for (ent = sb->ent; ent; ent = ent->next) {
 736                                printf("L %8d l %8d n %8d\n",
 737                                       lno, ent->lno, ent->num_lines);
 738                                lno = ent->lno + ent->num_lines;
 739                        }
 740                        die("oops");
 741                }
 742        }
 743}
 744
 745/*
 746 * After splitting the blame, the origins used by the
 747 * on-stack blame_entry should lose one refcnt each.
 748 */
 749static void decref_split(struct blame_entry *split)
 750{
 751        int i;
 752
 753        for (i = 0; i < 3; i++)
 754                origin_decref(split[i].suspect);
 755}
 756
 757/*
 758 * Helper for blame_chunk().  blame_entry e is known to overlap with
 759 * the patch hunk; split it and pass blame to the parent.
 760 */
 761static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
 762                          int tlno, int plno, int same,
 763                          struct origin *parent)
 764{
 765        struct blame_entry split[3];
 766
 767        split_overlap(split, e, tlno, plno, same, parent);
 768        if (split[1].suspect)
 769                split_blame(sb, split, e);
 770        decref_split(split);
 771}
 772
 773/*
 774 * Find the line number of the last line the target is suspected for.
 775 */
 776static int find_last_in_target(struct scoreboard *sb, struct origin *target)
 777{
 778        struct blame_entry *e;
 779        int last_in_target = -1;
 780
 781        for (e = sb->ent; e; e = e->next) {
 782                if (e->guilty || !same_suspect(e->suspect, target))
 783                        continue;
 784                if (last_in_target < e->s_lno + e->num_lines)
 785                        last_in_target = e->s_lno + e->num_lines;
 786        }
 787        return last_in_target;
 788}
 789
 790/*
 791 * Process one hunk from the patch between the current suspect for
 792 * blame_entry e and its parent.  Find and split the overlap, and
 793 * pass blame to the overlapping part to the parent.
 794 */
 795static void blame_chunk(struct scoreboard *sb,
 796                        int tlno, int plno, int same,
 797                        struct origin *target, struct origin *parent)
 798{
 799        struct blame_entry *e;
 800
 801        for (e = sb->ent; e; e = e->next) {
 802                if (e->guilty || !same_suspect(e->suspect, target))
 803                        continue;
 804                if (same <= e->s_lno)
 805                        continue;
 806                if (tlno < e->s_lno + e->num_lines)
 807                        blame_overlap(sb, e, tlno, plno, same, parent);
 808        }
 809}
 810
 811/*
 812 * We are looking at the origin 'target' and aiming to pass blame
 813 * for the lines it is suspected to its parent.  Run diff to find
 814 * which lines came from parent and pass blame for them.
 815 */
 816static int pass_blame_to_parent(struct scoreboard *sb,
 817                                struct origin *target,
 818                                struct origin *parent)
 819{
 820        int i, last_in_target, plno, tlno;
 821        struct patch *patch;
 822
 823        last_in_target = find_last_in_target(sb, target);
 824        if (last_in_target < 0)
 825                return 1; /* nothing remains for this target */
 826
 827        patch = get_patch(parent, target);
 828        plno = tlno = 0;
 829        for (i = 0; i < patch->num; i++) {
 830                struct chunk *chunk = &patch->chunks[i];
 831
 832                blame_chunk(sb, tlno, plno, chunk->same, target, parent);
 833                plno = chunk->p_next;
 834                tlno = chunk->t_next;
 835        }
 836        /* The rest (i.e. anything after tlno) are the same as the parent */
 837        blame_chunk(sb, tlno, plno, last_in_target, target, parent);
 838
 839        free_patch(patch);
 840        return 0;
 841}
 842
 843/*
 844 * The lines in blame_entry after splitting blames many times can become
 845 * very small and trivial, and at some point it becomes pointless to
 846 * blame the parents.  E.g. "\t\t}\n\t}\n\n" appears everywhere in any
 847 * ordinary C program, and it is not worth to say it was copied from
 848 * totally unrelated file in the parent.
 849 *
 850 * Compute how trivial the lines in the blame_entry are.
 851 */
 852static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
 853{
 854        unsigned score;
 855        const char *cp, *ep;
 856
 857        if (e->score)
 858                return e->score;
 859
 860        score = 1;
 861        cp = nth_line(sb, e->lno);
 862        ep = nth_line(sb, e->lno + e->num_lines);
 863        while (cp < ep) {
 864                unsigned ch = *((unsigned char *)cp);
 865                if (isalnum(ch))
 866                        score++;
 867                cp++;
 868        }
 869        e->score = score;
 870        return score;
 871}
 872
 873/*
 874 * best_so_far[] and this[] are both a split of an existing blame_entry
 875 * that passes blame to the parent.  Maintain best_so_far the best split
 876 * so far, by comparing this and best_so_far and copying this into
 877 * bst_so_far as needed.
 878 */
 879static void copy_split_if_better(struct scoreboard *sb,
 880                                 struct blame_entry *best_so_far,
 881                                 struct blame_entry *this)
 882{
 883        int i;
 884
 885        if (!this[1].suspect)
 886                return;
 887        if (best_so_far[1].suspect) {
 888                if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
 889                        return;
 890        }
 891
 892        for (i = 0; i < 3; i++)
 893                origin_incref(this[i].suspect);
 894        decref_split(best_so_far);
 895        memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
 896}
 897
 898/*
 899 * We are looking at a part of the final image represented by
 900 * ent (tlno and same are offset by ent->s_lno).
 901 * tlno is where we are looking at in the final image.
 902 * up to (but not including) same match preimage.
 903 * plno is where we are looking at in the preimage.
 904 *
 905 * <-------------- final image ---------------------->
 906 *       <------ent------>
 907 *         ^tlno ^same
 908 *    <---------preimage----->
 909 *         ^plno
 910 *
 911 * All line numbers are 0-based.
 912 */
 913static void handle_split(struct scoreboard *sb,
 914                         struct blame_entry *ent,
 915                         int tlno, int plno, int same,
 916                         struct origin *parent,
 917                         struct blame_entry *split)
 918{
 919        if (ent->num_lines <= tlno)
 920                return;
 921        if (tlno < same) {
 922                struct blame_entry this[3];
 923                tlno += ent->s_lno;
 924                same += ent->s_lno;
 925                split_overlap(this, ent, tlno, plno, same, parent);
 926                copy_split_if_better(sb, split, this);
 927                decref_split(this);
 928        }
 929}
 930
 931/*
 932 * Find the lines from parent that are the same as ent so that
 933 * we can pass blames to it.  file_p has the blob contents for
 934 * the parent.
 935 */
 936static void find_copy_in_blob(struct scoreboard *sb,
 937                              struct blame_entry *ent,
 938                              struct origin *parent,
 939                              struct blame_entry *split,
 940                              mmfile_t *file_p)
 941{
 942        const char *cp;
 943        int cnt;
 944        mmfile_t file_o;
 945        struct patch *patch;
 946        int i, plno, tlno;
 947
 948        /*
 949         * Prepare mmfile that contains only the lines in ent.
 950         */
 951        cp = nth_line(sb, ent->lno);
 952        file_o.ptr = (char*) cp;
 953        cnt = ent->num_lines;
 954
 955        while (cnt && cp < sb->final_buf + sb->final_buf_size) {
 956                if (*cp++ == '\n')
 957                        cnt--;
 958        }
 959        file_o.size = cp - file_o.ptr;
 960
 961        patch = compare_buffer(file_p, &file_o, 1);
 962
 963        /*
 964         * file_o is a part of final image we are annotating.
 965         * file_p partially may match that image.
 966         */
 967        memset(split, 0, sizeof(struct blame_entry [3]));
 968        plno = tlno = 0;
 969        for (i = 0; i < patch->num; i++) {
 970                struct chunk *chunk = &patch->chunks[i];
 971
 972                handle_split(sb, ent, tlno, plno, chunk->same, parent, split);
 973                plno = chunk->p_next;
 974                tlno = chunk->t_next;
 975        }
 976        /* remainder, if any, all match the preimage */
 977        handle_split(sb, ent, tlno, plno, ent->num_lines, parent, split);
 978        free_patch(patch);
 979}
 980
 981/*
 982 * See if lines currently target is suspected for can be attributed to
 983 * parent.
 984 */
 985static int find_move_in_parent(struct scoreboard *sb,
 986                               struct origin *target,
 987                               struct origin *parent)
 988{
 989        int last_in_target, made_progress;
 990        struct blame_entry *e, split[3];
 991        mmfile_t file_p;
 992
 993        last_in_target = find_last_in_target(sb, target);
 994        if (last_in_target < 0)
 995                return 1; /* nothing remains for this target */
 996
 997        fill_origin_blob(parent, &file_p);
 998        if (!file_p.ptr)
 999                return 0;
1000
1001        made_progress = 1;
1002        while (made_progress) {
1003                made_progress = 0;
1004                for (e = sb->ent; e; e = e->next) {
1005                        if (e->guilty || !same_suspect(e->suspect, target))
1006                                continue;
1007                        find_copy_in_blob(sb, e, parent, split, &file_p);
1008                        if (split[1].suspect &&
1009                            blame_move_score < ent_score(sb, &split[1])) {
1010                                split_blame(sb, split, e);
1011                                made_progress = 1;
1012                        }
1013                        decref_split(split);
1014                }
1015        }
1016        return 0;
1017}
1018
1019struct blame_list {
1020        struct blame_entry *ent;
1021        struct blame_entry split[3];
1022};
1023
1024/*
1025 * Count the number of entries the target is suspected for,
1026 * and prepare a list of entry and the best split.
1027 */
1028static struct blame_list *setup_blame_list(struct scoreboard *sb,
1029                                           struct origin *target,
1030                                           int *num_ents_p)
1031{
1032        struct blame_entry *e;
1033        int num_ents, i;
1034        struct blame_list *blame_list = NULL;
1035
1036        for (e = sb->ent, num_ents = 0; e; e = e->next)
1037                if (!e->guilty && same_suspect(e->suspect, target))
1038                        num_ents++;
1039        if (num_ents) {
1040                blame_list = xcalloc(num_ents, sizeof(struct blame_list));
1041                for (e = sb->ent, i = 0; e; e = e->next)
1042                        if (!e->guilty && same_suspect(e->suspect, target))
1043                                blame_list[i++].ent = e;
1044        }
1045        *num_ents_p = num_ents;
1046        return blame_list;
1047}
1048
1049/*
1050 * For lines target is suspected for, see if we can find code movement
1051 * across file boundary from the parent commit.  porigin is the path
1052 * in the parent we already tried.
1053 */
1054static int find_copy_in_parent(struct scoreboard *sb,
1055                               struct origin *target,
1056                               struct commit *parent,
1057                               struct origin *porigin,
1058                               int opt)
1059{
1060        struct diff_options diff_opts;
1061        const char *paths[1];
1062        int i, j;
1063        int retval;
1064        struct blame_list *blame_list;
1065        int num_ents;
1066
1067        blame_list = setup_blame_list(sb, target, &num_ents);
1068        if (!blame_list)
1069                return 1; /* nothing remains for this target */
1070
1071        diff_setup(&diff_opts);
1072        diff_opts.recursive = 1;
1073        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1074
1075        paths[0] = NULL;
1076        diff_tree_setup_paths(paths, &diff_opts);
1077        if (diff_setup_done(&diff_opts) < 0)
1078                die("diff-setup");
1079
1080        /* Try "find copies harder" on new path if requested;
1081         * we do not want to use diffcore_rename() actually to
1082         * match things up; find_copies_harder is set only to
1083         * force diff_tree_sha1() to feed all filepairs to diff_queue,
1084         * and this code needs to be after diff_setup_done(), which
1085         * usually makes find-copies-harder imply copy detection.
1086         */
1087        if ((opt & PICKAXE_BLAME_COPY_HARDEST)
1088            || ((opt & PICKAXE_BLAME_COPY_HARDER)
1089                && (!porigin || strcmp(target->path, porigin->path))))
1090                diff_opts.find_copies_harder = 1;
1091
1092        if (is_null_sha1(target->commit->object.sha1))
1093                do_diff_cache(parent->tree->object.sha1, &diff_opts);
1094        else
1095                diff_tree_sha1(parent->tree->object.sha1,
1096                               target->commit->tree->object.sha1,
1097                               "", &diff_opts);
1098
1099        if (!diff_opts.find_copies_harder)
1100                diffcore_std(&diff_opts);
1101
1102        retval = 0;
1103        while (1) {
1104                int made_progress = 0;
1105
1106                for (i = 0; i < diff_queued_diff.nr; i++) {
1107                        struct diff_filepair *p = diff_queued_diff.queue[i];
1108                        struct origin *norigin;
1109                        mmfile_t file_p;
1110                        struct blame_entry this[3];
1111
1112                        if (!DIFF_FILE_VALID(p->one))
1113                                continue; /* does not exist in parent */
1114                        if (porigin && !strcmp(p->one->path, porigin->path))
1115                                /* find_move already dealt with this path */
1116                                continue;
1117
1118                        norigin = get_origin(sb, parent, p->one->path);
1119                        hashcpy(norigin->blob_sha1, p->one->sha1);
1120                        fill_origin_blob(norigin, &file_p);
1121                        if (!file_p.ptr)
1122                                continue;
1123
1124                        for (j = 0; j < num_ents; j++) {
1125                                find_copy_in_blob(sb, blame_list[j].ent,
1126                                                  norigin, this, &file_p);
1127                                copy_split_if_better(sb, blame_list[j].split,
1128                                                     this);
1129                                decref_split(this);
1130                        }
1131                        origin_decref(norigin);
1132                }
1133
1134                for (j = 0; j < num_ents; j++) {
1135                        struct blame_entry *split = blame_list[j].split;
1136                        if (split[1].suspect &&
1137                            blame_copy_score < ent_score(sb, &split[1])) {
1138                                split_blame(sb, split, blame_list[j].ent);
1139                                made_progress = 1;
1140                        }
1141                        decref_split(split);
1142                }
1143                free(blame_list);
1144
1145                if (!made_progress)
1146                        break;
1147                blame_list = setup_blame_list(sb, target, &num_ents);
1148                if (!blame_list) {
1149                        retval = 1;
1150                        break;
1151                }
1152        }
1153        diff_flush(&diff_opts);
1154
1155        return retval;
1156}
1157
1158/*
1159 * The blobs of origin and porigin exactly match, so everything
1160 * origin is suspected for can be blamed on the parent.
1161 */
1162static void pass_whole_blame(struct scoreboard *sb,
1163                             struct origin *origin, struct origin *porigin)
1164{
1165        struct blame_entry *e;
1166
1167        if (!porigin->file.ptr && origin->file.ptr) {
1168                /* Steal its file */
1169                porigin->file = origin->file;
1170                origin->file.ptr = NULL;
1171        }
1172        for (e = sb->ent; e; e = e->next) {
1173                if (!same_suspect(e->suspect, origin))
1174                        continue;
1175                origin_incref(porigin);
1176                origin_decref(e->suspect);
1177                e->suspect = porigin;
1178        }
1179}
1180
1181#define MAXPARENT 16
1182
1183static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
1184{
1185        int i, pass;
1186        struct commit *commit = origin->commit;
1187        struct commit_list *parent;
1188        struct origin *parent_origin[MAXPARENT], *porigin;
1189
1190        memset(parent_origin, 0, sizeof(parent_origin));
1191
1192        /* The first pass looks for unrenamed path to optimize for
1193         * common cases, then we look for renames in the second pass.
1194         */
1195        for (pass = 0; pass < 2; pass++) {
1196                struct origin *(*find)(struct scoreboard *,
1197                                       struct commit *, struct origin *);
1198                find = pass ? find_rename : find_origin;
1199
1200                for (i = 0, parent = commit->parents;
1201                     i < MAXPARENT && parent;
1202                     parent = parent->next, i++) {
1203                        struct commit *p = parent->item;
1204                        int j, same;
1205
1206                        if (parent_origin[i])
1207                                continue;
1208                        if (parse_commit(p))
1209                                continue;
1210                        porigin = find(sb, p, origin);
1211                        if (!porigin)
1212                                continue;
1213                        if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
1214                                pass_whole_blame(sb, origin, porigin);
1215                                origin_decref(porigin);
1216                                goto finish;
1217                        }
1218                        for (j = same = 0; j < i; j++)
1219                                if (parent_origin[j] &&
1220                                    !hashcmp(parent_origin[j]->blob_sha1,
1221                                             porigin->blob_sha1)) {
1222                                        same = 1;
1223                                        break;
1224                                }
1225                        if (!same)
1226                                parent_origin[i] = porigin;
1227                        else
1228                                origin_decref(porigin);
1229                }
1230        }
1231
1232        num_commits++;
1233        for (i = 0, parent = commit->parents;
1234             i < MAXPARENT && parent;
1235             parent = parent->next, i++) {
1236                struct origin *porigin = parent_origin[i];
1237                if (!porigin)
1238                        continue;
1239                if (pass_blame_to_parent(sb, origin, porigin))
1240                        goto finish;
1241        }
1242
1243        /*
1244         * Optionally find moves in parents' files.
1245         */
1246        if (opt & PICKAXE_BLAME_MOVE)
1247                for (i = 0, parent = commit->parents;
1248                     i < MAXPARENT && parent;
1249                     parent = parent->next, i++) {
1250                        struct origin *porigin = parent_origin[i];
1251                        if (!porigin)
1252                                continue;
1253                        if (find_move_in_parent(sb, origin, porigin))
1254                                goto finish;
1255                }
1256
1257        /*
1258         * Optionally find copies from parents' files.
1259         */
1260        if (opt & PICKAXE_BLAME_COPY)
1261                for (i = 0, parent = commit->parents;
1262                     i < MAXPARENT && parent;
1263                     parent = parent->next, i++) {
1264                        struct origin *porigin = parent_origin[i];
1265                        if (find_copy_in_parent(sb, origin, parent->item,
1266                                                porigin, opt))
1267                                goto finish;
1268                }
1269
1270 finish:
1271        for (i = 0; i < MAXPARENT; i++)
1272                origin_decref(parent_origin[i]);
1273}
1274
1275/*
1276 * Information on commits, used for output.
1277 */
1278struct commit_info
1279{
1280        const char *author;
1281        const char *author_mail;
1282        unsigned long author_time;
1283        const char *author_tz;
1284
1285        /* filled only when asked for details */
1286        const char *committer;
1287        const char *committer_mail;
1288        unsigned long committer_time;
1289        const char *committer_tz;
1290
1291        const char *summary;
1292};
1293
1294/*
1295 * Parse author/committer line in the commit object buffer
1296 */
1297static void get_ac_line(const char *inbuf, const char *what,
1298                        int bufsz, char *person, const char **mail,
1299                        unsigned long *time, const char **tz)
1300{
1301        int len, tzlen, maillen;
1302        char *tmp, *endp, *timepos;
1303
1304        tmp = strstr(inbuf, what);
1305        if (!tmp)
1306                goto error_out;
1307        tmp += strlen(what);
1308        endp = strchr(tmp, '\n');
1309        if (!endp)
1310                len = strlen(tmp);
1311        else
1312                len = endp - tmp;
1313        if (bufsz <= len) {
1314        error_out:
1315                /* Ugh */
1316                *mail = *tz = "(unknown)";
1317                *time = 0;
1318                return;
1319        }
1320        memcpy(person, tmp, len);
1321
1322        tmp = person;
1323        tmp += len;
1324        *tmp = 0;
1325        while (*tmp != ' ')
1326                tmp--;
1327        *tz = tmp+1;
1328        tzlen = (person+len)-(tmp+1);
1329
1330        *tmp = 0;
1331        while (*tmp != ' ')
1332                tmp--;
1333        *time = strtoul(tmp, NULL, 10);
1334        timepos = tmp;
1335
1336        *tmp = 0;
1337        while (*tmp != ' ')
1338                tmp--;
1339        *mail = tmp + 1;
1340        *tmp = 0;
1341        maillen = timepos - tmp;
1342
1343        if (!mailmap.nr)
1344                return;
1345
1346        /*
1347         * mailmap expansion may make the name longer.
1348         * make room by pushing stuff down.
1349         */
1350        tmp = person + bufsz - (tzlen + 1);
1351        memmove(tmp, *tz, tzlen);
1352        tmp[tzlen] = 0;
1353        *tz = tmp;
1354
1355        tmp = tmp - (maillen + 1);
1356        memmove(tmp, *mail, maillen);
1357        tmp[maillen] = 0;
1358        *mail = tmp;
1359
1360        /*
1361         * Now, convert e-mail using mailmap
1362         */
1363        map_email(&mailmap, tmp + 1, person, tmp-person-1);
1364}
1365
1366static void get_commit_info(struct commit *commit,
1367                            struct commit_info *ret,
1368                            int detailed)
1369{
1370        int len;
1371        char *tmp, *endp;
1372        static char author_buf[1024];
1373        static char committer_buf[1024];
1374        static char summary_buf[1024];
1375
1376        /*
1377         * We've operated without save_commit_buffer, so
1378         * we now need to populate them for output.
1379         */
1380        if (!commit->buffer) {
1381                enum object_type type;
1382                unsigned long size;
1383                commit->buffer =
1384                        read_sha1_file(commit->object.sha1, &type, &size);
1385        }
1386        ret->author = author_buf;
1387        get_ac_line(commit->buffer, "\nauthor ",
1388                    sizeof(author_buf), author_buf, &ret->author_mail,
1389                    &ret->author_time, &ret->author_tz);
1390
1391        if (!detailed)
1392                return;
1393
1394        ret->committer = committer_buf;
1395        get_ac_line(commit->buffer, "\ncommitter ",
1396                    sizeof(committer_buf), committer_buf, &ret->committer_mail,
1397                    &ret->committer_time, &ret->committer_tz);
1398
1399        ret->summary = summary_buf;
1400        tmp = strstr(commit->buffer, "\n\n");
1401        if (!tmp) {
1402        error_out:
1403                sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
1404                return;
1405        }
1406        tmp += 2;
1407        endp = strchr(tmp, '\n');
1408        if (!endp)
1409                endp = tmp + strlen(tmp);
1410        len = endp - tmp;
1411        if (len >= sizeof(summary_buf) || len == 0)
1412                goto error_out;
1413        memcpy(summary_buf, tmp, len);
1414        summary_buf[len] = 0;
1415}
1416
1417/*
1418 * To allow LF and other nonportable characters in pathnames,
1419 * they are c-style quoted as needed.
1420 */
1421static void write_filename_info(const char *path)
1422{
1423        printf("filename ");
1424        write_name_quoted(NULL, 0, path, 1, stdout);
1425        putchar('\n');
1426}
1427
1428/*
1429 * The blame_entry is found to be guilty for the range.  Mark it
1430 * as such, and show it in incremental output.
1431 */
1432static void found_guilty_entry(struct blame_entry *ent)
1433{
1434        if (ent->guilty)
1435                return;
1436        ent->guilty = 1;
1437        if (incremental) {
1438                struct origin *suspect = ent->suspect;
1439
1440                printf("%s %d %d %d\n",
1441                       sha1_to_hex(suspect->commit->object.sha1),
1442                       ent->s_lno + 1, ent->lno + 1, ent->num_lines);
1443                if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1444                        struct commit_info ci;
1445                        suspect->commit->object.flags |= METAINFO_SHOWN;
1446                        get_commit_info(suspect->commit, &ci, 1);
1447                        printf("author %s\n", ci.author);
1448                        printf("author-mail %s\n", ci.author_mail);
1449                        printf("author-time %lu\n", ci.author_time);
1450                        printf("author-tz %s\n", ci.author_tz);
1451                        printf("committer %s\n", ci.committer);
1452                        printf("committer-mail %s\n", ci.committer_mail);
1453                        printf("committer-time %lu\n", ci.committer_time);
1454                        printf("committer-tz %s\n", ci.committer_tz);
1455                        printf("summary %s\n", ci.summary);
1456                        if (suspect->commit->object.flags & UNINTERESTING)
1457                                printf("boundary\n");
1458                }
1459                write_filename_info(suspect->path);
1460        }
1461}
1462
1463/*
1464 * The main loop -- while the scoreboard has lines whose true origin
1465 * is still unknown, pick one blame_entry, and allow its current
1466 * suspect to pass blames to its parents.
1467 */
1468static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
1469{
1470        while (1) {
1471                struct blame_entry *ent;
1472                struct commit *commit;
1473                struct origin *suspect = NULL;
1474
1475                /* find one suspect to break down */
1476                for (ent = sb->ent; !suspect && ent; ent = ent->next)
1477                        if (!ent->guilty)
1478                                suspect = ent->suspect;
1479                if (!suspect)
1480                        return; /* all done */
1481
1482                /*
1483                 * We will use this suspect later in the loop,
1484                 * so hold onto it in the meantime.
1485                 */
1486                origin_incref(suspect);
1487                commit = suspect->commit;
1488                if (!commit->object.parsed)
1489                        parse_commit(commit);
1490                if (!(commit->object.flags & UNINTERESTING) &&
1491                    !(revs->max_age != -1 && commit->date < revs->max_age))
1492                        pass_blame(sb, suspect, opt);
1493                else {
1494                        commit->object.flags |= UNINTERESTING;
1495                        if (commit->object.parsed)
1496                                mark_parents_uninteresting(commit);
1497                }
1498                /* treat root commit as boundary */
1499                if (!commit->parents && !show_root)
1500                        commit->object.flags |= UNINTERESTING;
1501
1502                /* Take responsibility for the remaining entries */
1503                for (ent = sb->ent; ent; ent = ent->next)
1504                        if (same_suspect(ent->suspect, suspect))
1505                                found_guilty_entry(ent);
1506                origin_decref(suspect);
1507
1508                if (DEBUG) /* sanity */
1509                        sanity_check_refcnt(sb);
1510        }
1511}
1512
1513static const char *format_time(unsigned long time, const char *tz_str,
1514                               int show_raw_time)
1515{
1516        static char time_buf[128];
1517        time_t t = time;
1518        int minutes, tz;
1519        struct tm *tm;
1520
1521        if (show_raw_time) {
1522                sprintf(time_buf, "%lu %s", time, tz_str);
1523                return time_buf;
1524        }
1525
1526        tz = atoi(tz_str);
1527        minutes = tz < 0 ? -tz : tz;
1528        minutes = (minutes / 100)*60 + (minutes % 100);
1529        minutes = tz < 0 ? -minutes : minutes;
1530        t = time + minutes * 60;
1531        tm = gmtime(&t);
1532
1533        strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
1534        strcat(time_buf, tz_str);
1535        return time_buf;
1536}
1537
1538#define OUTPUT_ANNOTATE_COMPAT  001
1539#define OUTPUT_LONG_OBJECT_NAME 002
1540#define OUTPUT_RAW_TIMESTAMP    004
1541#define OUTPUT_PORCELAIN        010
1542#define OUTPUT_SHOW_NAME        020
1543#define OUTPUT_SHOW_NUMBER      040
1544#define OUTPUT_SHOW_SCORE      0100
1545#define OUTPUT_NO_AUTHOR       0200
1546
1547static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
1548{
1549        int cnt;
1550        const char *cp;
1551        struct origin *suspect = ent->suspect;
1552        char hex[41];
1553
1554        strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1555        printf("%s%c%d %d %d\n",
1556               hex,
1557               ent->guilty ? ' ' : '*', // purely for debugging
1558               ent->s_lno + 1,
1559               ent->lno + 1,
1560               ent->num_lines);
1561        if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1562                struct commit_info ci;
1563                suspect->commit->object.flags |= METAINFO_SHOWN;
1564                get_commit_info(suspect->commit, &ci, 1);
1565                printf("author %s\n", ci.author);
1566                printf("author-mail %s\n", ci.author_mail);
1567                printf("author-time %lu\n", ci.author_time);
1568                printf("author-tz %s\n", ci.author_tz);
1569                printf("committer %s\n", ci.committer);
1570                printf("committer-mail %s\n", ci.committer_mail);
1571                printf("committer-time %lu\n", ci.committer_time);
1572                printf("committer-tz %s\n", ci.committer_tz);
1573                write_filename_info(suspect->path);
1574                printf("summary %s\n", ci.summary);
1575                if (suspect->commit->object.flags & UNINTERESTING)
1576                        printf("boundary\n");
1577        }
1578        else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1579                write_filename_info(suspect->path);
1580
1581        cp = nth_line(sb, ent->lno);
1582        for (cnt = 0; cnt < ent->num_lines; cnt++) {
1583                char ch;
1584                if (cnt)
1585                        printf("%s %d %d\n", hex,
1586                               ent->s_lno + 1 + cnt,
1587                               ent->lno + 1 + cnt);
1588                putchar('\t');
1589                do {
1590                        ch = *cp++;
1591                        putchar(ch);
1592                } while (ch != '\n' &&
1593                         cp < sb->final_buf + sb->final_buf_size);
1594        }
1595}
1596
1597static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1598{
1599        int cnt;
1600        const char *cp;
1601        struct origin *suspect = ent->suspect;
1602        struct commit_info ci;
1603        char hex[41];
1604        int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1605
1606        get_commit_info(suspect->commit, &ci, 1);
1607        strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1608
1609        cp = nth_line(sb, ent->lno);
1610        for (cnt = 0; cnt < ent->num_lines; cnt++) {
1611                char ch;
1612                int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8;
1613
1614                if (suspect->commit->object.flags & UNINTERESTING) {
1615                        if (blank_boundary)
1616                                memset(hex, ' ', length);
1617                        else if (!cmd_is_annotate) {
1618                                length--;
1619                                putchar('^');
1620                        }
1621                }
1622
1623                printf("%.*s", length, hex);
1624                if (opt & OUTPUT_ANNOTATE_COMPAT)
1625                        printf("\t(%10s\t%10s\t%d)", ci.author,
1626                               format_time(ci.author_time, ci.author_tz,
1627                                           show_raw_time),
1628                               ent->lno + 1 + cnt);
1629                else {
1630                        if (opt & OUTPUT_SHOW_SCORE)
1631                                printf(" %*d %02d",
1632                                       max_score_digits, ent->score,
1633                                       ent->suspect->refcnt);
1634                        if (opt & OUTPUT_SHOW_NAME)
1635                                printf(" %-*.*s", longest_file, longest_file,
1636                                       suspect->path);
1637                        if (opt & OUTPUT_SHOW_NUMBER)
1638                                printf(" %*d", max_orig_digits,
1639                                       ent->s_lno + 1 + cnt);
1640
1641                        if (!(opt & OUTPUT_NO_AUTHOR))
1642                                printf(" (%-*.*s %10s",
1643                                       longest_author, longest_author,
1644                                       ci.author,
1645                                       format_time(ci.author_time,
1646                                                   ci.author_tz,
1647                                                   show_raw_time));
1648                        printf(" %*d) ",
1649                               max_digits, ent->lno + 1 + cnt);
1650                }
1651                do {
1652                        ch = *cp++;
1653                        putchar(ch);
1654                } while (ch != '\n' &&
1655                         cp < sb->final_buf + sb->final_buf_size);
1656        }
1657}
1658
1659static void output(struct scoreboard *sb, int option)
1660{
1661        struct blame_entry *ent;
1662
1663        if (option & OUTPUT_PORCELAIN) {
1664                for (ent = sb->ent; ent; ent = ent->next) {
1665                        struct blame_entry *oth;
1666                        struct origin *suspect = ent->suspect;
1667                        struct commit *commit = suspect->commit;
1668                        if (commit->object.flags & MORE_THAN_ONE_PATH)
1669                                continue;
1670                        for (oth = ent->next; oth; oth = oth->next) {
1671                                if ((oth->suspect->commit != commit) ||
1672                                    !strcmp(oth->suspect->path, suspect->path))
1673                                        continue;
1674                                commit->object.flags |= MORE_THAN_ONE_PATH;
1675                                break;
1676                        }
1677                }
1678        }
1679
1680        for (ent = sb->ent; ent; ent = ent->next) {
1681                if (option & OUTPUT_PORCELAIN)
1682                        emit_porcelain(sb, ent);
1683                else {
1684                        emit_other(sb, ent, option);
1685                }
1686        }
1687}
1688
1689/*
1690 * To allow quick access to the contents of nth line in the
1691 * final image, prepare an index in the scoreboard.
1692 */
1693static int prepare_lines(struct scoreboard *sb)
1694{
1695        const char *buf = sb->final_buf;
1696        unsigned long len = sb->final_buf_size;
1697        int num = 0, incomplete = 0, bol = 1;
1698
1699        if (len && buf[len-1] != '\n')
1700                incomplete++; /* incomplete line at the end */
1701        while (len--) {
1702                if (bol) {
1703                        sb->lineno = xrealloc(sb->lineno,
1704                                              sizeof(int* ) * (num + 1));
1705                        sb->lineno[num] = buf - sb->final_buf;
1706                        bol = 0;
1707                }
1708                if (*buf++ == '\n') {
1709                        num++;
1710                        bol = 1;
1711                }
1712        }
1713        sb->lineno = xrealloc(sb->lineno,
1714                              sizeof(int* ) * (num + incomplete + 1));
1715        sb->lineno[num + incomplete] = buf - sb->final_buf;
1716        sb->num_lines = num + incomplete;
1717        return sb->num_lines;
1718}
1719
1720/*
1721 * Add phony grafts for use with -S; this is primarily to
1722 * support git-cvsserver that wants to give a linear history
1723 * to its clients.
1724 */
1725static int read_ancestry(const char *graft_file)
1726{
1727        FILE *fp = fopen(graft_file, "r");
1728        char buf[1024];
1729        if (!fp)
1730                return -1;
1731        while (fgets(buf, sizeof(buf), fp)) {
1732                /* The format is just "Commit Parent1 Parent2 ...\n" */
1733                int len = strlen(buf);
1734                struct commit_graft *graft = read_graft_line(buf, len);
1735                if (graft)
1736                        register_commit_graft(graft, 0);
1737        }
1738        fclose(fp);
1739        return 0;
1740}
1741
1742/*
1743 * How many columns do we need to show line numbers in decimal?
1744 */
1745static int lineno_width(int lines)
1746{
1747        int i, width;
1748
1749        for (width = 1, i = 10; i <= lines + 1; width++)
1750                i *= 10;
1751        return width;
1752}
1753
1754/*
1755 * How many columns do we need to show line numbers, authors,
1756 * and filenames?
1757 */
1758static void find_alignment(struct scoreboard *sb, int *option)
1759{
1760        int longest_src_lines = 0;
1761        int longest_dst_lines = 0;
1762        unsigned largest_score = 0;
1763        struct blame_entry *e;
1764
1765        for (e = sb->ent; e; e = e->next) {
1766                struct origin *suspect = e->suspect;
1767                struct commit_info ci;
1768                int num;
1769
1770                if (strcmp(suspect->path, sb->path))
1771                        *option |= OUTPUT_SHOW_NAME;
1772                num = strlen(suspect->path);
1773                if (longest_file < num)
1774                        longest_file = num;
1775                if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1776                        suspect->commit->object.flags |= METAINFO_SHOWN;
1777                        get_commit_info(suspect->commit, &ci, 1);
1778                        num = strlen(ci.author);
1779                        if (longest_author < num)
1780                                longest_author = num;
1781                }
1782                num = e->s_lno + e->num_lines;
1783                if (longest_src_lines < num)
1784                        longest_src_lines = num;
1785                num = e->lno + e->num_lines;
1786                if (longest_dst_lines < num)
1787                        longest_dst_lines = num;
1788                if (largest_score < ent_score(sb, e))
1789                        largest_score = ent_score(sb, e);
1790        }
1791        max_orig_digits = lineno_width(longest_src_lines);
1792        max_digits = lineno_width(longest_dst_lines);
1793        max_score_digits = lineno_width(largest_score);
1794}
1795
1796/*
1797 * For debugging -- origin is refcounted, and this asserts that
1798 * we do not underflow.
1799 */
1800static void sanity_check_refcnt(struct scoreboard *sb)
1801{
1802        int baa = 0;
1803        struct blame_entry *ent;
1804
1805        for (ent = sb->ent; ent; ent = ent->next) {
1806                /* Nobody should have zero or negative refcnt */
1807                if (ent->suspect->refcnt <= 0) {
1808                        fprintf(stderr, "%s in %s has negative refcnt %d\n",
1809                                ent->suspect->path,
1810                                sha1_to_hex(ent->suspect->commit->object.sha1),
1811                                ent->suspect->refcnt);
1812                        baa = 1;
1813                }
1814        }
1815        for (ent = sb->ent; ent; ent = ent->next) {
1816                /* Mark the ones that haven't been checked */
1817                if (0 < ent->suspect->refcnt)
1818                        ent->suspect->refcnt = -ent->suspect->refcnt;
1819        }
1820        for (ent = sb->ent; ent; ent = ent->next) {
1821                /*
1822                 * ... then pick each and see if they have the the
1823                 * correct refcnt.
1824                 */
1825                int found;
1826                struct blame_entry *e;
1827                struct origin *suspect = ent->suspect;
1828
1829                if (0 < suspect->refcnt)
1830                        continue;
1831                suspect->refcnt = -suspect->refcnt; /* Unmark */
1832                for (found = 0, e = sb->ent; e; e = e->next) {
1833                        if (e->suspect != suspect)
1834                                continue;
1835                        found++;
1836                }
1837                if (suspect->refcnt != found) {
1838                        fprintf(stderr, "%s in %s has refcnt %d, not %d\n",
1839                                ent->suspect->path,
1840                                sha1_to_hex(ent->suspect->commit->object.sha1),
1841                                ent->suspect->refcnt, found);
1842                        baa = 2;
1843                }
1844        }
1845        if (baa) {
1846                int opt = 0160;
1847                find_alignment(sb, &opt);
1848                output(sb, opt);
1849                die("Baa %d!", baa);
1850        }
1851}
1852
1853/*
1854 * Used for the command line parsing; check if the path exists
1855 * in the working tree.
1856 */
1857static int has_path_in_work_tree(const char *path)
1858{
1859        struct stat st;
1860        return !lstat(path, &st);
1861}
1862
1863static unsigned parse_score(const char *arg)
1864{
1865        char *end;
1866        unsigned long score = strtoul(arg, &end, 10);
1867        if (*end)
1868                return 0;
1869        return score;
1870}
1871
1872static const char *add_prefix(const char *prefix, const char *path)
1873{
1874        if (!prefix || !prefix[0])
1875                return path;
1876        return prefix_path(prefix, strlen(prefix), path);
1877}
1878
1879/*
1880 * Parsing of (comma separated) one item in the -L option
1881 */
1882static const char *parse_loc(const char *spec,
1883                             struct scoreboard *sb, long lno,
1884                             long begin, long *ret)
1885{
1886        char *term;
1887        const char *line;
1888        long num;
1889        int reg_error;
1890        regex_t regexp;
1891        regmatch_t match[1];
1892
1893        /* Allow "-L <something>,+20" to mean starting at <something>
1894         * for 20 lines, or "-L <something>,-5" for 5 lines ending at
1895         * <something>.
1896         */
1897        if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
1898                num = strtol(spec + 1, &term, 10);
1899                if (term != spec + 1) {
1900                        if (spec[0] == '-')
1901                                num = 0 - num;
1902                        if (0 < num)
1903                                *ret = begin + num - 2;
1904                        else if (!num)
1905                                *ret = begin;
1906                        else
1907                                *ret = begin + num;
1908                        return term;
1909                }
1910                return spec;
1911        }
1912        num = strtol(spec, &term, 10);
1913        if (term != spec) {
1914                *ret = num;
1915                return term;
1916        }
1917        if (spec[0] != '/')
1918                return spec;
1919
1920        /* it could be a regexp of form /.../ */
1921        for (term = (char*) spec + 1; *term && *term != '/'; term++) {
1922                if (*term == '\\')
1923                        term++;
1924        }
1925        if (*term != '/')
1926                return spec;
1927
1928        /* try [spec+1 .. term-1] as regexp */
1929        *term = 0;
1930        begin--; /* input is in human terms */
1931        line = nth_line(sb, begin);
1932
1933        if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
1934            !(reg_error = regexec(&regexp, line, 1, match, 0))) {
1935                const char *cp = line + match[0].rm_so;
1936                const char *nline;
1937
1938                while (begin++ < lno) {
1939                        nline = nth_line(sb, begin);
1940                        if (line <= cp && cp < nline)
1941                                break;
1942                        line = nline;
1943                }
1944                *ret = begin;
1945                regfree(&regexp);
1946                *term++ = '/';
1947                return term;
1948        }
1949        else {
1950                char errbuf[1024];
1951                regerror(reg_error, &regexp, errbuf, 1024);
1952                die("-L parameter '%s': %s", spec + 1, errbuf);
1953        }
1954}
1955
1956/*
1957 * Parsing of -L option
1958 */
1959static void prepare_blame_range(struct scoreboard *sb,
1960                                const char *bottomtop,
1961                                long lno,
1962                                long *bottom, long *top)
1963{
1964        const char *term;
1965
1966        term = parse_loc(bottomtop, sb, lno, 1, bottom);
1967        if (*term == ',') {
1968                term = parse_loc(term + 1, sb, lno, *bottom + 1, top);
1969                if (*term)
1970                        usage(blame_usage);
1971        }
1972        if (*term)
1973                usage(blame_usage);
1974}
1975
1976static int git_blame_config(const char *var, const char *value)
1977{
1978        if (!strcmp(var, "blame.showroot")) {
1979                show_root = git_config_bool(var, value);
1980                return 0;
1981        }
1982        if (!strcmp(var, "blame.blankboundary")) {
1983                blank_boundary = git_config_bool(var, value);
1984                return 0;
1985        }
1986        return git_default_config(var, value);
1987}
1988
1989static struct commit *fake_working_tree_commit(const char *path, const char *contents_from)
1990{
1991        struct commit *commit;
1992        struct origin *origin;
1993        unsigned char head_sha1[20];
1994        char *buf;
1995        const char *ident;
1996        int fd;
1997        time_t now;
1998        unsigned long fin_size;
1999        int size, len;
2000        struct cache_entry *ce;
2001        unsigned mode;
2002
2003        if (get_sha1("HEAD", head_sha1))
2004                die("No such ref: HEAD");
2005
2006        time(&now);
2007        commit = xcalloc(1, sizeof(*commit));
2008        commit->parents = xcalloc(1, sizeof(*commit->parents));
2009        commit->parents->item = lookup_commit_reference(head_sha1);
2010        commit->object.parsed = 1;
2011        commit->date = now;
2012        commit->object.type = OBJ_COMMIT;
2013
2014        origin = make_origin(commit, path);
2015
2016        if (!contents_from || strcmp("-", contents_from)) {
2017                struct stat st;
2018                const char *read_from;
2019
2020                if (contents_from) {
2021                        if (stat(contents_from, &st) < 0)
2022                                die("Cannot stat %s", contents_from);
2023                        read_from = contents_from;
2024                }
2025                else {
2026                        if (lstat(path, &st) < 0)
2027                                die("Cannot lstat %s", path);
2028                        read_from = path;
2029                }
2030                fin_size = xsize_t(st.st_size);
2031                buf = xmalloc(fin_size+1);
2032                mode = canon_mode(st.st_mode);
2033                switch (st.st_mode & S_IFMT) {
2034                case S_IFREG:
2035                        fd = open(read_from, O_RDONLY);
2036                        if (fd < 0)
2037                                die("cannot open %s", read_from);
2038                        if (read_in_full(fd, buf, fin_size) != fin_size)
2039                                die("cannot read %s", read_from);
2040                        break;
2041                case S_IFLNK:
2042                        if (readlink(read_from, buf, fin_size+1) != fin_size)
2043                                die("cannot readlink %s", read_from);
2044                        break;
2045                default:
2046                        die("unsupported file type %s", read_from);
2047                }
2048        }
2049        else {
2050                /* Reading from stdin */
2051                contents_from = "standard input";
2052                buf = NULL;
2053                fin_size = 0;
2054                mode = 0;
2055                while (1) {
2056                        ssize_t cnt = 8192;
2057                        buf = xrealloc(buf, fin_size + cnt);
2058                        cnt = xread(0, buf + fin_size, cnt);
2059                        if (cnt < 0)
2060                                die("read error %s from stdin",
2061                                    strerror(errno));
2062                        if (!cnt)
2063                                break;
2064                        fin_size += cnt;
2065                }
2066                buf = xrealloc(buf, fin_size + 1);
2067        }
2068        buf[fin_size] = 0;
2069        origin->file.ptr = buf;
2070        origin->file.size = fin_size;
2071        pretend_sha1_file(buf, fin_size, OBJ_BLOB, origin->blob_sha1);
2072        commit->util = origin;
2073
2074        /*
2075         * Read the current index, replace the path entry with
2076         * origin->blob_sha1 without mucking with its mode or type
2077         * bits; we are not going to write this index out -- we just
2078         * want to run "diff-index --cached".
2079         */
2080        discard_cache();
2081        read_cache();
2082
2083        len = strlen(path);
2084        if (!mode) {
2085                int pos = cache_name_pos(path, len);
2086                if (0 <= pos)
2087                        mode = ntohl(active_cache[pos]->ce_mode);
2088                else
2089                        /* Let's not bother reading from HEAD tree */
2090                        mode = S_IFREG | 0644;
2091        }
2092        size = cache_entry_size(len);
2093        ce = xcalloc(1, size);
2094        hashcpy(ce->sha1, origin->blob_sha1);
2095        memcpy(ce->name, path, len);
2096        ce->ce_flags = create_ce_flags(len, 0);
2097        ce->ce_mode = create_ce_mode(mode);
2098        add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
2099
2100        /*
2101         * We are not going to write this out, so this does not matter
2102         * right now, but someday we might optimize diff-index --cached
2103         * with cache-tree information.
2104         */
2105        cache_tree_invalidate_path(active_cache_tree, path);
2106
2107        commit->buffer = xmalloc(400);
2108        ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
2109        snprintf(commit->buffer, 400,
2110                "tree 0000000000000000000000000000000000000000\n"
2111                "parent %s\n"
2112                "author %s\n"
2113                "committer %s\n\n"
2114                "Version of %s from %s\n",
2115                sha1_to_hex(head_sha1),
2116                ident, ident, path, contents_from ? contents_from : path);
2117        return commit;
2118}
2119
2120int cmd_blame(int argc, const char **argv, const char *prefix)
2121{
2122        struct rev_info revs;
2123        const char *path;
2124        struct scoreboard sb;
2125        struct origin *o;
2126        struct blame_entry *ent;
2127        int i, seen_dashdash, unk, opt;
2128        long bottom, top, lno;
2129        int output_option = 0;
2130        int show_stats = 0;
2131        const char *revs_file = NULL;
2132        const char *final_commit_name = NULL;
2133        enum object_type type;
2134        const char *bottomtop = NULL;
2135        const char *contents_from = NULL;
2136
2137        cmd_is_annotate = !strcmp(argv[0], "annotate");
2138
2139        git_config(git_blame_config);
2140        save_commit_buffer = 0;
2141
2142        opt = 0;
2143        seen_dashdash = 0;
2144        for (unk = i = 1; i < argc; i++) {
2145                const char *arg = argv[i];
2146                if (*arg != '-')
2147                        break;
2148                else if (!strcmp("-b", arg))
2149                        blank_boundary = 1;
2150                else if (!strcmp("--root", arg))
2151                        show_root = 1;
2152                else if (!strcmp(arg, "--show-stats"))
2153                        show_stats = 1;
2154                else if (!strcmp("-c", arg))
2155                        output_option |= OUTPUT_ANNOTATE_COMPAT;
2156                else if (!strcmp("-t", arg))
2157                        output_option |= OUTPUT_RAW_TIMESTAMP;
2158                else if (!strcmp("-l", arg))
2159                        output_option |= OUTPUT_LONG_OBJECT_NAME;
2160                else if (!strcmp("-s", arg))
2161                        output_option |= OUTPUT_NO_AUTHOR;
2162                else if (!strcmp("-S", arg) && ++i < argc)
2163                        revs_file = argv[i];
2164                else if (!prefixcmp(arg, "-M")) {
2165                        opt |= PICKAXE_BLAME_MOVE;
2166                        blame_move_score = parse_score(arg+2);
2167                }
2168                else if (!prefixcmp(arg, "-C")) {
2169                        /*
2170                         * -C enables copy from removed files;
2171                         * -C -C enables copy from existing files, but only
2172                         *       when blaming a new file;
2173                         * -C -C -C enables copy from existing files for
2174                         *          everybody
2175                         */
2176                        if (opt & PICKAXE_BLAME_COPY_HARDER)
2177                                opt |= PICKAXE_BLAME_COPY_HARDEST;
2178                        if (opt & PICKAXE_BLAME_COPY)
2179                                opt |= PICKAXE_BLAME_COPY_HARDER;
2180                        opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
2181                        blame_copy_score = parse_score(arg+2);
2182                }
2183                else if (!prefixcmp(arg, "-L")) {
2184                        if (!arg[2]) {
2185                                if (++i >= argc)
2186                                        usage(blame_usage);
2187                                arg = argv[i];
2188                        }
2189                        else
2190                                arg += 2;
2191                        if (bottomtop)
2192                                die("More than one '-L n,m' option given");
2193                        bottomtop = arg;
2194                }
2195                else if (!strcmp("--contents", arg)) {
2196                        if (++i >= argc)
2197                                usage(blame_usage);
2198                        contents_from = argv[i];
2199                }
2200                else if (!strcmp("--incremental", arg))
2201                        incremental = 1;
2202                else if (!strcmp("--score-debug", arg))
2203                        output_option |= OUTPUT_SHOW_SCORE;
2204                else if (!strcmp("-f", arg) ||
2205                         !strcmp("--show-name", arg))
2206                        output_option |= OUTPUT_SHOW_NAME;
2207                else if (!strcmp("-n", arg) ||
2208                         !strcmp("--show-number", arg))
2209                        output_option |= OUTPUT_SHOW_NUMBER;
2210                else if (!strcmp("-p", arg) ||
2211                         !strcmp("--porcelain", arg))
2212                        output_option |= OUTPUT_PORCELAIN;
2213                else if (!strcmp("--", arg)) {
2214                        seen_dashdash = 1;
2215                        i++;
2216                        break;
2217                }
2218                else
2219                        argv[unk++] = arg;
2220        }
2221
2222        if (!incremental)
2223                setup_pager();
2224
2225        if (!blame_move_score)
2226                blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
2227        if (!blame_copy_score)
2228                blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
2229
2230        /*
2231         * We have collected options unknown to us in argv[1..unk]
2232         * which are to be passed to revision machinery if we are
2233         * going to do the "bottom" processing.
2234         *
2235         * The remaining are:
2236         *
2237         * (1) if seen_dashdash, its either
2238         *     "-options -- <path>" or
2239         *     "-options -- <path> <rev>".
2240         *     but the latter is allowed only if there is no
2241         *     options that we passed to revision machinery.
2242         *
2243         * (2) otherwise, we may have "--" somewhere later and
2244         *     might be looking at the first one of multiple 'rev'
2245         *     parameters (e.g. " master ^next ^maint -- path").
2246         *     See if there is a dashdash first, and give the
2247         *     arguments before that to revision machinery.
2248         *     After that there must be one 'path'.
2249         *
2250         * (3) otherwise, its one of the three:
2251         *     "-options <path> <rev>"
2252         *     "-options <rev> <path>"
2253         *     "-options <path>"
2254         *     but again the first one is allowed only if
2255         *     there is no options that we passed to revision
2256         *     machinery.
2257         */
2258
2259        if (seen_dashdash) {
2260                /* (1) */
2261                if (argc <= i)
2262                        usage(blame_usage);
2263                path = add_prefix(prefix, argv[i]);
2264                if (i + 1 == argc - 1) {
2265                        if (unk != 1)
2266                                usage(blame_usage);
2267                        argv[unk++] = argv[i + 1];
2268                }
2269                else if (i + 1 != argc)
2270                        /* garbage at end */
2271                        usage(blame_usage);
2272        }
2273        else {
2274                int j;
2275                for (j = i; !seen_dashdash && j < argc; j++)
2276                        if (!strcmp(argv[j], "--"))
2277                                seen_dashdash = j;
2278                if (seen_dashdash) {
2279                        /* (2) */
2280                        if (seen_dashdash + 1 != argc - 1)
2281                                usage(blame_usage);
2282                        path = add_prefix(prefix, argv[seen_dashdash + 1]);
2283                        for (j = i; j < seen_dashdash; j++)
2284                                argv[unk++] = argv[j];
2285                }
2286                else {
2287                        /* (3) */
2288                        if (argc <= i)
2289                                usage(blame_usage);
2290                        path = add_prefix(prefix, argv[i]);
2291                        if (i + 1 == argc - 1) {
2292                                final_commit_name = argv[i + 1];
2293
2294                                /* if (unk == 1) we could be getting
2295                                 * old-style
2296                                 */
2297                                if (unk == 1 && !has_path_in_work_tree(path)) {
2298                                        path = add_prefix(prefix, argv[i + 1]);
2299                                        final_commit_name = argv[i];
2300                                }
2301                        }
2302                        else if (i != argc - 1)
2303                                usage(blame_usage); /* garbage at end */
2304
2305                        if (!has_path_in_work_tree(path))
2306                                die("cannot stat path %s: %s",
2307                                    path, strerror(errno));
2308                }
2309        }
2310
2311        if (final_commit_name)
2312                argv[unk++] = final_commit_name;
2313
2314        /*
2315         * Now we got rev and path.  We do not want the path pruning
2316         * but we may want "bottom" processing.
2317         */
2318        argv[unk++] = "--"; /* terminate the rev name */
2319        argv[unk] = NULL;
2320
2321        init_revisions(&revs, NULL);
2322        setup_revisions(unk, argv, &revs, NULL);
2323        memset(&sb, 0, sizeof(sb));
2324
2325        /*
2326         * There must be one and only one positive commit in the
2327         * revs->pending array.
2328         */
2329        for (i = 0; i < revs.pending.nr; i++) {
2330                struct object *obj = revs.pending.objects[i].item;
2331                if (obj->flags & UNINTERESTING)
2332                        continue;
2333                while (obj->type == OBJ_TAG)
2334                        obj = deref_tag(obj, NULL, 0);
2335                if (obj->type != OBJ_COMMIT)
2336                        die("Non commit %s?",
2337                            revs.pending.objects[i].name);
2338                if (sb.final)
2339                        die("More than one commit to dig from %s and %s?",
2340                            revs.pending.objects[i].name,
2341                            final_commit_name);
2342                sb.final = (struct commit *) obj;
2343                final_commit_name = revs.pending.objects[i].name;
2344        }
2345
2346        if (!sb.final) {
2347                /*
2348                 * "--not A B -- path" without anything positive;
2349                 * do not default to HEAD, but use the working tree
2350                 * or "--contents".
2351                 */
2352                sb.final = fake_working_tree_commit(path, contents_from);
2353                add_pending_object(&revs, &(sb.final->object), ":");
2354        }
2355        else if (contents_from)
2356                die("Cannot use --contents with final commit object name");
2357
2358        /*
2359         * If we have bottom, this will mark the ancestors of the
2360         * bottom commits we would reach while traversing as
2361         * uninteresting.
2362         */
2363        prepare_revision_walk(&revs);
2364
2365        if (is_null_sha1(sb.final->object.sha1)) {
2366                char *buf;
2367                o = sb.final->util;
2368                buf = xmalloc(o->file.size + 1);
2369                memcpy(buf, o->file.ptr, o->file.size + 1);
2370                sb.final_buf = buf;
2371                sb.final_buf_size = o->file.size;
2372        }
2373        else {
2374                o = get_origin(&sb, sb.final, path);
2375                if (fill_blob_sha1(o))
2376                        die("no such path %s in %s", path, final_commit_name);
2377
2378                sb.final_buf = read_sha1_file(o->blob_sha1, &type,
2379                                              &sb.final_buf_size);
2380        }
2381        num_read_blob++;
2382        lno = prepare_lines(&sb);
2383
2384        bottom = top = 0;
2385        if (bottomtop)
2386                prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);
2387        if (bottom && top && top < bottom) {
2388                long tmp;
2389                tmp = top; top = bottom; bottom = tmp;
2390        }
2391        if (bottom < 1)
2392                bottom = 1;
2393        if (top < 1)
2394                top = lno;
2395        bottom--;
2396        if (lno < top)
2397                die("file %s has only %lu lines", path, lno);
2398
2399        ent = xcalloc(1, sizeof(*ent));
2400        ent->lno = bottom;
2401        ent->num_lines = top - bottom;
2402        ent->suspect = o;
2403        ent->s_lno = bottom;
2404
2405        sb.ent = ent;
2406        sb.path = path;
2407
2408        if (revs_file && read_ancestry(revs_file))
2409                die("reading graft file %s failed: %s",
2410                    revs_file, strerror(errno));
2411
2412        read_mailmap(&mailmap, ".mailmap", NULL);
2413
2414        assign_blame(&sb, &revs, opt);
2415
2416        if (incremental)
2417                return 0;
2418
2419        coalesce(&sb);
2420
2421        if (!(output_option & OUTPUT_PORCELAIN))
2422                find_alignment(&sb, &output_option);
2423
2424        output(&sb, output_option);
2425        free((void *)sb.final_buf);
2426        for (ent = sb.ent; ent; ) {
2427                struct blame_entry *e = ent->next;
2428                free(ent);
2429                ent = e;
2430        }
2431
2432        if (show_stats) {
2433                printf("num read blob: %d\n", num_read_blob);
2434                printf("num get patch: %d\n", num_get_patch);
2435                printf("num commits: %d\n", num_commits);
2436        }
2437        return 0;
2438}