builtin-blame.con commit Merge branch 'master' into js/merge (8042ed1)
   1/*
   2 * Pickaxe
   3 *
   4 * Copyright (c) 2006, Junio C Hamano
   5 */
   6
   7#include "cache.h"
   8#include "builtin.h"
   9#include "blob.h"
  10#include "commit.h"
  11#include "tag.h"
  12#include "tree-walk.h"
  13#include "diff.h"
  14#include "diffcore.h"
  15#include "revision.h"
  16#include "xdiff-interface.h"
  17
  18#include <time.h>
  19#include <sys/time.h>
  20#include <regex.h>
  21
  22static char blame_usage[] =
  23"git-blame [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [commit] [--] file\n"
  24"  -c, --compatibility Use the same output mode as git-annotate (Default: off)\n"
  25"  -l, --long          Show long commit SHA1 (Default: off)\n"
  26"  -t, --time          Show raw timestamp (Default: off)\n"
  27"  -f, --show-name     Show original filename (Default: auto)\n"
  28"  -n, --show-number   Show original linenumber (Default: off)\n"
  29"  -p, --porcelain     Show in a format designed for machine consumption\n"
  30"  -L n,m              Process only line range n,m, counting from 1\n"
  31"  -M, -C              Find line movements within and across files\n"
  32"  -S revs-file        Use revisions from revs-file instead of calling git-rev-list\n";
  33
  34static int longest_file;
  35static int longest_author;
  36static int max_orig_digits;
  37static int max_digits;
  38static int max_score_digits;
  39
  40#ifndef DEBUG
  41#define DEBUG 0
  42#endif
  43
  44/* stats */
  45static int num_read_blob;
  46static int num_get_patch;
  47static int num_commits;
  48
  49#define PICKAXE_BLAME_MOVE              01
  50#define PICKAXE_BLAME_COPY              02
  51#define PICKAXE_BLAME_COPY_HARDER       04
  52
  53/*
  54 * blame for a blame_entry with score lower than these thresholds
  55 * is not passed to the parent using move/copy logic.
  56 */
  57static unsigned blame_move_score;
  58static unsigned blame_copy_score;
  59#define BLAME_DEFAULT_MOVE_SCORE        20
  60#define BLAME_DEFAULT_COPY_SCORE        40
  61
  62/* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
  63#define METAINFO_SHOWN          (1u<<12)
  64#define MORE_THAN_ONE_PATH      (1u<<13)
  65
  66/*
  67 * One blob in a commit that is being suspected
  68 */
  69struct origin {
  70        int refcnt;
  71        struct commit *commit;
  72        mmfile_t file;
  73        unsigned char blob_sha1[20];
  74        char path[FLEX_ARRAY];
  75};
  76
  77static char *fill_origin_blob(struct origin *o, mmfile_t *file)
  78{
  79        if (!o->file.ptr) {
  80                char type[10];
  81                num_read_blob++;
  82                file->ptr = read_sha1_file(o->blob_sha1, type,
  83                                           (unsigned long *)(&(file->size)));
  84                o->file = *file;
  85        }
  86        else
  87                *file = o->file;
  88        return file->ptr;
  89}
  90
  91static inline struct origin *origin_incref(struct origin *o)
  92{
  93        if (o)
  94                o->refcnt++;
  95        return o;
  96}
  97
  98static void origin_decref(struct origin *o)
  99{
 100        if (o && --o->refcnt <= 0) {
 101                if (o->file.ptr)
 102                        free(o->file.ptr);
 103                memset(o, 0, sizeof(*o));
 104                free(o);
 105        }
 106}
 107
 108struct blame_entry {
 109        struct blame_entry *prev;
 110        struct blame_entry *next;
 111
 112        /* the first line of this group in the final image;
 113         * internally all line numbers are 0 based.
 114         */
 115        int lno;
 116
 117        /* how many lines this group has */
 118        int num_lines;
 119
 120        /* the commit that introduced this group into the final image */
 121        struct origin *suspect;
 122
 123        /* true if the suspect is truly guilty; false while we have not
 124         * checked if the group came from one of its parents.
 125         */
 126        char guilty;
 127
 128        /* the line number of the first line of this group in the
 129         * suspect's file; internally all line numbers are 0 based.
 130         */
 131        int s_lno;
 132
 133        /* how significant this entry is -- cached to avoid
 134         * scanning the lines over and over
 135         */
 136        unsigned score;
 137};
 138
 139struct scoreboard {
 140        /* the final commit (i.e. where we started digging from) */
 141        struct commit *final;
 142
 143        const char *path;
 144
 145        /* the contents in the final; pointed into by buf pointers of
 146         * blame_entries
 147         */
 148        const char *final_buf;
 149        unsigned long final_buf_size;
 150
 151        /* linked list of blames */
 152        struct blame_entry *ent;
 153
 154        /* look-up a line in the final buffer */
 155        int num_lines;
 156        int *lineno;
 157};
 158
 159static int cmp_suspect(struct origin *a, struct origin *b)
 160{
 161        int cmp = hashcmp(a->commit->object.sha1, b->commit->object.sha1);
 162        if (cmp)
 163                return cmp;
 164        return strcmp(a->path, b->path);
 165}
 166
 167#define cmp_suspect(a, b) ( ((a)==(b)) ? 0 : cmp_suspect(a,b) )
 168
 169static void sanity_check_refcnt(struct scoreboard *);
 170
 171static void coalesce(struct scoreboard *sb)
 172{
 173        struct blame_entry *ent, *next;
 174
 175        for (ent = sb->ent; ent && (next = ent->next); ent = next) {
 176                if (!cmp_suspect(ent->suspect, next->suspect) &&
 177                    ent->guilty == next->guilty &&
 178                    ent->s_lno + ent->num_lines == next->s_lno) {
 179                        ent->num_lines += next->num_lines;
 180                        ent->next = next->next;
 181                        if (ent->next)
 182                                ent->next->prev = ent;
 183                        origin_decref(next->suspect);
 184                        free(next);
 185                        ent->score = 0;
 186                        next = ent; /* again */
 187                }
 188        }
 189
 190        if (DEBUG) /* sanity */
 191                sanity_check_refcnt(sb);
 192}
 193
 194static struct origin *make_origin(struct commit *commit, const char *path)
 195{
 196        struct origin *o;
 197        o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
 198        o->commit = commit;
 199        o->refcnt = 1;
 200        strcpy(o->path, path);
 201        return o;
 202}
 203
 204static struct origin *get_origin(struct scoreboard *sb,
 205                                 struct commit *commit,
 206                                 const char *path)
 207{
 208        struct blame_entry *e;
 209
 210        for (e = sb->ent; e; e = e->next) {
 211                if (e->suspect->commit == commit &&
 212                    !strcmp(e->suspect->path, path))
 213                        return origin_incref(e->suspect);
 214        }
 215        return make_origin(commit, path);
 216}
 217
 218static int fill_blob_sha1(struct origin *origin)
 219{
 220        unsigned mode;
 221        char type[10];
 222
 223        if (!is_null_sha1(origin->blob_sha1))
 224                return 0;
 225        if (get_tree_entry(origin->commit->object.sha1,
 226                           origin->path,
 227                           origin->blob_sha1, &mode))
 228                goto error_out;
 229        if (sha1_object_info(origin->blob_sha1, type, NULL) ||
 230            strcmp(type, blob_type))
 231                goto error_out;
 232        return 0;
 233 error_out:
 234        hashclr(origin->blob_sha1);
 235        return -1;
 236}
 237
 238static struct origin *find_origin(struct scoreboard *sb,
 239                                  struct commit *parent,
 240                                  struct origin *origin)
 241{
 242        struct origin *porigin = NULL;
 243        struct diff_options diff_opts;
 244        const char *paths[2];
 245
 246        if (parent->util) {
 247                /* This is a freestanding copy of origin and not
 248                 * refcounted.
 249                 */
 250                struct origin *cached = parent->util;
 251                if (!strcmp(cached->path, origin->path)) {
 252                        porigin = get_origin(sb, parent, cached->path);
 253                        if (porigin->refcnt == 1)
 254                                hashcpy(porigin->blob_sha1, cached->blob_sha1);
 255                        return porigin;
 256                }
 257                /* otherwise it was not very useful; free it */
 258                free(parent->util);
 259                parent->util = NULL;
 260        }
 261
 262        /* See if the origin->path is different between parent
 263         * and origin first.  Most of the time they are the
 264         * same and diff-tree is fairly efficient about this.
 265         */
 266        diff_setup(&diff_opts);
 267        diff_opts.recursive = 1;
 268        diff_opts.detect_rename = 0;
 269        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 270        paths[0] = origin->path;
 271        paths[1] = NULL;
 272
 273        diff_tree_setup_paths(paths, &diff_opts);
 274        if (diff_setup_done(&diff_opts) < 0)
 275                die("diff-setup");
 276        diff_tree_sha1(parent->tree->object.sha1,
 277                       origin->commit->tree->object.sha1,
 278                       "", &diff_opts);
 279        diffcore_std(&diff_opts);
 280
 281        /* It is either one entry that says "modified", or "created",
 282         * or nothing.
 283         */
 284        if (!diff_queued_diff.nr) {
 285                /* The path is the same as parent */
 286                porigin = get_origin(sb, parent, origin->path);
 287                hashcpy(porigin->blob_sha1, origin->blob_sha1);
 288        }
 289        else if (diff_queued_diff.nr != 1)
 290                die("internal error in blame::find_origin");
 291        else {
 292                struct diff_filepair *p = diff_queued_diff.queue[0];
 293                switch (p->status) {
 294                default:
 295                        die("internal error in blame::find_origin (%c)",
 296                            p->status);
 297                case 'M':
 298                        porigin = get_origin(sb, parent, origin->path);
 299                        hashcpy(porigin->blob_sha1, p->one->sha1);
 300                        break;
 301                case 'A':
 302                case 'T':
 303                        /* Did not exist in parent, or type changed */
 304                        break;
 305                }
 306        }
 307        diff_flush(&diff_opts);
 308        if (porigin) {
 309                struct origin *cached;
 310                cached = make_origin(porigin->commit, porigin->path);
 311                hashcpy(cached->blob_sha1, porigin->blob_sha1);
 312                parent->util = cached;
 313        }
 314        return porigin;
 315}
 316
 317static struct origin *find_rename(struct scoreboard *sb,
 318                                  struct commit *parent,
 319                                  struct origin *origin)
 320{
 321        struct origin *porigin = NULL;
 322        struct diff_options diff_opts;
 323        int i;
 324        const char *paths[2];
 325
 326        diff_setup(&diff_opts);
 327        diff_opts.recursive = 1;
 328        diff_opts.detect_rename = DIFF_DETECT_RENAME;
 329        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 330        diff_opts.single_follow = origin->path;
 331        paths[0] = NULL;
 332        diff_tree_setup_paths(paths, &diff_opts);
 333        if (diff_setup_done(&diff_opts) < 0)
 334                die("diff-setup");
 335        diff_tree_sha1(parent->tree->object.sha1,
 336                       origin->commit->tree->object.sha1,
 337                       "", &diff_opts);
 338        diffcore_std(&diff_opts);
 339
 340        for (i = 0; i < diff_queued_diff.nr; i++) {
 341                struct diff_filepair *p = diff_queued_diff.queue[i];
 342                if ((p->status == 'R' || p->status == 'C') &&
 343                    !strcmp(p->two->path, origin->path)) {
 344                        porigin = get_origin(sb, parent, p->one->path);
 345                        hashcpy(porigin->blob_sha1, p->one->sha1);
 346                        break;
 347                }
 348        }
 349        diff_flush(&diff_opts);
 350        return porigin;
 351}
 352
 353struct chunk {
 354        /* line number in postimage; up to but not including this
 355         * line is the same as preimage
 356         */
 357        int same;
 358
 359        /* preimage line number after this chunk */
 360        int p_next;
 361
 362        /* postimage line number after this chunk */
 363        int t_next;
 364};
 365
 366struct patch {
 367        struct chunk *chunks;
 368        int num;
 369};
 370
 371struct blame_diff_state {
 372        struct xdiff_emit_state xm;
 373        struct patch *ret;
 374        unsigned hunk_post_context;
 375        unsigned hunk_in_pre_context : 1;
 376};
 377
 378static void process_u_diff(void *state_, char *line, unsigned long len)
 379{
 380        struct blame_diff_state *state = state_;
 381        struct chunk *chunk;
 382        int off1, off2, len1, len2, num;
 383
 384        num = state->ret->num;
 385        if (len < 4 || line[0] != '@' || line[1] != '@') {
 386                if (state->hunk_in_pre_context && line[0] == ' ')
 387                        state->ret->chunks[num - 1].same++;
 388                else {
 389                        state->hunk_in_pre_context = 0;
 390                        if (line[0] == ' ')
 391                                state->hunk_post_context++;
 392                        else
 393                                state->hunk_post_context = 0;
 394                }
 395                return;
 396        }
 397
 398        if (num && state->hunk_post_context) {
 399                chunk = &state->ret->chunks[num - 1];
 400                chunk->p_next -= state->hunk_post_context;
 401                chunk->t_next -= state->hunk_post_context;
 402        }
 403        state->ret->num = ++num;
 404        state->ret->chunks = xrealloc(state->ret->chunks,
 405                                      sizeof(struct chunk) * num);
 406        chunk = &state->ret->chunks[num - 1];
 407        if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
 408                state->ret->num--;
 409                return;
 410        }
 411
 412        /* Line numbers in patch output are one based. */
 413        off1--;
 414        off2--;
 415
 416        chunk->same = len2 ? off2 : (off2 + 1);
 417
 418        chunk->p_next = off1 + (len1 ? len1 : 1);
 419        chunk->t_next = chunk->same + len2;
 420        state->hunk_in_pre_context = 1;
 421        state->hunk_post_context = 0;
 422}
 423
 424static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
 425                                    int context)
 426{
 427        struct blame_diff_state state;
 428        xpparam_t xpp;
 429        xdemitconf_t xecfg;
 430        xdemitcb_t ecb;
 431
 432        xpp.flags = XDF_NEED_MINIMAL;
 433        xecfg.ctxlen = context;
 434        xecfg.flags = 0;
 435        ecb.outf = xdiff_outf;
 436        ecb.priv = &state;
 437        memset(&state, 0, sizeof(state));
 438        state.xm.consume = process_u_diff;
 439        state.ret = xmalloc(sizeof(struct patch));
 440        state.ret->chunks = NULL;
 441        state.ret->num = 0;
 442
 443        xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);
 444
 445        if (state.ret->num) {
 446                struct chunk *chunk;
 447                chunk = &state.ret->chunks[state.ret->num - 1];
 448                chunk->p_next -= state.hunk_post_context;
 449                chunk->t_next -= state.hunk_post_context;
 450        }
 451        return state.ret;
 452}
 453
 454static struct patch *get_patch(struct origin *parent, struct origin *origin)
 455{
 456        mmfile_t file_p, file_o;
 457        struct patch *patch;
 458
 459        fill_origin_blob(parent, &file_p);
 460        fill_origin_blob(origin, &file_o);
 461        if (!file_p.ptr || !file_o.ptr)
 462                return NULL;
 463        patch = compare_buffer(&file_p, &file_o, 0);
 464        num_get_patch++;
 465        return patch;
 466}
 467
 468static void free_patch(struct patch *p)
 469{
 470        free(p->chunks);
 471        free(p);
 472}
 473
 474static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
 475{
 476        struct blame_entry *ent, *prev = NULL;
 477
 478        origin_incref(e->suspect);
 479
 480        for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
 481                prev = ent;
 482
 483        /* prev, if not NULL, is the last one that is below e */
 484        e->prev = prev;
 485        if (prev) {
 486                e->next = prev->next;
 487                prev->next = e;
 488        }
 489        else {
 490                e->next = sb->ent;
 491                sb->ent = e;
 492        }
 493        if (e->next)
 494                e->next->prev = e;
 495}
 496
 497static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
 498{
 499        struct blame_entry *p, *n;
 500
 501        p = dst->prev;
 502        n = dst->next;
 503        origin_incref(src->suspect);
 504        origin_decref(dst->suspect);
 505        memcpy(dst, src, sizeof(*src));
 506        dst->prev = p;
 507        dst->next = n;
 508        dst->score = 0;
 509}
 510
 511static const char *nth_line(struct scoreboard *sb, int lno)
 512{
 513        return sb->final_buf + sb->lineno[lno];
 514}
 515
 516static void split_overlap(struct blame_entry *split,
 517                          struct blame_entry *e,
 518                          int tlno, int plno, int same,
 519                          struct origin *parent)
 520{
 521        /* it is known that lines between tlno to same came from
 522         * parent, and e has an overlap with that range.  it also is
 523         * known that parent's line plno corresponds to e's line tlno.
 524         *
 525         *                <---- e ----->
 526         *                   <------>
 527         *                   <------------>
 528         *             <------------>
 529         *             <------------------>
 530         *
 531         * Potentially we need to split e into three parts; before
 532         * this chunk, the chunk to be blamed for parent, and after
 533         * that portion.
 534         */
 535        int chunk_end_lno;
 536        memset(split, 0, sizeof(struct blame_entry [3]));
 537
 538        if (e->s_lno < tlno) {
 539                /* there is a pre-chunk part not blamed on parent */
 540                split[0].suspect = origin_incref(e->suspect);
 541                split[0].lno = e->lno;
 542                split[0].s_lno = e->s_lno;
 543                split[0].num_lines = tlno - e->s_lno;
 544                split[1].lno = e->lno + tlno - e->s_lno;
 545                split[1].s_lno = plno;
 546        }
 547        else {
 548                split[1].lno = e->lno;
 549                split[1].s_lno = plno + (e->s_lno - tlno);
 550        }
 551
 552        if (same < e->s_lno + e->num_lines) {
 553                /* there is a post-chunk part not blamed on parent */
 554                split[2].suspect = origin_incref(e->suspect);
 555                split[2].lno = e->lno + (same - e->s_lno);
 556                split[2].s_lno = e->s_lno + (same - e->s_lno);
 557                split[2].num_lines = e->s_lno + e->num_lines - same;
 558                chunk_end_lno = split[2].lno;
 559        }
 560        else
 561                chunk_end_lno = e->lno + e->num_lines;
 562        split[1].num_lines = chunk_end_lno - split[1].lno;
 563
 564        if (split[1].num_lines < 1)
 565                return;
 566        split[1].suspect = origin_incref(parent);
 567}
 568
 569static void split_blame(struct scoreboard *sb,
 570                        struct blame_entry *split,
 571                        struct blame_entry *e)
 572{
 573        struct blame_entry *new_entry;
 574
 575        if (split[0].suspect && split[2].suspect) {
 576                /* we need to split e into two and add another for parent */
 577                dup_entry(e, &split[0]);
 578
 579                new_entry = xmalloc(sizeof(*new_entry));
 580                memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
 581                add_blame_entry(sb, new_entry);
 582
 583                new_entry = xmalloc(sizeof(*new_entry));
 584                memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
 585                add_blame_entry(sb, new_entry);
 586        }
 587        else if (!split[0].suspect && !split[2].suspect)
 588                /* parent covers the entire area */
 589                dup_entry(e, &split[1]);
 590        else if (split[0].suspect) {
 591                dup_entry(e, &split[0]);
 592
 593                new_entry = xmalloc(sizeof(*new_entry));
 594                memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
 595                add_blame_entry(sb, new_entry);
 596        }
 597        else {
 598                dup_entry(e, &split[1]);
 599
 600                new_entry = xmalloc(sizeof(*new_entry));
 601                memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
 602                add_blame_entry(sb, new_entry);
 603        }
 604
 605        if (DEBUG) { /* sanity */
 606                struct blame_entry *ent;
 607                int lno = sb->ent->lno, corrupt = 0;
 608
 609                for (ent = sb->ent; ent; ent = ent->next) {
 610                        if (lno != ent->lno)
 611                                corrupt = 1;
 612                        if (ent->s_lno < 0)
 613                                corrupt = 1;
 614                        lno += ent->num_lines;
 615                }
 616                if (corrupt) {
 617                        lno = sb->ent->lno;
 618                        for (ent = sb->ent; ent; ent = ent->next) {
 619                                printf("L %8d l %8d n %8d\n",
 620                                       lno, ent->lno, ent->num_lines);
 621                                lno = ent->lno + ent->num_lines;
 622                        }
 623                        die("oops");
 624                }
 625        }
 626}
 627
 628static void decref_split(struct blame_entry *split)
 629{
 630        int i;
 631
 632        for (i = 0; i < 3; i++)
 633                origin_decref(split[i].suspect);
 634}
 635
 636static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
 637                          int tlno, int plno, int same,
 638                          struct origin *parent)
 639{
 640        struct blame_entry split[3];
 641
 642        split_overlap(split, e, tlno, plno, same, parent);
 643        if (split[1].suspect)
 644                split_blame(sb, split, e);
 645        decref_split(split);
 646}
 647
 648static int find_last_in_target(struct scoreboard *sb, struct origin *target)
 649{
 650        struct blame_entry *e;
 651        int last_in_target = -1;
 652
 653        for (e = sb->ent; e; e = e->next) {
 654                if (e->guilty || cmp_suspect(e->suspect, target))
 655                        continue;
 656                if (last_in_target < e->s_lno + e->num_lines)
 657                        last_in_target = e->s_lno + e->num_lines;
 658        }
 659        return last_in_target;
 660}
 661
 662static void blame_chunk(struct scoreboard *sb,
 663                        int tlno, int plno, int same,
 664                        struct origin *target, struct origin *parent)
 665{
 666        struct blame_entry *e;
 667
 668        for (e = sb->ent; e; e = e->next) {
 669                if (e->guilty || cmp_suspect(e->suspect, target))
 670                        continue;
 671                if (same <= e->s_lno)
 672                        continue;
 673                if (tlno < e->s_lno + e->num_lines)
 674                        blame_overlap(sb, e, tlno, plno, same, parent);
 675        }
 676}
 677
 678static int pass_blame_to_parent(struct scoreboard *sb,
 679                                struct origin *target,
 680                                struct origin *parent)
 681{
 682        int i, last_in_target, plno, tlno;
 683        struct patch *patch;
 684
 685        last_in_target = find_last_in_target(sb, target);
 686        if (last_in_target < 0)
 687                return 1; /* nothing remains for this target */
 688
 689        patch = get_patch(parent, target);
 690        plno = tlno = 0;
 691        for (i = 0; i < patch->num; i++) {
 692                struct chunk *chunk = &patch->chunks[i];
 693
 694                blame_chunk(sb, tlno, plno, chunk->same, target, parent);
 695                plno = chunk->p_next;
 696                tlno = chunk->t_next;
 697        }
 698        /* rest (i.e. anything above tlno) are the same as parent */
 699        blame_chunk(sb, tlno, plno, last_in_target, target, parent);
 700
 701        free_patch(patch);
 702        return 0;
 703}
 704
 705static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
 706{
 707        unsigned score;
 708        const char *cp, *ep;
 709
 710        if (e->score)
 711                return e->score;
 712
 713        score = 1;
 714        cp = nth_line(sb, e->lno);
 715        ep = nth_line(sb, e->lno + e->num_lines);
 716        while (cp < ep) {
 717                unsigned ch = *((unsigned char *)cp);
 718                if (isalnum(ch))
 719                        score++;
 720                cp++;
 721        }
 722        e->score = score;
 723        return score;
 724}
 725
 726static void copy_split_if_better(struct scoreboard *sb,
 727                                 struct blame_entry *best_so_far,
 728                                 struct blame_entry *this)
 729{
 730        int i;
 731
 732        if (!this[1].suspect)
 733                return;
 734        if (best_so_far[1].suspect) {
 735                if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
 736                        return;
 737        }
 738
 739        for (i = 0; i < 3; i++)
 740                origin_incref(this[i].suspect);
 741        decref_split(best_so_far);
 742        memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
 743}
 744
 745static void find_copy_in_blob(struct scoreboard *sb,
 746                              struct blame_entry *ent,
 747                              struct origin *parent,
 748                              struct blame_entry *split,
 749                              mmfile_t *file_p)
 750{
 751        const char *cp;
 752        int cnt;
 753        mmfile_t file_o;
 754        struct patch *patch;
 755        int i, plno, tlno;
 756
 757        cp = nth_line(sb, ent->lno);
 758        file_o.ptr = (char*) cp;
 759        cnt = ent->num_lines;
 760
 761        while (cnt && cp < sb->final_buf + sb->final_buf_size) {
 762                if (*cp++ == '\n')
 763                        cnt--;
 764        }
 765        file_o.size = cp - file_o.ptr;
 766
 767        patch = compare_buffer(file_p, &file_o, 1);
 768
 769        memset(split, 0, sizeof(struct blame_entry [3]));
 770        plno = tlno = 0;
 771        for (i = 0; i < patch->num; i++) {
 772                struct chunk *chunk = &patch->chunks[i];
 773
 774                /* tlno to chunk->same are the same as ent */
 775                if (ent->num_lines <= tlno)
 776                        break;
 777                if (tlno < chunk->same) {
 778                        struct blame_entry this[3];
 779                        split_overlap(this, ent,
 780                                      tlno + ent->s_lno, plno,
 781                                      chunk->same + ent->s_lno,
 782                                      parent);
 783                        copy_split_if_better(sb, split, this);
 784                        decref_split(this);
 785                }
 786                plno = chunk->p_next;
 787                tlno = chunk->t_next;
 788        }
 789        free_patch(patch);
 790}
 791
 792static int find_move_in_parent(struct scoreboard *sb,
 793                               struct origin *target,
 794                               struct origin *parent)
 795{
 796        int last_in_target, made_progress;
 797        struct blame_entry *e, split[3];
 798        mmfile_t file_p;
 799
 800        last_in_target = find_last_in_target(sb, target);
 801        if (last_in_target < 0)
 802                return 1; /* nothing remains for this target */
 803
 804        fill_origin_blob(parent, &file_p);
 805        if (!file_p.ptr)
 806                return 0;
 807
 808        made_progress = 1;
 809        while (made_progress) {
 810                made_progress = 0;
 811                for (e = sb->ent; e; e = e->next) {
 812                        if (e->guilty || cmp_suspect(e->suspect, target))
 813                                continue;
 814                        find_copy_in_blob(sb, e, parent, split, &file_p);
 815                        if (split[1].suspect &&
 816                            blame_move_score < ent_score(sb, &split[1])) {
 817                                split_blame(sb, split, e);
 818                                made_progress = 1;
 819                        }
 820                        decref_split(split);
 821                }
 822        }
 823        return 0;
 824}
 825
 826
 827struct blame_list {
 828        struct blame_entry *ent;
 829        struct blame_entry split[3];
 830};
 831
 832static struct blame_list *setup_blame_list(struct scoreboard *sb,
 833                                           struct origin *target,
 834                                           int *num_ents_p)
 835{
 836        struct blame_entry *e;
 837        int num_ents, i;
 838        struct blame_list *blame_list = NULL;
 839
 840        /* Count the number of entries the target is suspected for,
 841         * and prepare a list of entry and the best split.
 842         */
 843        for (e = sb->ent, num_ents = 0; e; e = e->next)
 844                if (!e->guilty && !cmp_suspect(e->suspect, target))
 845                        num_ents++;
 846        if (num_ents) {
 847                blame_list = xcalloc(num_ents, sizeof(struct blame_list));
 848                for (e = sb->ent, i = 0; e; e = e->next)
 849                        if (!e->guilty && !cmp_suspect(e->suspect, target))
 850                                blame_list[i++].ent = e;
 851        }
 852        *num_ents_p = num_ents;
 853        return blame_list;
 854}
 855
 856static int find_copy_in_parent(struct scoreboard *sb,
 857                               struct origin *target,
 858                               struct commit *parent,
 859                               struct origin *porigin,
 860                               int opt)
 861{
 862        struct diff_options diff_opts;
 863        const char *paths[1];
 864        int i, j;
 865        int retval;
 866        struct blame_list *blame_list;
 867        int num_ents;
 868
 869        blame_list = setup_blame_list(sb, target, &num_ents);
 870        if (!blame_list)
 871                return 1; /* nothing remains for this target */
 872
 873        diff_setup(&diff_opts);
 874        diff_opts.recursive = 1;
 875        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 876
 877        paths[0] = NULL;
 878        diff_tree_setup_paths(paths, &diff_opts);
 879        if (diff_setup_done(&diff_opts) < 0)
 880                die("diff-setup");
 881
 882        /* Try "find copies harder" on new path if requested;
 883         * we do not want to use diffcore_rename() actually to
 884         * match things up; find_copies_harder is set only to
 885         * force diff_tree_sha1() to feed all filepairs to diff_queue,
 886         * and this code needs to be after diff_setup_done(), which
 887         * usually makes find-copies-harder imply copy detection.
 888         */
 889        if ((opt & PICKAXE_BLAME_COPY_HARDER) &&
 890            (!porigin || strcmp(target->path, porigin->path)))
 891                diff_opts.find_copies_harder = 1;
 892
 893        diff_tree_sha1(parent->tree->object.sha1,
 894                       target->commit->tree->object.sha1,
 895                       "", &diff_opts);
 896
 897        if (!diff_opts.find_copies_harder)
 898                diffcore_std(&diff_opts);
 899
 900        retval = 0;
 901        while (1) {
 902                int made_progress = 0;
 903
 904                for (i = 0; i < diff_queued_diff.nr; i++) {
 905                        struct diff_filepair *p = diff_queued_diff.queue[i];
 906                        struct origin *norigin;
 907                        mmfile_t file_p;
 908                        struct blame_entry this[3];
 909
 910                        if (!DIFF_FILE_VALID(p->one))
 911                                continue; /* does not exist in parent */
 912                        if (porigin && !strcmp(p->one->path, porigin->path))
 913                                /* find_move already dealt with this path */
 914                                continue;
 915
 916                        norigin = get_origin(sb, parent, p->one->path);
 917                        hashcpy(norigin->blob_sha1, p->one->sha1);
 918                        fill_origin_blob(norigin, &file_p);
 919                        if (!file_p.ptr)
 920                                continue;
 921
 922                        for (j = 0; j < num_ents; j++) {
 923                                find_copy_in_blob(sb, blame_list[j].ent,
 924                                                  norigin, this, &file_p);
 925                                copy_split_if_better(sb, blame_list[j].split,
 926                                                     this);
 927                                decref_split(this);
 928                        }
 929                        origin_decref(norigin);
 930                }
 931
 932                for (j = 0; j < num_ents; j++) {
 933                        struct blame_entry *split = blame_list[j].split;
 934                        if (split[1].suspect &&
 935                            blame_copy_score < ent_score(sb, &split[1])) {
 936                                split_blame(sb, split, blame_list[j].ent);
 937                                made_progress = 1;
 938                        }
 939                        decref_split(split);
 940                }
 941                free(blame_list);
 942
 943                if (!made_progress)
 944                        break;
 945                blame_list = setup_blame_list(sb, target, &num_ents);
 946                if (!blame_list) {
 947                        retval = 1;
 948                        break;
 949                }
 950        }
 951        diff_flush(&diff_opts);
 952
 953        return retval;
 954}
 955
 956/* The blobs of origin and porigin exactly match, so everything
 957 * origin is suspected for can be blamed on the parent.
 958 */
 959static void pass_whole_blame(struct scoreboard *sb,
 960                             struct origin *origin, struct origin *porigin)
 961{
 962        struct blame_entry *e;
 963
 964        if (!porigin->file.ptr && origin->file.ptr) {
 965                /* Steal its file */
 966                porigin->file = origin->file;
 967                origin->file.ptr = NULL;
 968        }
 969        for (e = sb->ent; e; e = e->next) {
 970                if (cmp_suspect(e->suspect, origin))
 971                        continue;
 972                origin_incref(porigin);
 973                origin_decref(e->suspect);
 974                e->suspect = porigin;
 975        }
 976}
 977
 978#define MAXPARENT 16
 979
 980static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
 981{
 982        int i, pass;
 983        struct commit *commit = origin->commit;
 984        struct commit_list *parent;
 985        struct origin *parent_origin[MAXPARENT], *porigin;
 986
 987        memset(parent_origin, 0, sizeof(parent_origin));
 988
 989        /* The first pass looks for unrenamed path to optimize for
 990         * common cases, then we look for renames in the second pass.
 991         */
 992        for (pass = 0; pass < 2; pass++) {
 993                struct origin *(*find)(struct scoreboard *,
 994                                       struct commit *, struct origin *);
 995                find = pass ? find_rename : find_origin;
 996
 997                for (i = 0, parent = commit->parents;
 998                     i < MAXPARENT && parent;
 999                     parent = parent->next, i++) {
1000                        struct commit *p = parent->item;
1001                        int j, same;
1002
1003                        if (parent_origin[i])
1004                                continue;
1005                        if (parse_commit(p))
1006                                continue;
1007                        porigin = find(sb, p, origin);
1008                        if (!porigin)
1009                                continue;
1010                        if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
1011                                pass_whole_blame(sb, origin, porigin);
1012                                origin_decref(porigin);
1013                                goto finish;
1014                        }
1015                        for (j = same = 0; j < i; j++)
1016                                if (parent_origin[j] &&
1017                                    !hashcmp(parent_origin[j]->blob_sha1,
1018                                             porigin->blob_sha1)) {
1019                                        same = 1;
1020                                        break;
1021                                }
1022                        if (!same)
1023                                parent_origin[i] = porigin;
1024                        else
1025                                origin_decref(porigin);
1026                }
1027        }
1028
1029        num_commits++;
1030        for (i = 0, parent = commit->parents;
1031             i < MAXPARENT && parent;
1032             parent = parent->next, i++) {
1033                struct origin *porigin = parent_origin[i];
1034                if (!porigin)
1035                        continue;
1036                if (pass_blame_to_parent(sb, origin, porigin))
1037                        goto finish;
1038        }
1039
1040        /*
1041         * Optionally run "miff" to find moves in parents' files here.
1042         */
1043        if (opt & PICKAXE_BLAME_MOVE)
1044                for (i = 0, parent = commit->parents;
1045                     i < MAXPARENT && parent;
1046                     parent = parent->next, i++) {
1047                        struct origin *porigin = parent_origin[i];
1048                        if (!porigin)
1049                                continue;
1050                        if (find_move_in_parent(sb, origin, porigin))
1051                                goto finish;
1052                }
1053
1054        /*
1055         * Optionally run "ciff" to find copies from parents' files here.
1056         */
1057        if (opt & PICKAXE_BLAME_COPY)
1058                for (i = 0, parent = commit->parents;
1059                     i < MAXPARENT && parent;
1060                     parent = parent->next, i++) {
1061                        struct origin *porigin = parent_origin[i];
1062                        if (find_copy_in_parent(sb, origin, parent->item,
1063                                                porigin, opt))
1064                                goto finish;
1065                }
1066
1067 finish:
1068        for (i = 0; i < MAXPARENT; i++)
1069                origin_decref(parent_origin[i]);
1070}
1071
1072static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
1073{
1074        while (1) {
1075                struct blame_entry *ent;
1076                struct commit *commit;
1077                struct origin *suspect = NULL;
1078
1079                /* find one suspect to break down */
1080                for (ent = sb->ent; !suspect && ent; ent = ent->next)
1081                        if (!ent->guilty)
1082                                suspect = ent->suspect;
1083                if (!suspect)
1084                        return; /* all done */
1085
1086                origin_incref(suspect);
1087                commit = suspect->commit;
1088                if (!commit->object.parsed)
1089                        parse_commit(commit);
1090                if (!(commit->object.flags & UNINTERESTING) &&
1091                    !(revs->max_age != -1 && commit->date  < revs->max_age))
1092                        pass_blame(sb, suspect, opt);
1093
1094                /* Take responsibility for the remaining entries */
1095                for (ent = sb->ent; ent; ent = ent->next)
1096                        if (!cmp_suspect(ent->suspect, suspect))
1097                                ent->guilty = 1;
1098                origin_decref(suspect);
1099
1100                if (DEBUG) /* sanity */
1101                        sanity_check_refcnt(sb);
1102        }
1103}
1104
1105static const char *format_time(unsigned long time, const char *tz_str,
1106                               int show_raw_time)
1107{
1108        static char time_buf[128];
1109        time_t t = time;
1110        int minutes, tz;
1111        struct tm *tm;
1112
1113        if (show_raw_time) {
1114                sprintf(time_buf, "%lu %s", time, tz_str);
1115                return time_buf;
1116        }
1117
1118        tz = atoi(tz_str);
1119        minutes = tz < 0 ? -tz : tz;
1120        minutes = (minutes / 100)*60 + (minutes % 100);
1121        minutes = tz < 0 ? -minutes : minutes;
1122        t = time + minutes * 60;
1123        tm = gmtime(&t);
1124
1125        strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
1126        strcat(time_buf, tz_str);
1127        return time_buf;
1128}
1129
1130struct commit_info
1131{
1132        char *author;
1133        char *author_mail;
1134        unsigned long author_time;
1135        char *author_tz;
1136
1137        /* filled only when asked for details */
1138        char *committer;
1139        char *committer_mail;
1140        unsigned long committer_time;
1141        char *committer_tz;
1142
1143        char *summary;
1144};
1145
1146static void get_ac_line(const char *inbuf, const char *what,
1147                        int bufsz, char *person, char **mail,
1148                        unsigned long *time, char **tz)
1149{
1150        int len;
1151        char *tmp, *endp;
1152
1153        tmp = strstr(inbuf, what);
1154        if (!tmp)
1155                goto error_out;
1156        tmp += strlen(what);
1157        endp = strchr(tmp, '\n');
1158        if (!endp)
1159                len = strlen(tmp);
1160        else
1161                len = endp - tmp;
1162        if (bufsz <= len) {
1163        error_out:
1164                /* Ugh */
1165                person = *mail = *tz = "(unknown)";
1166                *time = 0;
1167                return;
1168        }
1169        memcpy(person, tmp, len);
1170
1171        tmp = person;
1172        tmp += len;
1173        *tmp = 0;
1174        while (*tmp != ' ')
1175                tmp--;
1176        *tz = tmp+1;
1177
1178        *tmp = 0;
1179        while (*tmp != ' ')
1180                tmp--;
1181        *time = strtoul(tmp, NULL, 10);
1182
1183        *tmp = 0;
1184        while (*tmp != ' ')
1185                tmp--;
1186        *mail = tmp + 1;
1187        *tmp = 0;
1188}
1189
1190static void get_commit_info(struct commit *commit,
1191                            struct commit_info *ret,
1192                            int detailed)
1193{
1194        int len;
1195        char *tmp, *endp;
1196        static char author_buf[1024];
1197        static char committer_buf[1024];
1198        static char summary_buf[1024];
1199
1200        /* We've operated without save_commit_buffer, so
1201         * we now need to populate them for output.
1202         */
1203        if (!commit->buffer) {
1204                char type[20];
1205                unsigned long size;
1206                commit->buffer =
1207                        read_sha1_file(commit->object.sha1, type, &size);
1208        }
1209        ret->author = author_buf;
1210        get_ac_line(commit->buffer, "\nauthor ",
1211                    sizeof(author_buf), author_buf, &ret->author_mail,
1212                    &ret->author_time, &ret->author_tz);
1213
1214        if (!detailed)
1215                return;
1216
1217        ret->committer = committer_buf;
1218        get_ac_line(commit->buffer, "\ncommitter ",
1219                    sizeof(committer_buf), committer_buf, &ret->committer_mail,
1220                    &ret->committer_time, &ret->committer_tz);
1221
1222        ret->summary = summary_buf;
1223        tmp = strstr(commit->buffer, "\n\n");
1224        if (!tmp) {
1225        error_out:
1226                sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
1227                return;
1228        }
1229        tmp += 2;
1230        endp = strchr(tmp, '\n');
1231        if (!endp)
1232                goto error_out;
1233        len = endp - tmp;
1234        if (len >= sizeof(summary_buf))
1235                goto error_out;
1236        memcpy(summary_buf, tmp, len);
1237        summary_buf[len] = 0;
1238}
1239
1240#define OUTPUT_ANNOTATE_COMPAT  001
1241#define OUTPUT_LONG_OBJECT_NAME 002
1242#define OUTPUT_RAW_TIMESTAMP    004
1243#define OUTPUT_PORCELAIN        010
1244#define OUTPUT_SHOW_NAME        020
1245#define OUTPUT_SHOW_NUMBER      040
1246#define OUTPUT_SHOW_SCORE      0100
1247
1248static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
1249{
1250        int cnt;
1251        const char *cp;
1252        struct origin *suspect = ent->suspect;
1253        char hex[41];
1254
1255        strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1256        printf("%s%c%d %d %d\n",
1257               hex,
1258               ent->guilty ? ' ' : '*', // purely for debugging
1259               ent->s_lno + 1,
1260               ent->lno + 1,
1261               ent->num_lines);
1262        if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1263                struct commit_info ci;
1264                suspect->commit->object.flags |= METAINFO_SHOWN;
1265                get_commit_info(suspect->commit, &ci, 1);
1266                printf("author %s\n", ci.author);
1267                printf("author-mail %s\n", ci.author_mail);
1268                printf("author-time %lu\n", ci.author_time);
1269                printf("author-tz %s\n", ci.author_tz);
1270                printf("committer %s\n", ci.committer);
1271                printf("committer-mail %s\n", ci.committer_mail);
1272                printf("committer-time %lu\n", ci.committer_time);
1273                printf("committer-tz %s\n", ci.committer_tz);
1274                printf("filename %s\n", suspect->path);
1275                printf("summary %s\n", ci.summary);
1276        }
1277        else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1278                printf("filename %s\n", suspect->path);
1279
1280        cp = nth_line(sb, ent->lno);
1281        for (cnt = 0; cnt < ent->num_lines; cnt++) {
1282                char ch;
1283                if (cnt)
1284                        printf("%s %d %d\n", hex,
1285                               ent->s_lno + 1 + cnt,
1286                               ent->lno + 1 + cnt);
1287                putchar('\t');
1288                do {
1289                        ch = *cp++;
1290                        putchar(ch);
1291                } while (ch != '\n' &&
1292                         cp < sb->final_buf + sb->final_buf_size);
1293        }
1294}
1295
1296static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1297{
1298        int cnt;
1299        const char *cp;
1300        struct origin *suspect = ent->suspect;
1301        struct commit_info ci;
1302        char hex[41];
1303        int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1304
1305        get_commit_info(suspect->commit, &ci, 1);
1306        strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1307
1308        cp = nth_line(sb, ent->lno);
1309        for (cnt = 0; cnt < ent->num_lines; cnt++) {
1310                char ch;
1311
1312                printf("%.*s", (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8, hex);
1313                if (opt & OUTPUT_ANNOTATE_COMPAT)
1314                        printf("\t(%10s\t%10s\t%d)", ci.author,
1315                               format_time(ci.author_time, ci.author_tz,
1316                                           show_raw_time),
1317                               ent->lno + 1 + cnt);
1318                else {
1319                        if (opt & OUTPUT_SHOW_SCORE)
1320                                printf(" %*d %02d",
1321                                       max_score_digits, ent->score,
1322                                       ent->suspect->refcnt);
1323                        if (opt & OUTPUT_SHOW_NAME)
1324                                printf(" %-*.*s", longest_file, longest_file,
1325                                       suspect->path);
1326                        if (opt & OUTPUT_SHOW_NUMBER)
1327                                printf(" %*d", max_orig_digits,
1328                                       ent->s_lno + 1 + cnt);
1329                        printf(" (%-*.*s %10s %*d) ",
1330                               longest_author, longest_author, ci.author,
1331                               format_time(ci.author_time, ci.author_tz,
1332                                           show_raw_time),
1333                               max_digits, ent->lno + 1 + cnt);
1334                }
1335                do {
1336                        ch = *cp++;
1337                        putchar(ch);
1338                } while (ch != '\n' &&
1339                         cp < sb->final_buf + sb->final_buf_size);
1340        }
1341}
1342
1343static void output(struct scoreboard *sb, int option)
1344{
1345        struct blame_entry *ent;
1346
1347        if (option & OUTPUT_PORCELAIN) {
1348                for (ent = sb->ent; ent; ent = ent->next) {
1349                        struct blame_entry *oth;
1350                        struct origin *suspect = ent->suspect;
1351                        struct commit *commit = suspect->commit;
1352                        if (commit->object.flags & MORE_THAN_ONE_PATH)
1353                                continue;
1354                        for (oth = ent->next; oth; oth = oth->next) {
1355                                if ((oth->suspect->commit != commit) ||
1356                                    !strcmp(oth->suspect->path, suspect->path))
1357                                        continue;
1358                                commit->object.flags |= MORE_THAN_ONE_PATH;
1359                                break;
1360                        }
1361                }
1362        }
1363
1364        for (ent = sb->ent; ent; ent = ent->next) {
1365                if (option & OUTPUT_PORCELAIN)
1366                        emit_porcelain(sb, ent);
1367                else {
1368                        emit_other(sb, ent, option);
1369                }
1370        }
1371}
1372
1373static int prepare_lines(struct scoreboard *sb)
1374{
1375        const char *buf = sb->final_buf;
1376        unsigned long len = sb->final_buf_size;
1377        int num = 0, incomplete = 0, bol = 1;
1378
1379        if (len && buf[len-1] != '\n')
1380                incomplete++; /* incomplete line at the end */
1381        while (len--) {
1382                if (bol) {
1383                        sb->lineno = xrealloc(sb->lineno,
1384                                              sizeof(int* ) * (num + 1));
1385                        sb->lineno[num] = buf - sb->final_buf;
1386                        bol = 0;
1387                }
1388                if (*buf++ == '\n') {
1389                        num++;
1390                        bol = 1;
1391                }
1392        }
1393        sb->lineno = xrealloc(sb->lineno,
1394                              sizeof(int* ) * (num + incomplete + 1));
1395        sb->lineno[num + incomplete] = buf - sb->final_buf;
1396        sb->num_lines = num + incomplete;
1397        return sb->num_lines;
1398}
1399
1400static int read_ancestry(const char *graft_file)
1401{
1402        FILE *fp = fopen(graft_file, "r");
1403        char buf[1024];
1404        if (!fp)
1405                return -1;
1406        while (fgets(buf, sizeof(buf), fp)) {
1407                /* The format is just "Commit Parent1 Parent2 ...\n" */
1408                int len = strlen(buf);
1409                struct commit_graft *graft = read_graft_line(buf, len);
1410                if (graft)
1411                        register_commit_graft(graft, 0);
1412        }
1413        fclose(fp);
1414        return 0;
1415}
1416
1417static int lineno_width(int lines)
1418{
1419        int i, width;
1420
1421        for (width = 1, i = 10; i <= lines + 1; width++)
1422                i *= 10;
1423        return width;
1424}
1425
1426static void find_alignment(struct scoreboard *sb, int *option)
1427{
1428        int longest_src_lines = 0;
1429        int longest_dst_lines = 0;
1430        unsigned largest_score = 0;
1431        struct blame_entry *e;
1432
1433        for (e = sb->ent; e; e = e->next) {
1434                struct origin *suspect = e->suspect;
1435                struct commit_info ci;
1436                int num;
1437
1438                if (strcmp(suspect->path, sb->path))
1439                        *option |= OUTPUT_SHOW_NAME;
1440                num = strlen(suspect->path);
1441                if (longest_file < num)
1442                        longest_file = num;
1443                if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1444                        suspect->commit->object.flags |= METAINFO_SHOWN;
1445                        get_commit_info(suspect->commit, &ci, 1);
1446                        num = strlen(ci.author);
1447                        if (longest_author < num)
1448                                longest_author = num;
1449                }
1450                num = e->s_lno + e->num_lines;
1451                if (longest_src_lines < num)
1452                        longest_src_lines = num;
1453                num = e->lno + e->num_lines;
1454                if (longest_dst_lines < num)
1455                        longest_dst_lines = num;
1456                if (largest_score < ent_score(sb, e))
1457                        largest_score = ent_score(sb, e);
1458        }
1459        max_orig_digits = lineno_width(longest_src_lines);
1460        max_digits = lineno_width(longest_dst_lines);
1461        max_score_digits = lineno_width(largest_score);
1462}
1463
1464static void sanity_check_refcnt(struct scoreboard *sb)
1465{
1466        int baa = 0;
1467        struct blame_entry *ent;
1468
1469        for (ent = sb->ent; ent; ent = ent->next) {
1470                /* Nobody should have zero or negative refcnt */
1471                if (ent->suspect->refcnt <= 0) {
1472                        fprintf(stderr, "%s in %s has negative refcnt %d\n",
1473                                ent->suspect->path,
1474                                sha1_to_hex(ent->suspect->commit->object.sha1),
1475                                ent->suspect->refcnt);
1476                        baa = 1;
1477                }
1478        }
1479        for (ent = sb->ent; ent; ent = ent->next) {
1480                /* Mark the ones that haven't been checked */
1481                if (0 < ent->suspect->refcnt)
1482                        ent->suspect->refcnt = -ent->suspect->refcnt;
1483        }
1484        for (ent = sb->ent; ent; ent = ent->next) {
1485                /* then pick each and see if they have the the correct
1486                 * refcnt.
1487                 */
1488                int found;
1489                struct blame_entry *e;
1490                struct origin *suspect = ent->suspect;
1491
1492                if (0 < suspect->refcnt)
1493                        continue;
1494                suspect->refcnt = -suspect->refcnt; /* Unmark */
1495                for (found = 0, e = sb->ent; e; e = e->next) {
1496                        if (e->suspect != suspect)
1497                                continue;
1498                        found++;
1499                }
1500                if (suspect->refcnt != found) {
1501                        fprintf(stderr, "%s in %s has refcnt %d, not %d\n",
1502                                ent->suspect->path,
1503                                sha1_to_hex(ent->suspect->commit->object.sha1),
1504                                ent->suspect->refcnt, found);
1505                        baa = 2;
1506                }
1507        }
1508        if (baa) {
1509                int opt = 0160;
1510                find_alignment(sb, &opt);
1511                output(sb, opt);
1512                die("Baa %d!", baa);
1513        }
1514}
1515
1516static int has_path_in_work_tree(const char *path)
1517{
1518        struct stat st;
1519        return !lstat(path, &st);
1520}
1521
1522static unsigned parse_score(const char *arg)
1523{
1524        char *end;
1525        unsigned long score = strtoul(arg, &end, 10);
1526        if (*end)
1527                return 0;
1528        return score;
1529}
1530
1531static const char *add_prefix(const char *prefix, const char *path)
1532{
1533        if (!prefix || !prefix[0])
1534                return path;
1535        return prefix_path(prefix, strlen(prefix), path);
1536}
1537
1538static const char *parse_loc(const char *spec,
1539                             struct scoreboard *sb, long lno,
1540                             long begin, long *ret)
1541{
1542        char *term;
1543        const char *line;
1544        long num;
1545        int reg_error;
1546        regex_t regexp;
1547        regmatch_t match[1];
1548
1549        /* Allow "-L <something>,+20" to mean starting at <something>
1550         * for 20 lines, or "-L <something>,-5" for 5 lines ending at
1551         * <something>.
1552         */
1553        if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
1554                num = strtol(spec + 1, &term, 10);
1555                if (term != spec + 1) {
1556                        if (spec[0] == '-')
1557                                num = 0 - num;
1558                        if (0 < num)
1559                                *ret = begin + num - 2;
1560                        else if (!num)
1561                                *ret = begin;
1562                        else
1563                                *ret = begin + num;
1564                        return term;
1565                }
1566                return spec;
1567        }
1568        num = strtol(spec, &term, 10);
1569        if (term != spec) {
1570                *ret = num;
1571                return term;
1572        }
1573        if (spec[0] != '/')
1574                return spec;
1575
1576        /* it could be a regexp of form /.../ */
1577        for (term = (char*) spec + 1; *term && *term != '/'; term++) {
1578                if (*term == '\\')
1579                        term++;
1580        }
1581        if (*term != '/')
1582                return spec;
1583
1584        /* try [spec+1 .. term-1] as regexp */
1585        *term = 0;
1586        begin--; /* input is in human terms */
1587        line = nth_line(sb, begin);
1588
1589        if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
1590            !(reg_error = regexec(&regexp, line, 1, match, 0))) {
1591                const char *cp = line + match[0].rm_so;
1592                const char *nline;
1593
1594                while (begin++ < lno) {
1595                        nline = nth_line(sb, begin);
1596                        if (line <= cp && cp < nline)
1597                                break;
1598                        line = nline;
1599                }
1600                *ret = begin;
1601                regfree(&regexp);
1602                *term++ = '/';
1603                return term;
1604        }
1605        else {
1606                char errbuf[1024];
1607                regerror(reg_error, &regexp, errbuf, 1024);
1608                die("-L parameter '%s': %s", spec + 1, errbuf);
1609        }
1610}
1611
1612static void prepare_blame_range(struct scoreboard *sb,
1613                                const char *bottomtop,
1614                                long lno,
1615                                long *bottom, long *top)
1616{
1617        const char *term;
1618
1619        term = parse_loc(bottomtop, sb, lno, 1, bottom);
1620        if (*term == ',') {
1621                term = parse_loc(term + 1, sb, lno, *bottom + 1, top);
1622                if (*term)
1623                        usage(blame_usage);
1624        }
1625        if (*term)
1626                usage(blame_usage);
1627}
1628
1629int cmd_blame(int argc, const char **argv, const char *prefix)
1630{
1631        struct rev_info revs;
1632        const char *path;
1633        struct scoreboard sb;
1634        struct origin *o;
1635        struct blame_entry *ent;
1636        int i, seen_dashdash, unk, opt;
1637        long bottom, top, lno;
1638        int output_option = 0;
1639        const char *revs_file = NULL;
1640        const char *final_commit_name = NULL;
1641        char type[10];
1642        const char *bottomtop = NULL;
1643
1644        save_commit_buffer = 0;
1645
1646        opt = 0;
1647        seen_dashdash = 0;
1648        for (unk = i = 1; i < argc; i++) {
1649                const char *arg = argv[i];
1650                if (*arg != '-')
1651                        break;
1652                else if (!strcmp("-c", arg))
1653                        output_option |= OUTPUT_ANNOTATE_COMPAT;
1654                else if (!strcmp("-t", arg))
1655                        output_option |= OUTPUT_RAW_TIMESTAMP;
1656                else if (!strcmp("-l", arg))
1657                        output_option |= OUTPUT_LONG_OBJECT_NAME;
1658                else if (!strcmp("-S", arg) && ++i < argc)
1659                        revs_file = argv[i];
1660                else if (!strncmp("-M", arg, 2)) {
1661                        opt |= PICKAXE_BLAME_MOVE;
1662                        blame_move_score = parse_score(arg+2);
1663                }
1664                else if (!strncmp("-C", arg, 2)) {
1665                        if (opt & PICKAXE_BLAME_COPY)
1666                                opt |= PICKAXE_BLAME_COPY_HARDER;
1667                        opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
1668                        blame_copy_score = parse_score(arg+2);
1669                }
1670                else if (!strncmp("-L", arg, 2)) {
1671                        if (!arg[2]) {
1672                                if (++i >= argc)
1673                                        usage(blame_usage);
1674                                arg = argv[i];
1675                        }
1676                        else
1677                                arg += 2;
1678                        if (bottomtop)
1679                                die("More than one '-L n,m' option given");
1680                        bottomtop = arg;
1681                }
1682                else if (!strcmp("--score-debug", arg))
1683                        output_option |= OUTPUT_SHOW_SCORE;
1684                else if (!strcmp("-f", arg) ||
1685                         !strcmp("--show-name", arg))
1686                        output_option |= OUTPUT_SHOW_NAME;
1687                else if (!strcmp("-n", arg) ||
1688                         !strcmp("--show-number", arg))
1689                        output_option |= OUTPUT_SHOW_NUMBER;
1690                else if (!strcmp("-p", arg) ||
1691                         !strcmp("--porcelain", arg))
1692                        output_option |= OUTPUT_PORCELAIN;
1693                else if (!strcmp("--", arg)) {
1694                        seen_dashdash = 1;
1695                        i++;
1696                        break;
1697                }
1698                else
1699                        argv[unk++] = arg;
1700        }
1701
1702        if (!blame_move_score)
1703                blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
1704        if (!blame_copy_score)
1705                blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
1706
1707        /* We have collected options unknown to us in argv[1..unk]
1708         * which are to be passed to revision machinery if we are
1709         * going to do the "bottom" procesing.
1710         *
1711         * The remaining are:
1712         *
1713         * (1) if seen_dashdash, its either
1714         *     "-options -- <path>" or
1715         *     "-options -- <path> <rev>".
1716         *     but the latter is allowed only if there is no
1717         *     options that we passed to revision machinery.
1718         *
1719         * (2) otherwise, we may have "--" somewhere later and
1720         *     might be looking at the first one of multiple 'rev'
1721         *     parameters (e.g. " master ^next ^maint -- path").
1722         *     See if there is a dashdash first, and give the
1723         *     arguments before that to revision machinery.
1724         *     After that there must be one 'path'.
1725         *
1726         * (3) otherwise, its one of the three:
1727         *     "-options <path> <rev>"
1728         *     "-options <rev> <path>"
1729         *     "-options <path>"
1730         *     but again the first one is allowed only if
1731         *     there is no options that we passed to revision
1732         *     machinery.
1733         */
1734
1735        if (seen_dashdash) {
1736                /* (1) */
1737                if (argc <= i)
1738                        usage(blame_usage);
1739                path = add_prefix(prefix, argv[i]);
1740                if (i + 1 == argc - 1) {
1741                        if (unk != 1)
1742                                usage(blame_usage);
1743                        argv[unk++] = argv[i + 1];
1744                }
1745                else if (i + 1 != argc)
1746                        /* garbage at end */
1747                        usage(blame_usage);
1748        }
1749        else {
1750                int j;
1751                for (j = i; !seen_dashdash && j < argc; j++)
1752                        if (!strcmp(argv[j], "--"))
1753                                seen_dashdash = j;
1754                if (seen_dashdash) {
1755                        if (seen_dashdash + 1 != argc - 1)
1756                                usage(blame_usage);
1757                        path = add_prefix(prefix, argv[seen_dashdash + 1]);
1758                        for (j = i; j < seen_dashdash; j++)
1759                                argv[unk++] = argv[j];
1760                }
1761                else {
1762                        /* (3) */
1763                        path = add_prefix(prefix, argv[i]);
1764                        if (i + 1 == argc - 1) {
1765                                final_commit_name = argv[i + 1];
1766
1767                                /* if (unk == 1) we could be getting
1768                                 * old-style
1769                                 */
1770                                if (unk == 1 && !has_path_in_work_tree(path)) {
1771                                        path = add_prefix(prefix, argv[i + 1]);
1772                                        final_commit_name = argv[i];
1773                                }
1774                        }
1775                        else if (i != argc - 1)
1776                                usage(blame_usage); /* garbage at end */
1777
1778                        if (!has_path_in_work_tree(path))
1779                                die("cannot stat path %s: %s",
1780                                    path, strerror(errno));
1781                }
1782        }
1783
1784        if (final_commit_name)
1785                argv[unk++] = final_commit_name;
1786
1787        /* Now we got rev and path.  We do not want the path pruning
1788         * but we may want "bottom" processing.
1789         */
1790        argv[unk++] = "--"; /* terminate the rev name */
1791        argv[unk] = NULL;
1792
1793        init_revisions(&revs, NULL);
1794        setup_revisions(unk, argv, &revs, "HEAD");
1795        memset(&sb, 0, sizeof(sb));
1796
1797        /* There must be one and only one positive commit in the
1798         * revs->pending array.
1799         */
1800        for (i = 0; i < revs.pending.nr; i++) {
1801                struct object *obj = revs.pending.objects[i].item;
1802                if (obj->flags & UNINTERESTING)
1803                        continue;
1804                while (obj->type == OBJ_TAG)
1805                        obj = deref_tag(obj, NULL, 0);
1806                if (obj->type != OBJ_COMMIT)
1807                        die("Non commit %s?",
1808                            revs.pending.objects[i].name);
1809                if (sb.final)
1810                        die("More than one commit to dig from %s and %s?",
1811                            revs.pending.objects[i].name,
1812                            final_commit_name);
1813                sb.final = (struct commit *) obj;
1814                final_commit_name = revs.pending.objects[i].name;
1815        }
1816
1817        if (!sb.final) {
1818                /* "--not A B -- path" without anything positive */
1819                unsigned char head_sha1[20];
1820
1821                final_commit_name = "HEAD";
1822                if (get_sha1(final_commit_name, head_sha1))
1823                        die("No such ref: HEAD");
1824                sb.final = lookup_commit_reference(head_sha1);
1825                add_pending_object(&revs, &(sb.final->object), "HEAD");
1826        }
1827
1828        /* If we have bottom, this will mark the ancestors of the
1829         * bottom commits we would reach while traversing as
1830         * uninteresting.
1831         */
1832        prepare_revision_walk(&revs);
1833
1834        o = get_origin(&sb, sb.final, path);
1835        if (fill_blob_sha1(o))
1836                die("no such path %s in %s", path, final_commit_name);
1837
1838        sb.final_buf = read_sha1_file(o->blob_sha1, type, &sb.final_buf_size);
1839        num_read_blob++;
1840        lno = prepare_lines(&sb);
1841
1842        bottom = top = 0;
1843        if (bottomtop)
1844                prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);
1845        if (bottom && top && top < bottom) {
1846                long tmp;
1847                tmp = top; top = bottom; bottom = tmp;
1848        }
1849        if (bottom < 1)
1850                bottom = 1;
1851        if (top < 1)
1852                top = lno;
1853        bottom--;
1854        if (lno < top)
1855                die("file %s has only %lu lines", path, lno);
1856
1857        ent = xcalloc(1, sizeof(*ent));
1858        ent->lno = bottom;
1859        ent->num_lines = top - bottom;
1860        ent->suspect = o;
1861        ent->s_lno = bottom;
1862
1863        sb.ent = ent;
1864        sb.path = path;
1865
1866        if (revs_file && read_ancestry(revs_file))
1867                die("reading graft file %s failed: %s",
1868                    revs_file, strerror(errno));
1869
1870        assign_blame(&sb, &revs, opt);
1871
1872        coalesce(&sb);
1873
1874        if (!(output_option & OUTPUT_PORCELAIN))
1875                find_alignment(&sb, &output_option);
1876
1877        output(&sb, output_option);
1878        free((void *)sb.final_buf);
1879        for (ent = sb.ent; ent; ) {
1880                struct blame_entry *e = ent->next;
1881                free(ent);
1882                ent = e;
1883        }
1884
1885        if (DEBUG) {
1886                printf("num read blob: %d\n", num_read_blob);
1887                printf("num get patch: %d\n", num_get_patch);
1888                printf("num commits: %d\n", num_commits);
1889        }
1890        return 0;
1891}