builtin-blame.con commit Merge branch 'maint' (5485115)
   1/*
   2 * Pickaxe
   3 *
   4 * Copyright (c) 2006, Junio C Hamano
   5 */
   6
   7#include "cache.h"
   8#include "builtin.h"
   9#include "blob.h"
  10#include "commit.h"
  11#include "tag.h"
  12#include "tree-walk.h"
  13#include "diff.h"
  14#include "diffcore.h"
  15#include "revision.h"
  16#include "xdiff-interface.h"
  17
  18static char blame_usage[] =
  19"git-blame [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [commit] [--] file\n"
  20"  -c, --compatibility Use the same output mode as git-annotate (Default: off)\n"
  21"  -l, --long          Show long commit SHA1 (Default: off)\n"
  22"  -t, --time          Show raw timestamp (Default: off)\n"
  23"  -f, --show-name     Show original filename (Default: auto)\n"
  24"  -n, --show-number   Show original linenumber (Default: off)\n"
  25"  -p, --porcelain     Show in a format designed for machine consumption\n"
  26"  -L n,m              Process only line range n,m, counting from 1\n"
  27"  -M, -C              Find line movements within and across files\n"
  28"  -S revs-file        Use revisions from revs-file instead of calling git-rev-list\n";
  29
  30static int longest_file;
  31static int longest_author;
  32static int max_orig_digits;
  33static int max_digits;
  34static int max_score_digits;
  35
  36#ifndef DEBUG
  37#define DEBUG 0
  38#endif
  39
  40/* stats */
  41static int num_read_blob;
  42static int num_get_patch;
  43static int num_commits;
  44
  45#define PICKAXE_BLAME_MOVE              01
  46#define PICKAXE_BLAME_COPY              02
  47#define PICKAXE_BLAME_COPY_HARDER       04
  48
  49/*
  50 * blame for a blame_entry with score lower than these thresholds
  51 * is not passed to the parent using move/copy logic.
  52 */
  53static unsigned blame_move_score;
  54static unsigned blame_copy_score;
  55#define BLAME_DEFAULT_MOVE_SCORE        20
  56#define BLAME_DEFAULT_COPY_SCORE        40
  57
  58/* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
  59#define METAINFO_SHOWN          (1u<<12)
  60#define MORE_THAN_ONE_PATH      (1u<<13)
  61
  62/*
  63 * One blob in a commit that is being suspected
  64 */
  65struct origin {
  66        int refcnt;
  67        struct commit *commit;
  68        mmfile_t file;
  69        unsigned char blob_sha1[20];
  70        char path[FLEX_ARRAY];
  71};
  72
  73static char *fill_origin_blob(struct origin *o, mmfile_t *file)
  74{
  75        if (!o->file.ptr) {
  76                char type[10];
  77                num_read_blob++;
  78                file->ptr = read_sha1_file(o->blob_sha1, type,
  79                                           (unsigned long *)(&(file->size)));
  80                o->file = *file;
  81        }
  82        else
  83                *file = o->file;
  84        return file->ptr;
  85}
  86
  87static inline struct origin *origin_incref(struct origin *o)
  88{
  89        if (o)
  90                o->refcnt++;
  91        return o;
  92}
  93
  94static void origin_decref(struct origin *o)
  95{
  96        if (o && --o->refcnt <= 0) {
  97                if (o->file.ptr)
  98                        free(o->file.ptr);
  99                memset(o, 0, sizeof(*o));
 100                free(o);
 101        }
 102}
 103
 104struct blame_entry {
 105        struct blame_entry *prev;
 106        struct blame_entry *next;
 107
 108        /* the first line of this group in the final image;
 109         * internally all line numbers are 0 based.
 110         */
 111        int lno;
 112
 113        /* how many lines this group has */
 114        int num_lines;
 115
 116        /* the commit that introduced this group into the final image */
 117        struct origin *suspect;
 118
 119        /* true if the suspect is truly guilty; false while we have not
 120         * checked if the group came from one of its parents.
 121         */
 122        char guilty;
 123
 124        /* the line number of the first line of this group in the
 125         * suspect's file; internally all line numbers are 0 based.
 126         */
 127        int s_lno;
 128
 129        /* how significant this entry is -- cached to avoid
 130         * scanning the lines over and over
 131         */
 132        unsigned score;
 133};
 134
 135struct scoreboard {
 136        /* the final commit (i.e. where we started digging from) */
 137        struct commit *final;
 138
 139        const char *path;
 140
 141        /* the contents in the final; pointed into by buf pointers of
 142         * blame_entries
 143         */
 144        const char *final_buf;
 145        unsigned long final_buf_size;
 146
 147        /* linked list of blames */
 148        struct blame_entry *ent;
 149
 150        /* look-up a line in the final buffer */
 151        int num_lines;
 152        int *lineno;
 153};
 154
 155static int cmp_suspect(struct origin *a, struct origin *b)
 156{
 157        int cmp = hashcmp(a->commit->object.sha1, b->commit->object.sha1);
 158        if (cmp)
 159                return cmp;
 160        return strcmp(a->path, b->path);
 161}
 162
 163#define cmp_suspect(a, b) ( ((a)==(b)) ? 0 : cmp_suspect(a,b) )
 164
 165static void sanity_check_refcnt(struct scoreboard *);
 166
 167static void coalesce(struct scoreboard *sb)
 168{
 169        struct blame_entry *ent, *next;
 170
 171        for (ent = sb->ent; ent && (next = ent->next); ent = next) {
 172                if (!cmp_suspect(ent->suspect, next->suspect) &&
 173                    ent->guilty == next->guilty &&
 174                    ent->s_lno + ent->num_lines == next->s_lno) {
 175                        ent->num_lines += next->num_lines;
 176                        ent->next = next->next;
 177                        if (ent->next)
 178                                ent->next->prev = ent;
 179                        origin_decref(next->suspect);
 180                        free(next);
 181                        ent->score = 0;
 182                        next = ent; /* again */
 183                }
 184        }
 185
 186        if (DEBUG) /* sanity */
 187                sanity_check_refcnt(sb);
 188}
 189
 190static struct origin *make_origin(struct commit *commit, const char *path)
 191{
 192        struct origin *o;
 193        o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
 194        o->commit = commit;
 195        o->refcnt = 1;
 196        strcpy(o->path, path);
 197        return o;
 198}
 199
 200static struct origin *get_origin(struct scoreboard *sb,
 201                                 struct commit *commit,
 202                                 const char *path)
 203{
 204        struct blame_entry *e;
 205
 206        for (e = sb->ent; e; e = e->next) {
 207                if (e->suspect->commit == commit &&
 208                    !strcmp(e->suspect->path, path))
 209                        return origin_incref(e->suspect);
 210        }
 211        return make_origin(commit, path);
 212}
 213
 214static int fill_blob_sha1(struct origin *origin)
 215{
 216        unsigned mode;
 217        char type[10];
 218
 219        if (!is_null_sha1(origin->blob_sha1))
 220                return 0;
 221        if (get_tree_entry(origin->commit->object.sha1,
 222                           origin->path,
 223                           origin->blob_sha1, &mode))
 224                goto error_out;
 225        if (sha1_object_info(origin->blob_sha1, type, NULL) ||
 226            strcmp(type, blob_type))
 227                goto error_out;
 228        return 0;
 229 error_out:
 230        hashclr(origin->blob_sha1);
 231        return -1;
 232}
 233
 234static struct origin *find_origin(struct scoreboard *sb,
 235                                  struct commit *parent,
 236                                  struct origin *origin)
 237{
 238        struct origin *porigin = NULL;
 239        struct diff_options diff_opts;
 240        const char *paths[2];
 241
 242        if (parent->util) {
 243                /* This is a freestanding copy of origin and not
 244                 * refcounted.
 245                 */
 246                struct origin *cached = parent->util;
 247                if (!strcmp(cached->path, origin->path)) {
 248                        porigin = get_origin(sb, parent, cached->path);
 249                        if (porigin->refcnt == 1)
 250                                hashcpy(porigin->blob_sha1, cached->blob_sha1);
 251                        return porigin;
 252                }
 253                /* otherwise it was not very useful; free it */
 254                free(parent->util);
 255                parent->util = NULL;
 256        }
 257
 258        /* See if the origin->path is different between parent
 259         * and origin first.  Most of the time they are the
 260         * same and diff-tree is fairly efficient about this.
 261         */
 262        diff_setup(&diff_opts);
 263        diff_opts.recursive = 1;
 264        diff_opts.detect_rename = 0;
 265        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 266        paths[0] = origin->path;
 267        paths[1] = NULL;
 268
 269        diff_tree_setup_paths(paths, &diff_opts);
 270        if (diff_setup_done(&diff_opts) < 0)
 271                die("diff-setup");
 272        diff_tree_sha1(parent->tree->object.sha1,
 273                       origin->commit->tree->object.sha1,
 274                       "", &diff_opts);
 275        diffcore_std(&diff_opts);
 276
 277        /* It is either one entry that says "modified", or "created",
 278         * or nothing.
 279         */
 280        if (!diff_queued_diff.nr) {
 281                /* The path is the same as parent */
 282                porigin = get_origin(sb, parent, origin->path);
 283                hashcpy(porigin->blob_sha1, origin->blob_sha1);
 284        }
 285        else if (diff_queued_diff.nr != 1)
 286                die("internal error in blame::find_origin");
 287        else {
 288                struct diff_filepair *p = diff_queued_diff.queue[0];
 289                switch (p->status) {
 290                default:
 291                        die("internal error in blame::find_origin (%c)",
 292                            p->status);
 293                case 'M':
 294                        porigin = get_origin(sb, parent, origin->path);
 295                        hashcpy(porigin->blob_sha1, p->one->sha1);
 296                        break;
 297                case 'A':
 298                case 'T':
 299                        /* Did not exist in parent, or type changed */
 300                        break;
 301                }
 302        }
 303        diff_flush(&diff_opts);
 304        if (porigin) {
 305                struct origin *cached;
 306                cached = make_origin(porigin->commit, porigin->path);
 307                hashcpy(cached->blob_sha1, porigin->blob_sha1);
 308                parent->util = cached;
 309        }
 310        return porigin;
 311}
 312
 313static struct origin *find_rename(struct scoreboard *sb,
 314                                  struct commit *parent,
 315                                  struct origin *origin)
 316{
 317        struct origin *porigin = NULL;
 318        struct diff_options diff_opts;
 319        int i;
 320        const char *paths[2];
 321
 322        diff_setup(&diff_opts);
 323        diff_opts.recursive = 1;
 324        diff_opts.detect_rename = DIFF_DETECT_RENAME;
 325        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 326        diff_opts.single_follow = origin->path;
 327        paths[0] = NULL;
 328        diff_tree_setup_paths(paths, &diff_opts);
 329        if (diff_setup_done(&diff_opts) < 0)
 330                die("diff-setup");
 331        diff_tree_sha1(parent->tree->object.sha1,
 332                       origin->commit->tree->object.sha1,
 333                       "", &diff_opts);
 334        diffcore_std(&diff_opts);
 335
 336        for (i = 0; i < diff_queued_diff.nr; i++) {
 337                struct diff_filepair *p = diff_queued_diff.queue[i];
 338                if ((p->status == 'R' || p->status == 'C') &&
 339                    !strcmp(p->two->path, origin->path)) {
 340                        porigin = get_origin(sb, parent, p->one->path);
 341                        hashcpy(porigin->blob_sha1, p->one->sha1);
 342                        break;
 343                }
 344        }
 345        diff_flush(&diff_opts);
 346        return porigin;
 347}
 348
 349struct chunk {
 350        /* line number in postimage; up to but not including this
 351         * line is the same as preimage
 352         */
 353        int same;
 354
 355        /* preimage line number after this chunk */
 356        int p_next;
 357
 358        /* postimage line number after this chunk */
 359        int t_next;
 360};
 361
 362struct patch {
 363        struct chunk *chunks;
 364        int num;
 365};
 366
 367struct blame_diff_state {
 368        struct xdiff_emit_state xm;
 369        struct patch *ret;
 370        unsigned hunk_post_context;
 371        unsigned hunk_in_pre_context : 1;
 372};
 373
 374static void process_u_diff(void *state_, char *line, unsigned long len)
 375{
 376        struct blame_diff_state *state = state_;
 377        struct chunk *chunk;
 378        int off1, off2, len1, len2, num;
 379
 380        num = state->ret->num;
 381        if (len < 4 || line[0] != '@' || line[1] != '@') {
 382                if (state->hunk_in_pre_context && line[0] == ' ')
 383                        state->ret->chunks[num - 1].same++;
 384                else {
 385                        state->hunk_in_pre_context = 0;
 386                        if (line[0] == ' ')
 387                                state->hunk_post_context++;
 388                        else
 389                                state->hunk_post_context = 0;
 390                }
 391                return;
 392        }
 393
 394        if (num && state->hunk_post_context) {
 395                chunk = &state->ret->chunks[num - 1];
 396                chunk->p_next -= state->hunk_post_context;
 397                chunk->t_next -= state->hunk_post_context;
 398        }
 399        state->ret->num = ++num;
 400        state->ret->chunks = xrealloc(state->ret->chunks,
 401                                      sizeof(struct chunk) * num);
 402        chunk = &state->ret->chunks[num - 1];
 403        if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
 404                state->ret->num--;
 405                return;
 406        }
 407
 408        /* Line numbers in patch output are one based. */
 409        off1--;
 410        off2--;
 411
 412        chunk->same = len2 ? off2 : (off2 + 1);
 413
 414        chunk->p_next = off1 + (len1 ? len1 : 1);
 415        chunk->t_next = chunk->same + len2;
 416        state->hunk_in_pre_context = 1;
 417        state->hunk_post_context = 0;
 418}
 419
 420static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
 421                                    int context)
 422{
 423        struct blame_diff_state state;
 424        xpparam_t xpp;
 425        xdemitconf_t xecfg;
 426        xdemitcb_t ecb;
 427
 428        xpp.flags = XDF_NEED_MINIMAL;
 429        xecfg.ctxlen = context;
 430        xecfg.flags = 0;
 431        ecb.outf = xdiff_outf;
 432        ecb.priv = &state;
 433        memset(&state, 0, sizeof(state));
 434        state.xm.consume = process_u_diff;
 435        state.ret = xmalloc(sizeof(struct patch));
 436        state.ret->chunks = NULL;
 437        state.ret->num = 0;
 438
 439        xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);
 440
 441        if (state.ret->num) {
 442                struct chunk *chunk;
 443                chunk = &state.ret->chunks[state.ret->num - 1];
 444                chunk->p_next -= state.hunk_post_context;
 445                chunk->t_next -= state.hunk_post_context;
 446        }
 447        return state.ret;
 448}
 449
 450static struct patch *get_patch(struct origin *parent, struct origin *origin)
 451{
 452        mmfile_t file_p, file_o;
 453        struct patch *patch;
 454
 455        fill_origin_blob(parent, &file_p);
 456        fill_origin_blob(origin, &file_o);
 457        if (!file_p.ptr || !file_o.ptr)
 458                return NULL;
 459        patch = compare_buffer(&file_p, &file_o, 0);
 460        num_get_patch++;
 461        return patch;
 462}
 463
 464static void free_patch(struct patch *p)
 465{
 466        free(p->chunks);
 467        free(p);
 468}
 469
 470static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
 471{
 472        struct blame_entry *ent, *prev = NULL;
 473
 474        origin_incref(e->suspect);
 475
 476        for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
 477                prev = ent;
 478
 479        /* prev, if not NULL, is the last one that is below e */
 480        e->prev = prev;
 481        if (prev) {
 482                e->next = prev->next;
 483                prev->next = e;
 484        }
 485        else {
 486                e->next = sb->ent;
 487                sb->ent = e;
 488        }
 489        if (e->next)
 490                e->next->prev = e;
 491}
 492
 493static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
 494{
 495        struct blame_entry *p, *n;
 496
 497        p = dst->prev;
 498        n = dst->next;
 499        origin_incref(src->suspect);
 500        origin_decref(dst->suspect);
 501        memcpy(dst, src, sizeof(*src));
 502        dst->prev = p;
 503        dst->next = n;
 504        dst->score = 0;
 505}
 506
 507static const char *nth_line(struct scoreboard *sb, int lno)
 508{
 509        return sb->final_buf + sb->lineno[lno];
 510}
 511
 512static void split_overlap(struct blame_entry *split,
 513                          struct blame_entry *e,
 514                          int tlno, int plno, int same,
 515                          struct origin *parent)
 516{
 517        /* it is known that lines between tlno to same came from
 518         * parent, and e has an overlap with that range.  it also is
 519         * known that parent's line plno corresponds to e's line tlno.
 520         *
 521         *                <---- e ----->
 522         *                   <------>
 523         *                   <------------>
 524         *             <------------>
 525         *             <------------------>
 526         *
 527         * Potentially we need to split e into three parts; before
 528         * this chunk, the chunk to be blamed for parent, and after
 529         * that portion.
 530         */
 531        int chunk_end_lno;
 532        memset(split, 0, sizeof(struct blame_entry [3]));
 533
 534        if (e->s_lno < tlno) {
 535                /* there is a pre-chunk part not blamed on parent */
 536                split[0].suspect = origin_incref(e->suspect);
 537                split[0].lno = e->lno;
 538                split[0].s_lno = e->s_lno;
 539                split[0].num_lines = tlno - e->s_lno;
 540                split[1].lno = e->lno + tlno - e->s_lno;
 541                split[1].s_lno = plno;
 542        }
 543        else {
 544                split[1].lno = e->lno;
 545                split[1].s_lno = plno + (e->s_lno - tlno);
 546        }
 547
 548        if (same < e->s_lno + e->num_lines) {
 549                /* there is a post-chunk part not blamed on parent */
 550                split[2].suspect = origin_incref(e->suspect);
 551                split[2].lno = e->lno + (same - e->s_lno);
 552                split[2].s_lno = e->s_lno + (same - e->s_lno);
 553                split[2].num_lines = e->s_lno + e->num_lines - same;
 554                chunk_end_lno = split[2].lno;
 555        }
 556        else
 557                chunk_end_lno = e->lno + e->num_lines;
 558        split[1].num_lines = chunk_end_lno - split[1].lno;
 559
 560        if (split[1].num_lines < 1)
 561                return;
 562        split[1].suspect = origin_incref(parent);
 563}
 564
 565static void split_blame(struct scoreboard *sb,
 566                        struct blame_entry *split,
 567                        struct blame_entry *e)
 568{
 569        struct blame_entry *new_entry;
 570
 571        if (split[0].suspect && split[2].suspect) {
 572                /* we need to split e into two and add another for parent */
 573                dup_entry(e, &split[0]);
 574
 575                new_entry = xmalloc(sizeof(*new_entry));
 576                memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
 577                add_blame_entry(sb, new_entry);
 578
 579                new_entry = xmalloc(sizeof(*new_entry));
 580                memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
 581                add_blame_entry(sb, new_entry);
 582        }
 583        else if (!split[0].suspect && !split[2].suspect)
 584                /* parent covers the entire area */
 585                dup_entry(e, &split[1]);
 586        else if (split[0].suspect) {
 587                dup_entry(e, &split[0]);
 588
 589                new_entry = xmalloc(sizeof(*new_entry));
 590                memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
 591                add_blame_entry(sb, new_entry);
 592        }
 593        else {
 594                dup_entry(e, &split[1]);
 595
 596                new_entry = xmalloc(sizeof(*new_entry));
 597                memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
 598                add_blame_entry(sb, new_entry);
 599        }
 600
 601        if (DEBUG) { /* sanity */
 602                struct blame_entry *ent;
 603                int lno = sb->ent->lno, corrupt = 0;
 604
 605                for (ent = sb->ent; ent; ent = ent->next) {
 606                        if (lno != ent->lno)
 607                                corrupt = 1;
 608                        if (ent->s_lno < 0)
 609                                corrupt = 1;
 610                        lno += ent->num_lines;
 611                }
 612                if (corrupt) {
 613                        lno = sb->ent->lno;
 614                        for (ent = sb->ent; ent; ent = ent->next) {
 615                                printf("L %8d l %8d n %8d\n",
 616                                       lno, ent->lno, ent->num_lines);
 617                                lno = ent->lno + ent->num_lines;
 618                        }
 619                        die("oops");
 620                }
 621        }
 622}
 623
 624static void decref_split(struct blame_entry *split)
 625{
 626        int i;
 627
 628        for (i = 0; i < 3; i++)
 629                origin_decref(split[i].suspect);
 630}
 631
 632static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
 633                          int tlno, int plno, int same,
 634                          struct origin *parent)
 635{
 636        struct blame_entry split[3];
 637
 638        split_overlap(split, e, tlno, plno, same, parent);
 639        if (split[1].suspect)
 640                split_blame(sb, split, e);
 641        decref_split(split);
 642}
 643
 644static int find_last_in_target(struct scoreboard *sb, struct origin *target)
 645{
 646        struct blame_entry *e;
 647        int last_in_target = -1;
 648
 649        for (e = sb->ent; e; e = e->next) {
 650                if (e->guilty || cmp_suspect(e->suspect, target))
 651                        continue;
 652                if (last_in_target < e->s_lno + e->num_lines)
 653                        last_in_target = e->s_lno + e->num_lines;
 654        }
 655        return last_in_target;
 656}
 657
 658static void blame_chunk(struct scoreboard *sb,
 659                        int tlno, int plno, int same,
 660                        struct origin *target, struct origin *parent)
 661{
 662        struct blame_entry *e;
 663
 664        for (e = sb->ent; e; e = e->next) {
 665                if (e->guilty || cmp_suspect(e->suspect, target))
 666                        continue;
 667                if (same <= e->s_lno)
 668                        continue;
 669                if (tlno < e->s_lno + e->num_lines)
 670                        blame_overlap(sb, e, tlno, plno, same, parent);
 671        }
 672}
 673
 674static int pass_blame_to_parent(struct scoreboard *sb,
 675                                struct origin *target,
 676                                struct origin *parent)
 677{
 678        int i, last_in_target, plno, tlno;
 679        struct patch *patch;
 680
 681        last_in_target = find_last_in_target(sb, target);
 682        if (last_in_target < 0)
 683                return 1; /* nothing remains for this target */
 684
 685        patch = get_patch(parent, target);
 686        plno = tlno = 0;
 687        for (i = 0; i < patch->num; i++) {
 688                struct chunk *chunk = &patch->chunks[i];
 689
 690                blame_chunk(sb, tlno, plno, chunk->same, target, parent);
 691                plno = chunk->p_next;
 692                tlno = chunk->t_next;
 693        }
 694        /* rest (i.e. anything above tlno) are the same as parent */
 695        blame_chunk(sb, tlno, plno, last_in_target, target, parent);
 696
 697        free_patch(patch);
 698        return 0;
 699}
 700
 701static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
 702{
 703        unsigned score;
 704        const char *cp, *ep;
 705
 706        if (e->score)
 707                return e->score;
 708
 709        score = 1;
 710        cp = nth_line(sb, e->lno);
 711        ep = nth_line(sb, e->lno + e->num_lines);
 712        while (cp < ep) {
 713                unsigned ch = *((unsigned char *)cp);
 714                if (isalnum(ch))
 715                        score++;
 716                cp++;
 717        }
 718        e->score = score;
 719        return score;
 720}
 721
 722static void copy_split_if_better(struct scoreboard *sb,
 723                                 struct blame_entry *best_so_far,
 724                                 struct blame_entry *this)
 725{
 726        int i;
 727
 728        if (!this[1].suspect)
 729                return;
 730        if (best_so_far[1].suspect) {
 731                if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
 732                        return;
 733        }
 734
 735        for (i = 0; i < 3; i++)
 736                origin_incref(this[i].suspect);
 737        decref_split(best_so_far);
 738        memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
 739}
 740
 741static void find_copy_in_blob(struct scoreboard *sb,
 742                              struct blame_entry *ent,
 743                              struct origin *parent,
 744                              struct blame_entry *split,
 745                              mmfile_t *file_p)
 746{
 747        const char *cp;
 748        int cnt;
 749        mmfile_t file_o;
 750        struct patch *patch;
 751        int i, plno, tlno;
 752
 753        cp = nth_line(sb, ent->lno);
 754        file_o.ptr = (char*) cp;
 755        cnt = ent->num_lines;
 756
 757        while (cnt && cp < sb->final_buf + sb->final_buf_size) {
 758                if (*cp++ == '\n')
 759                        cnt--;
 760        }
 761        file_o.size = cp - file_o.ptr;
 762
 763        patch = compare_buffer(file_p, &file_o, 1);
 764
 765        memset(split, 0, sizeof(struct blame_entry [3]));
 766        plno = tlno = 0;
 767        for (i = 0; i < patch->num; i++) {
 768                struct chunk *chunk = &patch->chunks[i];
 769
 770                /* tlno to chunk->same are the same as ent */
 771                if (ent->num_lines <= tlno)
 772                        break;
 773                if (tlno < chunk->same) {
 774                        struct blame_entry this[3];
 775                        split_overlap(this, ent,
 776                                      tlno + ent->s_lno, plno,
 777                                      chunk->same + ent->s_lno,
 778                                      parent);
 779                        copy_split_if_better(sb, split, this);
 780                        decref_split(this);
 781                }
 782                plno = chunk->p_next;
 783                tlno = chunk->t_next;
 784        }
 785        free_patch(patch);
 786}
 787
 788static int find_move_in_parent(struct scoreboard *sb,
 789                               struct origin *target,
 790                               struct origin *parent)
 791{
 792        int last_in_target, made_progress;
 793        struct blame_entry *e, split[3];
 794        mmfile_t file_p;
 795
 796        last_in_target = find_last_in_target(sb, target);
 797        if (last_in_target < 0)
 798                return 1; /* nothing remains for this target */
 799
 800        fill_origin_blob(parent, &file_p);
 801        if (!file_p.ptr)
 802                return 0;
 803
 804        made_progress = 1;
 805        while (made_progress) {
 806                made_progress = 0;
 807                for (e = sb->ent; e; e = e->next) {
 808                        if (e->guilty || cmp_suspect(e->suspect, target))
 809                                continue;
 810                        find_copy_in_blob(sb, e, parent, split, &file_p);
 811                        if (split[1].suspect &&
 812                            blame_move_score < ent_score(sb, &split[1])) {
 813                                split_blame(sb, split, e);
 814                                made_progress = 1;
 815                        }
 816                        decref_split(split);
 817                }
 818        }
 819        return 0;
 820}
 821
 822
 823struct blame_list {
 824        struct blame_entry *ent;
 825        struct blame_entry split[3];
 826};
 827
 828static struct blame_list *setup_blame_list(struct scoreboard *sb,
 829                                           struct origin *target,
 830                                           int *num_ents_p)
 831{
 832        struct blame_entry *e;
 833        int num_ents, i;
 834        struct blame_list *blame_list = NULL;
 835
 836        /* Count the number of entries the target is suspected for,
 837         * and prepare a list of entry and the best split.
 838         */
 839        for (e = sb->ent, num_ents = 0; e; e = e->next)
 840                if (!e->guilty && !cmp_suspect(e->suspect, target))
 841                        num_ents++;
 842        if (num_ents) {
 843                blame_list = xcalloc(num_ents, sizeof(struct blame_list));
 844                for (e = sb->ent, i = 0; e; e = e->next)
 845                        if (!e->guilty && !cmp_suspect(e->suspect, target))
 846                                blame_list[i++].ent = e;
 847        }
 848        *num_ents_p = num_ents;
 849        return blame_list;
 850}
 851
 852static int find_copy_in_parent(struct scoreboard *sb,
 853                               struct origin *target,
 854                               struct commit *parent,
 855                               struct origin *porigin,
 856                               int opt)
 857{
 858        struct diff_options diff_opts;
 859        const char *paths[1];
 860        int i, j;
 861        int retval;
 862        struct blame_list *blame_list;
 863        int num_ents;
 864
 865        blame_list = setup_blame_list(sb, target, &num_ents);
 866        if (!blame_list)
 867                return 1; /* nothing remains for this target */
 868
 869        diff_setup(&diff_opts);
 870        diff_opts.recursive = 1;
 871        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 872
 873        paths[0] = NULL;
 874        diff_tree_setup_paths(paths, &diff_opts);
 875        if (diff_setup_done(&diff_opts) < 0)
 876                die("diff-setup");
 877
 878        /* Try "find copies harder" on new path if requested;
 879         * we do not want to use diffcore_rename() actually to
 880         * match things up; find_copies_harder is set only to
 881         * force diff_tree_sha1() to feed all filepairs to diff_queue,
 882         * and this code needs to be after diff_setup_done(), which
 883         * usually makes find-copies-harder imply copy detection.
 884         */
 885        if ((opt & PICKAXE_BLAME_COPY_HARDER) &&
 886            (!porigin || strcmp(target->path, porigin->path)))
 887                diff_opts.find_copies_harder = 1;
 888
 889        diff_tree_sha1(parent->tree->object.sha1,
 890                       target->commit->tree->object.sha1,
 891                       "", &diff_opts);
 892
 893        if (!diff_opts.find_copies_harder)
 894                diffcore_std(&diff_opts);
 895
 896        retval = 0;
 897        while (1) {
 898                int made_progress = 0;
 899
 900                for (i = 0; i < diff_queued_diff.nr; i++) {
 901                        struct diff_filepair *p = diff_queued_diff.queue[i];
 902                        struct origin *norigin;
 903                        mmfile_t file_p;
 904                        struct blame_entry this[3];
 905
 906                        if (!DIFF_FILE_VALID(p->one))
 907                                continue; /* does not exist in parent */
 908                        if (porigin && !strcmp(p->one->path, porigin->path))
 909                                /* find_move already dealt with this path */
 910                                continue;
 911
 912                        norigin = get_origin(sb, parent, p->one->path);
 913                        hashcpy(norigin->blob_sha1, p->one->sha1);
 914                        fill_origin_blob(norigin, &file_p);
 915                        if (!file_p.ptr)
 916                                continue;
 917
 918                        for (j = 0; j < num_ents; j++) {
 919                                find_copy_in_blob(sb, blame_list[j].ent,
 920                                                  norigin, this, &file_p);
 921                                copy_split_if_better(sb, blame_list[j].split,
 922                                                     this);
 923                                decref_split(this);
 924                        }
 925                        origin_decref(norigin);
 926                }
 927
 928                for (j = 0; j < num_ents; j++) {
 929                        struct blame_entry *split = blame_list[j].split;
 930                        if (split[1].suspect &&
 931                            blame_copy_score < ent_score(sb, &split[1])) {
 932                                split_blame(sb, split, blame_list[j].ent);
 933                                made_progress = 1;
 934                        }
 935                        decref_split(split);
 936                }
 937                free(blame_list);
 938
 939                if (!made_progress)
 940                        break;
 941                blame_list = setup_blame_list(sb, target, &num_ents);
 942                if (!blame_list) {
 943                        retval = 1;
 944                        break;
 945                }
 946        }
 947        diff_flush(&diff_opts);
 948
 949        return retval;
 950}
 951
 952/* The blobs of origin and porigin exactly match, so everything
 953 * origin is suspected for can be blamed on the parent.
 954 */
 955static void pass_whole_blame(struct scoreboard *sb,
 956                             struct origin *origin, struct origin *porigin)
 957{
 958        struct blame_entry *e;
 959
 960        if (!porigin->file.ptr && origin->file.ptr) {
 961                /* Steal its file */
 962                porigin->file = origin->file;
 963                origin->file.ptr = NULL;
 964        }
 965        for (e = sb->ent; e; e = e->next) {
 966                if (cmp_suspect(e->suspect, origin))
 967                        continue;
 968                origin_incref(porigin);
 969                origin_decref(e->suspect);
 970                e->suspect = porigin;
 971        }
 972}
 973
 974#define MAXPARENT 16
 975
 976static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
 977{
 978        int i, pass;
 979        struct commit *commit = origin->commit;
 980        struct commit_list *parent;
 981        struct origin *parent_origin[MAXPARENT], *porigin;
 982
 983        memset(parent_origin, 0, sizeof(parent_origin));
 984
 985        /* The first pass looks for unrenamed path to optimize for
 986         * common cases, then we look for renames in the second pass.
 987         */
 988        for (pass = 0; pass < 2; pass++) {
 989                struct origin *(*find)(struct scoreboard *,
 990                                       struct commit *, struct origin *);
 991                find = pass ? find_rename : find_origin;
 992
 993                for (i = 0, parent = commit->parents;
 994                     i < MAXPARENT && parent;
 995                     parent = parent->next, i++) {
 996                        struct commit *p = parent->item;
 997                        int j, same;
 998
 999                        if (parent_origin[i])
1000                                continue;
1001                        if (parse_commit(p))
1002                                continue;
1003                        porigin = find(sb, p, origin);
1004                        if (!porigin)
1005                                continue;
1006                        if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
1007                                pass_whole_blame(sb, origin, porigin);
1008                                origin_decref(porigin);
1009                                goto finish;
1010                        }
1011                        for (j = same = 0; j < i; j++)
1012                                if (parent_origin[j] &&
1013                                    !hashcmp(parent_origin[j]->blob_sha1,
1014                                             porigin->blob_sha1)) {
1015                                        same = 1;
1016                                        break;
1017                                }
1018                        if (!same)
1019                                parent_origin[i] = porigin;
1020                        else
1021                                origin_decref(porigin);
1022                }
1023        }
1024
1025        num_commits++;
1026        for (i = 0, parent = commit->parents;
1027             i < MAXPARENT && parent;
1028             parent = parent->next, i++) {
1029                struct origin *porigin = parent_origin[i];
1030                if (!porigin)
1031                        continue;
1032                if (pass_blame_to_parent(sb, origin, porigin))
1033                        goto finish;
1034        }
1035
1036        /*
1037         * Optionally run "miff" to find moves in parents' files here.
1038         */
1039        if (opt & PICKAXE_BLAME_MOVE)
1040                for (i = 0, parent = commit->parents;
1041                     i < MAXPARENT && parent;
1042                     parent = parent->next, i++) {
1043                        struct origin *porigin = parent_origin[i];
1044                        if (!porigin)
1045                                continue;
1046                        if (find_move_in_parent(sb, origin, porigin))
1047                                goto finish;
1048                }
1049
1050        /*
1051         * Optionally run "ciff" to find copies from parents' files here.
1052         */
1053        if (opt & PICKAXE_BLAME_COPY)
1054                for (i = 0, parent = commit->parents;
1055                     i < MAXPARENT && parent;
1056                     parent = parent->next, i++) {
1057                        struct origin *porigin = parent_origin[i];
1058                        if (find_copy_in_parent(sb, origin, parent->item,
1059                                                porigin, opt))
1060                                goto finish;
1061                }
1062
1063 finish:
1064        for (i = 0; i < MAXPARENT; i++)
1065                origin_decref(parent_origin[i]);
1066}
1067
1068static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
1069{
1070        while (1) {
1071                struct blame_entry *ent;
1072                struct commit *commit;
1073                struct origin *suspect = NULL;
1074
1075                /* find one suspect to break down */
1076                for (ent = sb->ent; !suspect && ent; ent = ent->next)
1077                        if (!ent->guilty)
1078                                suspect = ent->suspect;
1079                if (!suspect)
1080                        return; /* all done */
1081
1082                origin_incref(suspect);
1083                commit = suspect->commit;
1084                if (!commit->object.parsed)
1085                        parse_commit(commit);
1086                if (!(commit->object.flags & UNINTERESTING) &&
1087                    !(revs->max_age != -1 && commit->date  < revs->max_age))
1088                        pass_blame(sb, suspect, opt);
1089                else {
1090                        commit->object.flags |= UNINTERESTING;
1091                        if (commit->object.parsed)
1092                                mark_parents_uninteresting(commit);
1093                }
1094
1095                /* Take responsibility for the remaining entries */
1096                for (ent = sb->ent; ent; ent = ent->next)
1097                        if (!cmp_suspect(ent->suspect, suspect))
1098                                ent->guilty = 1;
1099                origin_decref(suspect);
1100
1101                if (DEBUG) /* sanity */
1102                        sanity_check_refcnt(sb);
1103        }
1104}
1105
1106static const char *format_time(unsigned long time, const char *tz_str,
1107                               int show_raw_time)
1108{
1109        static char time_buf[128];
1110        time_t t = time;
1111        int minutes, tz;
1112        struct tm *tm;
1113
1114        if (show_raw_time) {
1115                sprintf(time_buf, "%lu %s", time, tz_str);
1116                return time_buf;
1117        }
1118
1119        tz = atoi(tz_str);
1120        minutes = tz < 0 ? -tz : tz;
1121        minutes = (minutes / 100)*60 + (minutes % 100);
1122        minutes = tz < 0 ? -minutes : minutes;
1123        t = time + minutes * 60;
1124        tm = gmtime(&t);
1125
1126        strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
1127        strcat(time_buf, tz_str);
1128        return time_buf;
1129}
1130
1131struct commit_info
1132{
1133        char *author;
1134        char *author_mail;
1135        unsigned long author_time;
1136        char *author_tz;
1137
1138        /* filled only when asked for details */
1139        char *committer;
1140        char *committer_mail;
1141        unsigned long committer_time;
1142        char *committer_tz;
1143
1144        char *summary;
1145};
1146
1147static void get_ac_line(const char *inbuf, const char *what,
1148                        int bufsz, char *person, char **mail,
1149                        unsigned long *time, char **tz)
1150{
1151        int len;
1152        char *tmp, *endp;
1153
1154        tmp = strstr(inbuf, what);
1155        if (!tmp)
1156                goto error_out;
1157        tmp += strlen(what);
1158        endp = strchr(tmp, '\n');
1159        if (!endp)
1160                len = strlen(tmp);
1161        else
1162                len = endp - tmp;
1163        if (bufsz <= len) {
1164        error_out:
1165                /* Ugh */
1166                person = *mail = *tz = "(unknown)";
1167                *time = 0;
1168                return;
1169        }
1170        memcpy(person, tmp, len);
1171
1172        tmp = person;
1173        tmp += len;
1174        *tmp = 0;
1175        while (*tmp != ' ')
1176                tmp--;
1177        *tz = tmp+1;
1178
1179        *tmp = 0;
1180        while (*tmp != ' ')
1181                tmp--;
1182        *time = strtoul(tmp, NULL, 10);
1183
1184        *tmp = 0;
1185        while (*tmp != ' ')
1186                tmp--;
1187        *mail = tmp + 1;
1188        *tmp = 0;
1189}
1190
1191static void get_commit_info(struct commit *commit,
1192                            struct commit_info *ret,
1193                            int detailed)
1194{
1195        int len;
1196        char *tmp, *endp;
1197        static char author_buf[1024];
1198        static char committer_buf[1024];
1199        static char summary_buf[1024];
1200
1201        /* We've operated without save_commit_buffer, so
1202         * we now need to populate them for output.
1203         */
1204        if (!commit->buffer) {
1205                char type[20];
1206                unsigned long size;
1207                commit->buffer =
1208                        read_sha1_file(commit->object.sha1, type, &size);
1209        }
1210        ret->author = author_buf;
1211        get_ac_line(commit->buffer, "\nauthor ",
1212                    sizeof(author_buf), author_buf, &ret->author_mail,
1213                    &ret->author_time, &ret->author_tz);
1214
1215        if (!detailed)
1216                return;
1217
1218        ret->committer = committer_buf;
1219        get_ac_line(commit->buffer, "\ncommitter ",
1220                    sizeof(committer_buf), committer_buf, &ret->committer_mail,
1221                    &ret->committer_time, &ret->committer_tz);
1222
1223        ret->summary = summary_buf;
1224        tmp = strstr(commit->buffer, "\n\n");
1225        if (!tmp) {
1226        error_out:
1227                sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
1228                return;
1229        }
1230        tmp += 2;
1231        endp = strchr(tmp, '\n');
1232        if (!endp)
1233                goto error_out;
1234        len = endp - tmp;
1235        if (len >= sizeof(summary_buf))
1236                goto error_out;
1237        memcpy(summary_buf, tmp, len);
1238        summary_buf[len] = 0;
1239}
1240
1241#define OUTPUT_ANNOTATE_COMPAT  001
1242#define OUTPUT_LONG_OBJECT_NAME 002
1243#define OUTPUT_RAW_TIMESTAMP    004
1244#define OUTPUT_PORCELAIN        010
1245#define OUTPUT_SHOW_NAME        020
1246#define OUTPUT_SHOW_NUMBER      040
1247#define OUTPUT_SHOW_SCORE      0100
1248
1249static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
1250{
1251        int cnt;
1252        const char *cp;
1253        struct origin *suspect = ent->suspect;
1254        char hex[41];
1255
1256        strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1257        printf("%s%c%d %d %d\n",
1258               hex,
1259               ent->guilty ? ' ' : '*', // purely for debugging
1260               ent->s_lno + 1,
1261               ent->lno + 1,
1262               ent->num_lines);
1263        if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1264                struct commit_info ci;
1265                suspect->commit->object.flags |= METAINFO_SHOWN;
1266                get_commit_info(suspect->commit, &ci, 1);
1267                printf("author %s\n", ci.author);
1268                printf("author-mail %s\n", ci.author_mail);
1269                printf("author-time %lu\n", ci.author_time);
1270                printf("author-tz %s\n", ci.author_tz);
1271                printf("committer %s\n", ci.committer);
1272                printf("committer-mail %s\n", ci.committer_mail);
1273                printf("committer-time %lu\n", ci.committer_time);
1274                printf("committer-tz %s\n", ci.committer_tz);
1275                printf("filename %s\n", suspect->path);
1276                printf("summary %s\n", ci.summary);
1277                if (suspect->commit->object.flags & UNINTERESTING)
1278                        printf("boundary\n");
1279        }
1280        else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1281                printf("filename %s\n", suspect->path);
1282
1283        cp = nth_line(sb, ent->lno);
1284        for (cnt = 0; cnt < ent->num_lines; cnt++) {
1285                char ch;
1286                if (cnt)
1287                        printf("%s %d %d\n", hex,
1288                               ent->s_lno + 1 + cnt,
1289                               ent->lno + 1 + cnt);
1290                putchar('\t');
1291                do {
1292                        ch = *cp++;
1293                        putchar(ch);
1294                } while (ch != '\n' &&
1295                         cp < sb->final_buf + sb->final_buf_size);
1296        }
1297}
1298
1299static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1300{
1301        int cnt;
1302        const char *cp;
1303        struct origin *suspect = ent->suspect;
1304        struct commit_info ci;
1305        char hex[41];
1306        int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1307
1308        get_commit_info(suspect->commit, &ci, 1);
1309        strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1310
1311        cp = nth_line(sb, ent->lno);
1312        for (cnt = 0; cnt < ent->num_lines; cnt++) {
1313                char ch;
1314                int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8;
1315
1316                if (suspect->commit->object.flags & UNINTERESTING) {
1317                        length--;
1318                        putchar('^');
1319                }
1320
1321                printf("%.*s", length, hex);
1322                if (opt & OUTPUT_ANNOTATE_COMPAT)
1323                        printf("\t(%10s\t%10s\t%d)", ci.author,
1324                               format_time(ci.author_time, ci.author_tz,
1325                                           show_raw_time),
1326                               ent->lno + 1 + cnt);
1327                else {
1328                        if (opt & OUTPUT_SHOW_SCORE)
1329                                printf(" %*d %02d",
1330                                       max_score_digits, ent->score,
1331                                       ent->suspect->refcnt);
1332                        if (opt & OUTPUT_SHOW_NAME)
1333                                printf(" %-*.*s", longest_file, longest_file,
1334                                       suspect->path);
1335                        if (opt & OUTPUT_SHOW_NUMBER)
1336                                printf(" %*d", max_orig_digits,
1337                                       ent->s_lno + 1 + cnt);
1338                        printf(" (%-*.*s %10s %*d) ",
1339                               longest_author, longest_author, ci.author,
1340                               format_time(ci.author_time, ci.author_tz,
1341                                           show_raw_time),
1342                               max_digits, ent->lno + 1 + cnt);
1343                }
1344                do {
1345                        ch = *cp++;
1346                        putchar(ch);
1347                } while (ch != '\n' &&
1348                         cp < sb->final_buf + sb->final_buf_size);
1349        }
1350}
1351
1352static void output(struct scoreboard *sb, int option)
1353{
1354        struct blame_entry *ent;
1355
1356        if (option & OUTPUT_PORCELAIN) {
1357                for (ent = sb->ent; ent; ent = ent->next) {
1358                        struct blame_entry *oth;
1359                        struct origin *suspect = ent->suspect;
1360                        struct commit *commit = suspect->commit;
1361                        if (commit->object.flags & MORE_THAN_ONE_PATH)
1362                                continue;
1363                        for (oth = ent->next; oth; oth = oth->next) {
1364                                if ((oth->suspect->commit != commit) ||
1365                                    !strcmp(oth->suspect->path, suspect->path))
1366                                        continue;
1367                                commit->object.flags |= MORE_THAN_ONE_PATH;
1368                                break;
1369                        }
1370                }
1371        }
1372
1373        for (ent = sb->ent; ent; ent = ent->next) {
1374                if (option & OUTPUT_PORCELAIN)
1375                        emit_porcelain(sb, ent);
1376                else {
1377                        emit_other(sb, ent, option);
1378                }
1379        }
1380}
1381
1382static int prepare_lines(struct scoreboard *sb)
1383{
1384        const char *buf = sb->final_buf;
1385        unsigned long len = sb->final_buf_size;
1386        int num = 0, incomplete = 0, bol = 1;
1387
1388        if (len && buf[len-1] != '\n')
1389                incomplete++; /* incomplete line at the end */
1390        while (len--) {
1391                if (bol) {
1392                        sb->lineno = xrealloc(sb->lineno,
1393                                              sizeof(int* ) * (num + 1));
1394                        sb->lineno[num] = buf - sb->final_buf;
1395                        bol = 0;
1396                }
1397                if (*buf++ == '\n') {
1398                        num++;
1399                        bol = 1;
1400                }
1401        }
1402        sb->lineno = xrealloc(sb->lineno,
1403                              sizeof(int* ) * (num + incomplete + 1));
1404        sb->lineno[num + incomplete] = buf - sb->final_buf;
1405        sb->num_lines = num + incomplete;
1406        return sb->num_lines;
1407}
1408
1409static int read_ancestry(const char *graft_file)
1410{
1411        FILE *fp = fopen(graft_file, "r");
1412        char buf[1024];
1413        if (!fp)
1414                return -1;
1415        while (fgets(buf, sizeof(buf), fp)) {
1416                /* The format is just "Commit Parent1 Parent2 ...\n" */
1417                int len = strlen(buf);
1418                struct commit_graft *graft = read_graft_line(buf, len);
1419                if (graft)
1420                        register_commit_graft(graft, 0);
1421        }
1422        fclose(fp);
1423        return 0;
1424}
1425
1426static int lineno_width(int lines)
1427{
1428        int i, width;
1429
1430        for (width = 1, i = 10; i <= lines + 1; width++)
1431                i *= 10;
1432        return width;
1433}
1434
1435static void find_alignment(struct scoreboard *sb, int *option)
1436{
1437        int longest_src_lines = 0;
1438        int longest_dst_lines = 0;
1439        unsigned largest_score = 0;
1440        struct blame_entry *e;
1441
1442        for (e = sb->ent; e; e = e->next) {
1443                struct origin *suspect = e->suspect;
1444                struct commit_info ci;
1445                int num;
1446
1447                if (strcmp(suspect->path, sb->path))
1448                        *option |= OUTPUT_SHOW_NAME;
1449                num = strlen(suspect->path);
1450                if (longest_file < num)
1451                        longest_file = num;
1452                if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1453                        suspect->commit->object.flags |= METAINFO_SHOWN;
1454                        get_commit_info(suspect->commit, &ci, 1);
1455                        num = strlen(ci.author);
1456                        if (longest_author < num)
1457                                longest_author = num;
1458                }
1459                num = e->s_lno + e->num_lines;
1460                if (longest_src_lines < num)
1461                        longest_src_lines = num;
1462                num = e->lno + e->num_lines;
1463                if (longest_dst_lines < num)
1464                        longest_dst_lines = num;
1465                if (largest_score < ent_score(sb, e))
1466                        largest_score = ent_score(sb, e);
1467        }
1468        max_orig_digits = lineno_width(longest_src_lines);
1469        max_digits = lineno_width(longest_dst_lines);
1470        max_score_digits = lineno_width(largest_score);
1471}
1472
1473static void sanity_check_refcnt(struct scoreboard *sb)
1474{
1475        int baa = 0;
1476        struct blame_entry *ent;
1477
1478        for (ent = sb->ent; ent; ent = ent->next) {
1479                /* Nobody should have zero or negative refcnt */
1480                if (ent->suspect->refcnt <= 0) {
1481                        fprintf(stderr, "%s in %s has negative refcnt %d\n",
1482                                ent->suspect->path,
1483                                sha1_to_hex(ent->suspect->commit->object.sha1),
1484                                ent->suspect->refcnt);
1485                        baa = 1;
1486                }
1487        }
1488        for (ent = sb->ent; ent; ent = ent->next) {
1489                /* Mark the ones that haven't been checked */
1490                if (0 < ent->suspect->refcnt)
1491                        ent->suspect->refcnt = -ent->suspect->refcnt;
1492        }
1493        for (ent = sb->ent; ent; ent = ent->next) {
1494                /* then pick each and see if they have the the correct
1495                 * refcnt.
1496                 */
1497                int found;
1498                struct blame_entry *e;
1499                struct origin *suspect = ent->suspect;
1500
1501                if (0 < suspect->refcnt)
1502                        continue;
1503                suspect->refcnt = -suspect->refcnt; /* Unmark */
1504                for (found = 0, e = sb->ent; e; e = e->next) {
1505                        if (e->suspect != suspect)
1506                                continue;
1507                        found++;
1508                }
1509                if (suspect->refcnt != found) {
1510                        fprintf(stderr, "%s in %s has refcnt %d, not %d\n",
1511                                ent->suspect->path,
1512                                sha1_to_hex(ent->suspect->commit->object.sha1),
1513                                ent->suspect->refcnt, found);
1514                        baa = 2;
1515                }
1516        }
1517        if (baa) {
1518                int opt = 0160;
1519                find_alignment(sb, &opt);
1520                output(sb, opt);
1521                die("Baa %d!", baa);
1522        }
1523}
1524
1525static int has_path_in_work_tree(const char *path)
1526{
1527        struct stat st;
1528        return !lstat(path, &st);
1529}
1530
1531static unsigned parse_score(const char *arg)
1532{
1533        char *end;
1534        unsigned long score = strtoul(arg, &end, 10);
1535        if (*end)
1536                return 0;
1537        return score;
1538}
1539
1540static const char *add_prefix(const char *prefix, const char *path)
1541{
1542        if (!prefix || !prefix[0])
1543                return path;
1544        return prefix_path(prefix, strlen(prefix), path);
1545}
1546
1547static const char *parse_loc(const char *spec,
1548                             struct scoreboard *sb, long lno,
1549                             long begin, long *ret)
1550{
1551        char *term;
1552        const char *line;
1553        long num;
1554        int reg_error;
1555        regex_t regexp;
1556        regmatch_t match[1];
1557
1558        /* Allow "-L <something>,+20" to mean starting at <something>
1559         * for 20 lines, or "-L <something>,-5" for 5 lines ending at
1560         * <something>.
1561         */
1562        if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
1563                num = strtol(spec + 1, &term, 10);
1564                if (term != spec + 1) {
1565                        if (spec[0] == '-')
1566                                num = 0 - num;
1567                        if (0 < num)
1568                                *ret = begin + num - 2;
1569                        else if (!num)
1570                                *ret = begin;
1571                        else
1572                                *ret = begin + num;
1573                        return term;
1574                }
1575                return spec;
1576        }
1577        num = strtol(spec, &term, 10);
1578        if (term != spec) {
1579                *ret = num;
1580                return term;
1581        }
1582        if (spec[0] != '/')
1583                return spec;
1584
1585        /* it could be a regexp of form /.../ */
1586        for (term = (char*) spec + 1; *term && *term != '/'; term++) {
1587                if (*term == '\\')
1588                        term++;
1589        }
1590        if (*term != '/')
1591                return spec;
1592
1593        /* try [spec+1 .. term-1] as regexp */
1594        *term = 0;
1595        begin--; /* input is in human terms */
1596        line = nth_line(sb, begin);
1597
1598        if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
1599            !(reg_error = regexec(&regexp, line, 1, match, 0))) {
1600                const char *cp = line + match[0].rm_so;
1601                const char *nline;
1602
1603                while (begin++ < lno) {
1604                        nline = nth_line(sb, begin);
1605                        if (line <= cp && cp < nline)
1606                                break;
1607                        line = nline;
1608                }
1609                *ret = begin;
1610                regfree(&regexp);
1611                *term++ = '/';
1612                return term;
1613        }
1614        else {
1615                char errbuf[1024];
1616                regerror(reg_error, &regexp, errbuf, 1024);
1617                die("-L parameter '%s': %s", spec + 1, errbuf);
1618        }
1619}
1620
1621static void prepare_blame_range(struct scoreboard *sb,
1622                                const char *bottomtop,
1623                                long lno,
1624                                long *bottom, long *top)
1625{
1626        const char *term;
1627
1628        term = parse_loc(bottomtop, sb, lno, 1, bottom);
1629        if (*term == ',') {
1630                term = parse_loc(term + 1, sb, lno, *bottom + 1, top);
1631                if (*term)
1632                        usage(blame_usage);
1633        }
1634        if (*term)
1635                usage(blame_usage);
1636}
1637
1638int cmd_blame(int argc, const char **argv, const char *prefix)
1639{
1640        struct rev_info revs;
1641        const char *path;
1642        struct scoreboard sb;
1643        struct origin *o;
1644        struct blame_entry *ent;
1645        int i, seen_dashdash, unk, opt;
1646        long bottom, top, lno;
1647        int output_option = 0;
1648        const char *revs_file = NULL;
1649        const char *final_commit_name = NULL;
1650        char type[10];
1651        const char *bottomtop = NULL;
1652
1653        save_commit_buffer = 0;
1654
1655        opt = 0;
1656        seen_dashdash = 0;
1657        for (unk = i = 1; i < argc; i++) {
1658                const char *arg = argv[i];
1659                if (*arg != '-')
1660                        break;
1661                else if (!strcmp("-c", arg))
1662                        output_option |= OUTPUT_ANNOTATE_COMPAT;
1663                else if (!strcmp("-t", arg))
1664                        output_option |= OUTPUT_RAW_TIMESTAMP;
1665                else if (!strcmp("-l", arg))
1666                        output_option |= OUTPUT_LONG_OBJECT_NAME;
1667                else if (!strcmp("-S", arg) && ++i < argc)
1668                        revs_file = argv[i];
1669                else if (!strncmp("-M", arg, 2)) {
1670                        opt |= PICKAXE_BLAME_MOVE;
1671                        blame_move_score = parse_score(arg+2);
1672                }
1673                else if (!strncmp("-C", arg, 2)) {
1674                        if (opt & PICKAXE_BLAME_COPY)
1675                                opt |= PICKAXE_BLAME_COPY_HARDER;
1676                        opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
1677                        blame_copy_score = parse_score(arg+2);
1678                }
1679                else if (!strncmp("-L", arg, 2)) {
1680                        if (!arg[2]) {
1681                                if (++i >= argc)
1682                                        usage(blame_usage);
1683                                arg = argv[i];
1684                        }
1685                        else
1686                                arg += 2;
1687                        if (bottomtop)
1688                                die("More than one '-L n,m' option given");
1689                        bottomtop = arg;
1690                }
1691                else if (!strcmp("--score-debug", arg))
1692                        output_option |= OUTPUT_SHOW_SCORE;
1693                else if (!strcmp("-f", arg) ||
1694                         !strcmp("--show-name", arg))
1695                        output_option |= OUTPUT_SHOW_NAME;
1696                else if (!strcmp("-n", arg) ||
1697                         !strcmp("--show-number", arg))
1698                        output_option |= OUTPUT_SHOW_NUMBER;
1699                else if (!strcmp("-p", arg) ||
1700                         !strcmp("--porcelain", arg))
1701                        output_option |= OUTPUT_PORCELAIN;
1702                else if (!strcmp("--", arg)) {
1703                        seen_dashdash = 1;
1704                        i++;
1705                        break;
1706                }
1707                else
1708                        argv[unk++] = arg;
1709        }
1710
1711        if (!blame_move_score)
1712                blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
1713        if (!blame_copy_score)
1714                blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
1715
1716        /* We have collected options unknown to us in argv[1..unk]
1717         * which are to be passed to revision machinery if we are
1718         * going to do the "bottom" procesing.
1719         *
1720         * The remaining are:
1721         *
1722         * (1) if seen_dashdash, its either
1723         *     "-options -- <path>" or
1724         *     "-options -- <path> <rev>".
1725         *     but the latter is allowed only if there is no
1726         *     options that we passed to revision machinery.
1727         *
1728         * (2) otherwise, we may have "--" somewhere later and
1729         *     might be looking at the first one of multiple 'rev'
1730         *     parameters (e.g. " master ^next ^maint -- path").
1731         *     See if there is a dashdash first, and give the
1732         *     arguments before that to revision machinery.
1733         *     After that there must be one 'path'.
1734         *
1735         * (3) otherwise, its one of the three:
1736         *     "-options <path> <rev>"
1737         *     "-options <rev> <path>"
1738         *     "-options <path>"
1739         *     but again the first one is allowed only if
1740         *     there is no options that we passed to revision
1741         *     machinery.
1742         */
1743
1744        if (seen_dashdash) {
1745                /* (1) */
1746                if (argc <= i)
1747                        usage(blame_usage);
1748                path = add_prefix(prefix, argv[i]);
1749                if (i + 1 == argc - 1) {
1750                        if (unk != 1)
1751                                usage(blame_usage);
1752                        argv[unk++] = argv[i + 1];
1753                }
1754                else if (i + 1 != argc)
1755                        /* garbage at end */
1756                        usage(blame_usage);
1757        }
1758        else {
1759                int j;
1760                for (j = i; !seen_dashdash && j < argc; j++)
1761                        if (!strcmp(argv[j], "--"))
1762                                seen_dashdash = j;
1763                if (seen_dashdash) {
1764                        if (seen_dashdash + 1 != argc - 1)
1765                                usage(blame_usage);
1766                        path = add_prefix(prefix, argv[seen_dashdash + 1]);
1767                        for (j = i; j < seen_dashdash; j++)
1768                                argv[unk++] = argv[j];
1769                }
1770                else {
1771                        /* (3) */
1772                        path = add_prefix(prefix, argv[i]);
1773                        if (i + 1 == argc - 1) {
1774                                final_commit_name = argv[i + 1];
1775
1776                                /* if (unk == 1) we could be getting
1777                                 * old-style
1778                                 */
1779                                if (unk == 1 && !has_path_in_work_tree(path)) {
1780                                        path = add_prefix(prefix, argv[i + 1]);
1781                                        final_commit_name = argv[i];
1782                                }
1783                        }
1784                        else if (i != argc - 1)
1785                                usage(blame_usage); /* garbage at end */
1786
1787                        if (!has_path_in_work_tree(path))
1788                                die("cannot stat path %s: %s",
1789                                    path, strerror(errno));
1790                }
1791        }
1792
1793        if (final_commit_name)
1794                argv[unk++] = final_commit_name;
1795
1796        /* Now we got rev and path.  We do not want the path pruning
1797         * but we may want "bottom" processing.
1798         */
1799        argv[unk++] = "--"; /* terminate the rev name */
1800        argv[unk] = NULL;
1801
1802        init_revisions(&revs, NULL);
1803        setup_revisions(unk, argv, &revs, "HEAD");
1804        memset(&sb, 0, sizeof(sb));
1805
1806        /* There must be one and only one positive commit in the
1807         * revs->pending array.
1808         */
1809        for (i = 0; i < revs.pending.nr; i++) {
1810                struct object *obj = revs.pending.objects[i].item;
1811                if (obj->flags & UNINTERESTING)
1812                        continue;
1813                while (obj->type == OBJ_TAG)
1814                        obj = deref_tag(obj, NULL, 0);
1815                if (obj->type != OBJ_COMMIT)
1816                        die("Non commit %s?",
1817                            revs.pending.objects[i].name);
1818                if (sb.final)
1819                        die("More than one commit to dig from %s and %s?",
1820                            revs.pending.objects[i].name,
1821                            final_commit_name);
1822                sb.final = (struct commit *) obj;
1823                final_commit_name = revs.pending.objects[i].name;
1824        }
1825
1826        if (!sb.final) {
1827                /* "--not A B -- path" without anything positive */
1828                unsigned char head_sha1[20];
1829
1830                final_commit_name = "HEAD";
1831                if (get_sha1(final_commit_name, head_sha1))
1832                        die("No such ref: HEAD");
1833                sb.final = lookup_commit_reference(head_sha1);
1834                add_pending_object(&revs, &(sb.final->object), "HEAD");
1835        }
1836
1837        /* If we have bottom, this will mark the ancestors of the
1838         * bottom commits we would reach while traversing as
1839         * uninteresting.
1840         */
1841        prepare_revision_walk(&revs);
1842
1843        o = get_origin(&sb, sb.final, path);
1844        if (fill_blob_sha1(o))
1845                die("no such path %s in %s", path, final_commit_name);
1846
1847        sb.final_buf = read_sha1_file(o->blob_sha1, type, &sb.final_buf_size);
1848        num_read_blob++;
1849        lno = prepare_lines(&sb);
1850
1851        bottom = top = 0;
1852        if (bottomtop)
1853                prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);
1854        if (bottom && top && top < bottom) {
1855                long tmp;
1856                tmp = top; top = bottom; bottom = tmp;
1857        }
1858        if (bottom < 1)
1859                bottom = 1;
1860        if (top < 1)
1861                top = lno;
1862        bottom--;
1863        if (lno < top)
1864                die("file %s has only %lu lines", path, lno);
1865
1866        ent = xcalloc(1, sizeof(*ent));
1867        ent->lno = bottom;
1868        ent->num_lines = top - bottom;
1869        ent->suspect = o;
1870        ent->s_lno = bottom;
1871
1872        sb.ent = ent;
1873        sb.path = path;
1874
1875        if (revs_file && read_ancestry(revs_file))
1876                die("reading graft file %s failed: %s",
1877                    revs_file, strerror(errno));
1878
1879        assign_blame(&sb, &revs, opt);
1880
1881        coalesce(&sb);
1882
1883        if (!(output_option & OUTPUT_PORCELAIN))
1884                find_alignment(&sb, &output_option);
1885
1886        output(&sb, output_option);
1887        free((void *)sb.final_buf);
1888        for (ent = sb.ent; ent; ) {
1889                struct blame_entry *e = ent->next;
1890                free(ent);
1891                ent = e;
1892        }
1893
1894        if (DEBUG) {
1895                printf("num read blob: %d\n", num_read_blob);
1896                printf("num get patch: %d\n", num_get_patch);
1897                printf("num commits: %d\n", num_commits);
1898        }
1899        return 0;
1900}