builtin-blame.con commit builtin-prune: memory diet. (16157b8)
   1/*
   2 * Pickaxe
   3 *
   4 * Copyright (c) 2006, Junio C Hamano
   5 */
   6
   7#include "cache.h"
   8#include "builtin.h"
   9#include "blob.h"
  10#include "commit.h"
  11#include "tag.h"
  12#include "tree-walk.h"
  13#include "diff.h"
  14#include "diffcore.h"
  15#include "revision.h"
  16#include "xdiff-interface.h"
  17
  18static char blame_usage[] =
  19"git-blame [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [commit] [--] file\n"
  20"  -c, --compatibility Use the same output mode as git-annotate (Default: off)\n"
  21"  -b                  Show blank SHA-1 for boundary commits (Default: off)\n"
  22"  -l, --long          Show long commit SHA1 (Default: off)\n"
  23"  --root              Do not treat root commits as boundaries (Default: off)\n"
  24"  -t, --time          Show raw timestamp (Default: off)\n"
  25"  -f, --show-name     Show original filename (Default: auto)\n"
  26"  -n, --show-number   Show original linenumber (Default: off)\n"
  27"  -p, --porcelain     Show in a format designed for machine consumption\n"
  28"  -L n,m              Process only line range n,m, counting from 1\n"
  29"  -M, -C              Find line movements within and across files\n"
  30"  -S revs-file        Use revisions from revs-file instead of calling git-rev-list\n";
  31
  32static int longest_file;
  33static int longest_author;
  34static int max_orig_digits;
  35static int max_digits;
  36static int max_score_digits;
  37static int show_root;
  38static int blank_boundary;
  39
  40#ifndef DEBUG
  41#define DEBUG 0
  42#endif
  43
  44/* stats */
  45static int num_read_blob;
  46static int num_get_patch;
  47static int num_commits;
  48
  49#define PICKAXE_BLAME_MOVE              01
  50#define PICKAXE_BLAME_COPY              02
  51#define PICKAXE_BLAME_COPY_HARDER       04
  52
  53/*
  54 * blame for a blame_entry with score lower than these thresholds
  55 * is not passed to the parent using move/copy logic.
  56 */
  57static unsigned blame_move_score;
  58static unsigned blame_copy_score;
  59#define BLAME_DEFAULT_MOVE_SCORE        20
  60#define BLAME_DEFAULT_COPY_SCORE        40
  61
  62/* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
  63#define METAINFO_SHOWN          (1u<<12)
  64#define MORE_THAN_ONE_PATH      (1u<<13)
  65
  66/*
  67 * One blob in a commit that is being suspected
  68 */
  69struct origin {
  70        int refcnt;
  71        struct commit *commit;
  72        mmfile_t file;
  73        unsigned char blob_sha1[20];
  74        char path[FLEX_ARRAY];
  75};
  76
  77static char *fill_origin_blob(struct origin *o, mmfile_t *file)
  78{
  79        if (!o->file.ptr) {
  80                char type[10];
  81                num_read_blob++;
  82                file->ptr = read_sha1_file(o->blob_sha1, type,
  83                                           (unsigned long *)(&(file->size)));
  84                o->file = *file;
  85        }
  86        else
  87                *file = o->file;
  88        return file->ptr;
  89}
  90
  91static inline struct origin *origin_incref(struct origin *o)
  92{
  93        if (o)
  94                o->refcnt++;
  95        return o;
  96}
  97
  98static void origin_decref(struct origin *o)
  99{
 100        if (o && --o->refcnt <= 0) {
 101                if (o->file.ptr)
 102                        free(o->file.ptr);
 103                memset(o, 0, sizeof(*o));
 104                free(o);
 105        }
 106}
 107
 108struct blame_entry {
 109        struct blame_entry *prev;
 110        struct blame_entry *next;
 111
 112        /* the first line of this group in the final image;
 113         * internally all line numbers are 0 based.
 114         */
 115        int lno;
 116
 117        /* how many lines this group has */
 118        int num_lines;
 119
 120        /* the commit that introduced this group into the final image */
 121        struct origin *suspect;
 122
 123        /* true if the suspect is truly guilty; false while we have not
 124         * checked if the group came from one of its parents.
 125         */
 126        char guilty;
 127
 128        /* the line number of the first line of this group in the
 129         * suspect's file; internally all line numbers are 0 based.
 130         */
 131        int s_lno;
 132
 133        /* how significant this entry is -- cached to avoid
 134         * scanning the lines over and over
 135         */
 136        unsigned score;
 137};
 138
 139struct scoreboard {
 140        /* the final commit (i.e. where we started digging from) */
 141        struct commit *final;
 142
 143        const char *path;
 144
 145        /* the contents in the final; pointed into by buf pointers of
 146         * blame_entries
 147         */
 148        const char *final_buf;
 149        unsigned long final_buf_size;
 150
 151        /* linked list of blames */
 152        struct blame_entry *ent;
 153
 154        /* look-up a line in the final buffer */
 155        int num_lines;
 156        int *lineno;
 157};
 158
 159static int cmp_suspect(struct origin *a, struct origin *b)
 160{
 161        int cmp = hashcmp(a->commit->object.sha1, b->commit->object.sha1);
 162        if (cmp)
 163                return cmp;
 164        return strcmp(a->path, b->path);
 165}
 166
 167#define cmp_suspect(a, b) ( ((a)==(b)) ? 0 : cmp_suspect(a,b) )
 168
 169static void sanity_check_refcnt(struct scoreboard *);
 170
 171static void coalesce(struct scoreboard *sb)
 172{
 173        struct blame_entry *ent, *next;
 174
 175        for (ent = sb->ent; ent && (next = ent->next); ent = next) {
 176                if (!cmp_suspect(ent->suspect, next->suspect) &&
 177                    ent->guilty == next->guilty &&
 178                    ent->s_lno + ent->num_lines == next->s_lno) {
 179                        ent->num_lines += next->num_lines;
 180                        ent->next = next->next;
 181                        if (ent->next)
 182                                ent->next->prev = ent;
 183                        origin_decref(next->suspect);
 184                        free(next);
 185                        ent->score = 0;
 186                        next = ent; /* again */
 187                }
 188        }
 189
 190        if (DEBUG) /* sanity */
 191                sanity_check_refcnt(sb);
 192}
 193
 194static struct origin *make_origin(struct commit *commit, const char *path)
 195{
 196        struct origin *o;
 197        o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
 198        o->commit = commit;
 199        o->refcnt = 1;
 200        strcpy(o->path, path);
 201        return o;
 202}
 203
 204static struct origin *get_origin(struct scoreboard *sb,
 205                                 struct commit *commit,
 206                                 const char *path)
 207{
 208        struct blame_entry *e;
 209
 210        for (e = sb->ent; e; e = e->next) {
 211                if (e->suspect->commit == commit &&
 212                    !strcmp(e->suspect->path, path))
 213                        return origin_incref(e->suspect);
 214        }
 215        return make_origin(commit, path);
 216}
 217
 218static int fill_blob_sha1(struct origin *origin)
 219{
 220        unsigned mode;
 221        char type[10];
 222
 223        if (!is_null_sha1(origin->blob_sha1))
 224                return 0;
 225        if (get_tree_entry(origin->commit->object.sha1,
 226                           origin->path,
 227                           origin->blob_sha1, &mode))
 228                goto error_out;
 229        if (sha1_object_info(origin->blob_sha1, type, NULL) ||
 230            strcmp(type, blob_type))
 231                goto error_out;
 232        return 0;
 233 error_out:
 234        hashclr(origin->blob_sha1);
 235        return -1;
 236}
 237
 238static struct origin *find_origin(struct scoreboard *sb,
 239                                  struct commit *parent,
 240                                  struct origin *origin)
 241{
 242        struct origin *porigin = NULL;
 243        struct diff_options diff_opts;
 244        const char *paths[2];
 245
 246        if (parent->util) {
 247                /* This is a freestanding copy of origin and not
 248                 * refcounted.
 249                 */
 250                struct origin *cached = parent->util;
 251                if (!strcmp(cached->path, origin->path)) {
 252                        porigin = get_origin(sb, parent, cached->path);
 253                        if (porigin->refcnt == 1)
 254                                hashcpy(porigin->blob_sha1, cached->blob_sha1);
 255                        return porigin;
 256                }
 257                /* otherwise it was not very useful; free it */
 258                free(parent->util);
 259                parent->util = NULL;
 260        }
 261
 262        /* See if the origin->path is different between parent
 263         * and origin first.  Most of the time they are the
 264         * same and diff-tree is fairly efficient about this.
 265         */
 266        diff_setup(&diff_opts);
 267        diff_opts.recursive = 1;
 268        diff_opts.detect_rename = 0;
 269        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 270        paths[0] = origin->path;
 271        paths[1] = NULL;
 272
 273        diff_tree_setup_paths(paths, &diff_opts);
 274        if (diff_setup_done(&diff_opts) < 0)
 275                die("diff-setup");
 276        diff_tree_sha1(parent->tree->object.sha1,
 277                       origin->commit->tree->object.sha1,
 278                       "", &diff_opts);
 279        diffcore_std(&diff_opts);
 280
 281        /* It is either one entry that says "modified", or "created",
 282         * or nothing.
 283         */
 284        if (!diff_queued_diff.nr) {
 285                /* The path is the same as parent */
 286                porigin = get_origin(sb, parent, origin->path);
 287                hashcpy(porigin->blob_sha1, origin->blob_sha1);
 288        }
 289        else if (diff_queued_diff.nr != 1)
 290                die("internal error in blame::find_origin");
 291        else {
 292                struct diff_filepair *p = diff_queued_diff.queue[0];
 293                switch (p->status) {
 294                default:
 295                        die("internal error in blame::find_origin (%c)",
 296                            p->status);
 297                case 'M':
 298                        porigin = get_origin(sb, parent, origin->path);
 299                        hashcpy(porigin->blob_sha1, p->one->sha1);
 300                        break;
 301                case 'A':
 302                case 'T':
 303                        /* Did not exist in parent, or type changed */
 304                        break;
 305                }
 306        }
 307        diff_flush(&diff_opts);
 308        if (porigin) {
 309                struct origin *cached;
 310                cached = make_origin(porigin->commit, porigin->path);
 311                hashcpy(cached->blob_sha1, porigin->blob_sha1);
 312                parent->util = cached;
 313        }
 314        return porigin;
 315}
 316
 317static struct origin *find_rename(struct scoreboard *sb,
 318                                  struct commit *parent,
 319                                  struct origin *origin)
 320{
 321        struct origin *porigin = NULL;
 322        struct diff_options diff_opts;
 323        int i;
 324        const char *paths[2];
 325
 326        diff_setup(&diff_opts);
 327        diff_opts.recursive = 1;
 328        diff_opts.detect_rename = DIFF_DETECT_RENAME;
 329        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 330        diff_opts.single_follow = origin->path;
 331        paths[0] = NULL;
 332        diff_tree_setup_paths(paths, &diff_opts);
 333        if (diff_setup_done(&diff_opts) < 0)
 334                die("diff-setup");
 335        diff_tree_sha1(parent->tree->object.sha1,
 336                       origin->commit->tree->object.sha1,
 337                       "", &diff_opts);
 338        diffcore_std(&diff_opts);
 339
 340        for (i = 0; i < diff_queued_diff.nr; i++) {
 341                struct diff_filepair *p = diff_queued_diff.queue[i];
 342                if ((p->status == 'R' || p->status == 'C') &&
 343                    !strcmp(p->two->path, origin->path)) {
 344                        porigin = get_origin(sb, parent, p->one->path);
 345                        hashcpy(porigin->blob_sha1, p->one->sha1);
 346                        break;
 347                }
 348        }
 349        diff_flush(&diff_opts);
 350        return porigin;
 351}
 352
 353struct chunk {
 354        /* line number in postimage; up to but not including this
 355         * line is the same as preimage
 356         */
 357        int same;
 358
 359        /* preimage line number after this chunk */
 360        int p_next;
 361
 362        /* postimage line number after this chunk */
 363        int t_next;
 364};
 365
 366struct patch {
 367        struct chunk *chunks;
 368        int num;
 369};
 370
 371struct blame_diff_state {
 372        struct xdiff_emit_state xm;
 373        struct patch *ret;
 374        unsigned hunk_post_context;
 375        unsigned hunk_in_pre_context : 1;
 376};
 377
 378static void process_u_diff(void *state_, char *line, unsigned long len)
 379{
 380        struct blame_diff_state *state = state_;
 381        struct chunk *chunk;
 382        int off1, off2, len1, len2, num;
 383
 384        num = state->ret->num;
 385        if (len < 4 || line[0] != '@' || line[1] != '@') {
 386                if (state->hunk_in_pre_context && line[0] == ' ')
 387                        state->ret->chunks[num - 1].same++;
 388                else {
 389                        state->hunk_in_pre_context = 0;
 390                        if (line[0] == ' ')
 391                                state->hunk_post_context++;
 392                        else
 393                                state->hunk_post_context = 0;
 394                }
 395                return;
 396        }
 397
 398        if (num && state->hunk_post_context) {
 399                chunk = &state->ret->chunks[num - 1];
 400                chunk->p_next -= state->hunk_post_context;
 401                chunk->t_next -= state->hunk_post_context;
 402        }
 403        state->ret->num = ++num;
 404        state->ret->chunks = xrealloc(state->ret->chunks,
 405                                      sizeof(struct chunk) * num);
 406        chunk = &state->ret->chunks[num - 1];
 407        if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
 408                state->ret->num--;
 409                return;
 410        }
 411
 412        /* Line numbers in patch output are one based. */
 413        off1--;
 414        off2--;
 415
 416        chunk->same = len2 ? off2 : (off2 + 1);
 417
 418        chunk->p_next = off1 + (len1 ? len1 : 1);
 419        chunk->t_next = chunk->same + len2;
 420        state->hunk_in_pre_context = 1;
 421        state->hunk_post_context = 0;
 422}
 423
 424static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
 425                                    int context)
 426{
 427        struct blame_diff_state state;
 428        xpparam_t xpp;
 429        xdemitconf_t xecfg;
 430        xdemitcb_t ecb;
 431
 432        xpp.flags = XDF_NEED_MINIMAL;
 433        xecfg.ctxlen = context;
 434        xecfg.flags = 0;
 435        ecb.outf = xdiff_outf;
 436        ecb.priv = &state;
 437        memset(&state, 0, sizeof(state));
 438        state.xm.consume = process_u_diff;
 439        state.ret = xmalloc(sizeof(struct patch));
 440        state.ret->chunks = NULL;
 441        state.ret->num = 0;
 442
 443        xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);
 444
 445        if (state.ret->num) {
 446                struct chunk *chunk;
 447                chunk = &state.ret->chunks[state.ret->num - 1];
 448                chunk->p_next -= state.hunk_post_context;
 449                chunk->t_next -= state.hunk_post_context;
 450        }
 451        return state.ret;
 452}
 453
 454static struct patch *get_patch(struct origin *parent, struct origin *origin)
 455{
 456        mmfile_t file_p, file_o;
 457        struct patch *patch;
 458
 459        fill_origin_blob(parent, &file_p);
 460        fill_origin_blob(origin, &file_o);
 461        if (!file_p.ptr || !file_o.ptr)
 462                return NULL;
 463        patch = compare_buffer(&file_p, &file_o, 0);
 464        num_get_patch++;
 465        return patch;
 466}
 467
 468static void free_patch(struct patch *p)
 469{
 470        free(p->chunks);
 471        free(p);
 472}
 473
 474static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
 475{
 476        struct blame_entry *ent, *prev = NULL;
 477
 478        origin_incref(e->suspect);
 479
 480        for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
 481                prev = ent;
 482
 483        /* prev, if not NULL, is the last one that is below e */
 484        e->prev = prev;
 485        if (prev) {
 486                e->next = prev->next;
 487                prev->next = e;
 488        }
 489        else {
 490                e->next = sb->ent;
 491                sb->ent = e;
 492        }
 493        if (e->next)
 494                e->next->prev = e;
 495}
 496
 497static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
 498{
 499        struct blame_entry *p, *n;
 500
 501        p = dst->prev;
 502        n = dst->next;
 503        origin_incref(src->suspect);
 504        origin_decref(dst->suspect);
 505        memcpy(dst, src, sizeof(*src));
 506        dst->prev = p;
 507        dst->next = n;
 508        dst->score = 0;
 509}
 510
 511static const char *nth_line(struct scoreboard *sb, int lno)
 512{
 513        return sb->final_buf + sb->lineno[lno];
 514}
 515
 516static void split_overlap(struct blame_entry *split,
 517                          struct blame_entry *e,
 518                          int tlno, int plno, int same,
 519                          struct origin *parent)
 520{
 521        /* it is known that lines between tlno to same came from
 522         * parent, and e has an overlap with that range.  it also is
 523         * known that parent's line plno corresponds to e's line tlno.
 524         *
 525         *                <---- e ----->
 526         *                   <------>
 527         *                   <------------>
 528         *             <------------>
 529         *             <------------------>
 530         *
 531         * Potentially we need to split e into three parts; before
 532         * this chunk, the chunk to be blamed for parent, and after
 533         * that portion.
 534         */
 535        int chunk_end_lno;
 536        memset(split, 0, sizeof(struct blame_entry [3]));
 537
 538        if (e->s_lno < tlno) {
 539                /* there is a pre-chunk part not blamed on parent */
 540                split[0].suspect = origin_incref(e->suspect);
 541                split[0].lno = e->lno;
 542                split[0].s_lno = e->s_lno;
 543                split[0].num_lines = tlno - e->s_lno;
 544                split[1].lno = e->lno + tlno - e->s_lno;
 545                split[1].s_lno = plno;
 546        }
 547        else {
 548                split[1].lno = e->lno;
 549                split[1].s_lno = plno + (e->s_lno - tlno);
 550        }
 551
 552        if (same < e->s_lno + e->num_lines) {
 553                /* there is a post-chunk part not blamed on parent */
 554                split[2].suspect = origin_incref(e->suspect);
 555                split[2].lno = e->lno + (same - e->s_lno);
 556                split[2].s_lno = e->s_lno + (same - e->s_lno);
 557                split[2].num_lines = e->s_lno + e->num_lines - same;
 558                chunk_end_lno = split[2].lno;
 559        }
 560        else
 561                chunk_end_lno = e->lno + e->num_lines;
 562        split[1].num_lines = chunk_end_lno - split[1].lno;
 563
 564        if (split[1].num_lines < 1)
 565                return;
 566        split[1].suspect = origin_incref(parent);
 567}
 568
 569static void split_blame(struct scoreboard *sb,
 570                        struct blame_entry *split,
 571                        struct blame_entry *e)
 572{
 573        struct blame_entry *new_entry;
 574
 575        if (split[0].suspect && split[2].suspect) {
 576                /* we need to split e into two and add another for parent */
 577                dup_entry(e, &split[0]);
 578
 579                new_entry = xmalloc(sizeof(*new_entry));
 580                memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
 581                add_blame_entry(sb, new_entry);
 582
 583                new_entry = xmalloc(sizeof(*new_entry));
 584                memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
 585                add_blame_entry(sb, new_entry);
 586        }
 587        else if (!split[0].suspect && !split[2].suspect)
 588                /* parent covers the entire area */
 589                dup_entry(e, &split[1]);
 590        else if (split[0].suspect) {
 591                dup_entry(e, &split[0]);
 592
 593                new_entry = xmalloc(sizeof(*new_entry));
 594                memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
 595                add_blame_entry(sb, new_entry);
 596        }
 597        else {
 598                dup_entry(e, &split[1]);
 599
 600                new_entry = xmalloc(sizeof(*new_entry));
 601                memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
 602                add_blame_entry(sb, new_entry);
 603        }
 604
 605        if (DEBUG) { /* sanity */
 606                struct blame_entry *ent;
 607                int lno = sb->ent->lno, corrupt = 0;
 608
 609                for (ent = sb->ent; ent; ent = ent->next) {
 610                        if (lno != ent->lno)
 611                                corrupt = 1;
 612                        if (ent->s_lno < 0)
 613                                corrupt = 1;
 614                        lno += ent->num_lines;
 615                }
 616                if (corrupt) {
 617                        lno = sb->ent->lno;
 618                        for (ent = sb->ent; ent; ent = ent->next) {
 619                                printf("L %8d l %8d n %8d\n",
 620                                       lno, ent->lno, ent->num_lines);
 621                                lno = ent->lno + ent->num_lines;
 622                        }
 623                        die("oops");
 624                }
 625        }
 626}
 627
 628static void decref_split(struct blame_entry *split)
 629{
 630        int i;
 631
 632        for (i = 0; i < 3; i++)
 633                origin_decref(split[i].suspect);
 634}
 635
 636static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
 637                          int tlno, int plno, int same,
 638                          struct origin *parent)
 639{
 640        struct blame_entry split[3];
 641
 642        split_overlap(split, e, tlno, plno, same, parent);
 643        if (split[1].suspect)
 644                split_blame(sb, split, e);
 645        decref_split(split);
 646}
 647
 648static int find_last_in_target(struct scoreboard *sb, struct origin *target)
 649{
 650        struct blame_entry *e;
 651        int last_in_target = -1;
 652
 653        for (e = sb->ent; e; e = e->next) {
 654                if (e->guilty || cmp_suspect(e->suspect, target))
 655                        continue;
 656                if (last_in_target < e->s_lno + e->num_lines)
 657                        last_in_target = e->s_lno + e->num_lines;
 658        }
 659        return last_in_target;
 660}
 661
 662static void blame_chunk(struct scoreboard *sb,
 663                        int tlno, int plno, int same,
 664                        struct origin *target, struct origin *parent)
 665{
 666        struct blame_entry *e;
 667
 668        for (e = sb->ent; e; e = e->next) {
 669                if (e->guilty || cmp_suspect(e->suspect, target))
 670                        continue;
 671                if (same <= e->s_lno)
 672                        continue;
 673                if (tlno < e->s_lno + e->num_lines)
 674                        blame_overlap(sb, e, tlno, plno, same, parent);
 675        }
 676}
 677
 678static int pass_blame_to_parent(struct scoreboard *sb,
 679                                struct origin *target,
 680                                struct origin *parent)
 681{
 682        int i, last_in_target, plno, tlno;
 683        struct patch *patch;
 684
 685        last_in_target = find_last_in_target(sb, target);
 686        if (last_in_target < 0)
 687                return 1; /* nothing remains for this target */
 688
 689        patch = get_patch(parent, target);
 690        plno = tlno = 0;
 691        for (i = 0; i < patch->num; i++) {
 692                struct chunk *chunk = &patch->chunks[i];
 693
 694                blame_chunk(sb, tlno, plno, chunk->same, target, parent);
 695                plno = chunk->p_next;
 696                tlno = chunk->t_next;
 697        }
 698        /* rest (i.e. anything above tlno) are the same as parent */
 699        blame_chunk(sb, tlno, plno, last_in_target, target, parent);
 700
 701        free_patch(patch);
 702        return 0;
 703}
 704
 705static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
 706{
 707        unsigned score;
 708        const char *cp, *ep;
 709
 710        if (e->score)
 711                return e->score;
 712
 713        score = 1;
 714        cp = nth_line(sb, e->lno);
 715        ep = nth_line(sb, e->lno + e->num_lines);
 716        while (cp < ep) {
 717                unsigned ch = *((unsigned char *)cp);
 718                if (isalnum(ch))
 719                        score++;
 720                cp++;
 721        }
 722        e->score = score;
 723        return score;
 724}
 725
 726static void copy_split_if_better(struct scoreboard *sb,
 727                                 struct blame_entry *best_so_far,
 728                                 struct blame_entry *this)
 729{
 730        int i;
 731
 732        if (!this[1].suspect)
 733                return;
 734        if (best_so_far[1].suspect) {
 735                if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
 736                        return;
 737        }
 738
 739        for (i = 0; i < 3; i++)
 740                origin_incref(this[i].suspect);
 741        decref_split(best_so_far);
 742        memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
 743}
 744
 745static void find_copy_in_blob(struct scoreboard *sb,
 746                              struct blame_entry *ent,
 747                              struct origin *parent,
 748                              struct blame_entry *split,
 749                              mmfile_t *file_p)
 750{
 751        const char *cp;
 752        int cnt;
 753        mmfile_t file_o;
 754        struct patch *patch;
 755        int i, plno, tlno;
 756
 757        cp = nth_line(sb, ent->lno);
 758        file_o.ptr = (char*) cp;
 759        cnt = ent->num_lines;
 760
 761        while (cnt && cp < sb->final_buf + sb->final_buf_size) {
 762                if (*cp++ == '\n')
 763                        cnt--;
 764        }
 765        file_o.size = cp - file_o.ptr;
 766
 767        patch = compare_buffer(file_p, &file_o, 1);
 768
 769        memset(split, 0, sizeof(struct blame_entry [3]));
 770        plno = tlno = 0;
 771        for (i = 0; i < patch->num; i++) {
 772                struct chunk *chunk = &patch->chunks[i];
 773
 774                /* tlno to chunk->same are the same as ent */
 775                if (ent->num_lines <= tlno)
 776                        break;
 777                if (tlno < chunk->same) {
 778                        struct blame_entry this[3];
 779                        split_overlap(this, ent,
 780                                      tlno + ent->s_lno, plno,
 781                                      chunk->same + ent->s_lno,
 782                                      parent);
 783                        copy_split_if_better(sb, split, this);
 784                        decref_split(this);
 785                }
 786                plno = chunk->p_next;
 787                tlno = chunk->t_next;
 788        }
 789        free_patch(patch);
 790}
 791
 792static int find_move_in_parent(struct scoreboard *sb,
 793                               struct origin *target,
 794                               struct origin *parent)
 795{
 796        int last_in_target, made_progress;
 797        struct blame_entry *e, split[3];
 798        mmfile_t file_p;
 799
 800        last_in_target = find_last_in_target(sb, target);
 801        if (last_in_target < 0)
 802                return 1; /* nothing remains for this target */
 803
 804        fill_origin_blob(parent, &file_p);
 805        if (!file_p.ptr)
 806                return 0;
 807
 808        made_progress = 1;
 809        while (made_progress) {
 810                made_progress = 0;
 811                for (e = sb->ent; e; e = e->next) {
 812                        if (e->guilty || cmp_suspect(e->suspect, target))
 813                                continue;
 814                        find_copy_in_blob(sb, e, parent, split, &file_p);
 815                        if (split[1].suspect &&
 816                            blame_move_score < ent_score(sb, &split[1])) {
 817                                split_blame(sb, split, e);
 818                                made_progress = 1;
 819                        }
 820                        decref_split(split);
 821                }
 822        }
 823        return 0;
 824}
 825
 826
 827struct blame_list {
 828        struct blame_entry *ent;
 829        struct blame_entry split[3];
 830};
 831
 832static struct blame_list *setup_blame_list(struct scoreboard *sb,
 833                                           struct origin *target,
 834                                           int *num_ents_p)
 835{
 836        struct blame_entry *e;
 837        int num_ents, i;
 838        struct blame_list *blame_list = NULL;
 839
 840        /* Count the number of entries the target is suspected for,
 841         * and prepare a list of entry and the best split.
 842         */
 843        for (e = sb->ent, num_ents = 0; e; e = e->next)
 844                if (!e->guilty && !cmp_suspect(e->suspect, target))
 845                        num_ents++;
 846        if (num_ents) {
 847                blame_list = xcalloc(num_ents, sizeof(struct blame_list));
 848                for (e = sb->ent, i = 0; e; e = e->next)
 849                        if (!e->guilty && !cmp_suspect(e->suspect, target))
 850                                blame_list[i++].ent = e;
 851        }
 852        *num_ents_p = num_ents;
 853        return blame_list;
 854}
 855
 856static int find_copy_in_parent(struct scoreboard *sb,
 857                               struct origin *target,
 858                               struct commit *parent,
 859                               struct origin *porigin,
 860                               int opt)
 861{
 862        struct diff_options diff_opts;
 863        const char *paths[1];
 864        int i, j;
 865        int retval;
 866        struct blame_list *blame_list;
 867        int num_ents;
 868
 869        blame_list = setup_blame_list(sb, target, &num_ents);
 870        if (!blame_list)
 871                return 1; /* nothing remains for this target */
 872
 873        diff_setup(&diff_opts);
 874        diff_opts.recursive = 1;
 875        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 876
 877        paths[0] = NULL;
 878        diff_tree_setup_paths(paths, &diff_opts);
 879        if (diff_setup_done(&diff_opts) < 0)
 880                die("diff-setup");
 881
 882        /* Try "find copies harder" on new path if requested;
 883         * we do not want to use diffcore_rename() actually to
 884         * match things up; find_copies_harder is set only to
 885         * force diff_tree_sha1() to feed all filepairs to diff_queue,
 886         * and this code needs to be after diff_setup_done(), which
 887         * usually makes find-copies-harder imply copy detection.
 888         */
 889        if ((opt & PICKAXE_BLAME_COPY_HARDER) &&
 890            (!porigin || strcmp(target->path, porigin->path)))
 891                diff_opts.find_copies_harder = 1;
 892
 893        diff_tree_sha1(parent->tree->object.sha1,
 894                       target->commit->tree->object.sha1,
 895                       "", &diff_opts);
 896
 897        if (!diff_opts.find_copies_harder)
 898                diffcore_std(&diff_opts);
 899
 900        retval = 0;
 901        while (1) {
 902                int made_progress = 0;
 903
 904                for (i = 0; i < diff_queued_diff.nr; i++) {
 905                        struct diff_filepair *p = diff_queued_diff.queue[i];
 906                        struct origin *norigin;
 907                        mmfile_t file_p;
 908                        struct blame_entry this[3];
 909
 910                        if (!DIFF_FILE_VALID(p->one))
 911                                continue; /* does not exist in parent */
 912                        if (porigin && !strcmp(p->one->path, porigin->path))
 913                                /* find_move already dealt with this path */
 914                                continue;
 915
 916                        norigin = get_origin(sb, parent, p->one->path);
 917                        hashcpy(norigin->blob_sha1, p->one->sha1);
 918                        fill_origin_blob(norigin, &file_p);
 919                        if (!file_p.ptr)
 920                                continue;
 921
 922                        for (j = 0; j < num_ents; j++) {
 923                                find_copy_in_blob(sb, blame_list[j].ent,
 924                                                  norigin, this, &file_p);
 925                                copy_split_if_better(sb, blame_list[j].split,
 926                                                     this);
 927                                decref_split(this);
 928                        }
 929                        origin_decref(norigin);
 930                }
 931
 932                for (j = 0; j < num_ents; j++) {
 933                        struct blame_entry *split = blame_list[j].split;
 934                        if (split[1].suspect &&
 935                            blame_copy_score < ent_score(sb, &split[1])) {
 936                                split_blame(sb, split, blame_list[j].ent);
 937                                made_progress = 1;
 938                        }
 939                        decref_split(split);
 940                }
 941                free(blame_list);
 942
 943                if (!made_progress)
 944                        break;
 945                blame_list = setup_blame_list(sb, target, &num_ents);
 946                if (!blame_list) {
 947                        retval = 1;
 948                        break;
 949                }
 950        }
 951        diff_flush(&diff_opts);
 952
 953        return retval;
 954}
 955
 956/* The blobs of origin and porigin exactly match, so everything
 957 * origin is suspected for can be blamed on the parent.
 958 */
 959static void pass_whole_blame(struct scoreboard *sb,
 960                             struct origin *origin, struct origin *porigin)
 961{
 962        struct blame_entry *e;
 963
 964        if (!porigin->file.ptr && origin->file.ptr) {
 965                /* Steal its file */
 966                porigin->file = origin->file;
 967                origin->file.ptr = NULL;
 968        }
 969        for (e = sb->ent; e; e = e->next) {
 970                if (cmp_suspect(e->suspect, origin))
 971                        continue;
 972                origin_incref(porigin);
 973                origin_decref(e->suspect);
 974                e->suspect = porigin;
 975        }
 976}
 977
 978#define MAXPARENT 16
 979
 980static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
 981{
 982        int i, pass;
 983        struct commit *commit = origin->commit;
 984        struct commit_list *parent;
 985        struct origin *parent_origin[MAXPARENT], *porigin;
 986
 987        memset(parent_origin, 0, sizeof(parent_origin));
 988
 989        /* The first pass looks for unrenamed path to optimize for
 990         * common cases, then we look for renames in the second pass.
 991         */
 992        for (pass = 0; pass < 2; pass++) {
 993                struct origin *(*find)(struct scoreboard *,
 994                                       struct commit *, struct origin *);
 995                find = pass ? find_rename : find_origin;
 996
 997                for (i = 0, parent = commit->parents;
 998                     i < MAXPARENT && parent;
 999                     parent = parent->next, i++) {
1000                        struct commit *p = parent->item;
1001                        int j, same;
1002
1003                        if (parent_origin[i])
1004                                continue;
1005                        if (parse_commit(p))
1006                                continue;
1007                        porigin = find(sb, p, origin);
1008                        if (!porigin)
1009                                continue;
1010                        if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
1011                                pass_whole_blame(sb, origin, porigin);
1012                                origin_decref(porigin);
1013                                goto finish;
1014                        }
1015                        for (j = same = 0; j < i; j++)
1016                                if (parent_origin[j] &&
1017                                    !hashcmp(parent_origin[j]->blob_sha1,
1018                                             porigin->blob_sha1)) {
1019                                        same = 1;
1020                                        break;
1021                                }
1022                        if (!same)
1023                                parent_origin[i] = porigin;
1024                        else
1025                                origin_decref(porigin);
1026                }
1027        }
1028
1029        num_commits++;
1030        for (i = 0, parent = commit->parents;
1031             i < MAXPARENT && parent;
1032             parent = parent->next, i++) {
1033                struct origin *porigin = parent_origin[i];
1034                if (!porigin)
1035                        continue;
1036                if (pass_blame_to_parent(sb, origin, porigin))
1037                        goto finish;
1038        }
1039
1040        /*
1041         * Optionally run "miff" to find moves in parents' files here.
1042         */
1043        if (opt & PICKAXE_BLAME_MOVE)
1044                for (i = 0, parent = commit->parents;
1045                     i < MAXPARENT && parent;
1046                     parent = parent->next, i++) {
1047                        struct origin *porigin = parent_origin[i];
1048                        if (!porigin)
1049                                continue;
1050                        if (find_move_in_parent(sb, origin, porigin))
1051                                goto finish;
1052                }
1053
1054        /*
1055         * Optionally run "ciff" to find copies from parents' files here.
1056         */
1057        if (opt & PICKAXE_BLAME_COPY)
1058                for (i = 0, parent = commit->parents;
1059                     i < MAXPARENT && parent;
1060                     parent = parent->next, i++) {
1061                        struct origin *porigin = parent_origin[i];
1062                        if (find_copy_in_parent(sb, origin, parent->item,
1063                                                porigin, opt))
1064                                goto finish;
1065                }
1066
1067 finish:
1068        for (i = 0; i < MAXPARENT; i++)
1069                origin_decref(parent_origin[i]);
1070}
1071
1072static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
1073{
1074        while (1) {
1075                struct blame_entry *ent;
1076                struct commit *commit;
1077                struct origin *suspect = NULL;
1078
1079                /* find one suspect to break down */
1080                for (ent = sb->ent; !suspect && ent; ent = ent->next)
1081                        if (!ent->guilty)
1082                                suspect = ent->suspect;
1083                if (!suspect)
1084                        return; /* all done */
1085
1086                origin_incref(suspect);
1087                commit = suspect->commit;
1088                if (!commit->object.parsed)
1089                        parse_commit(commit);
1090                if (!(commit->object.flags & UNINTERESTING) &&
1091                    !(revs->max_age != -1 && commit->date  < revs->max_age))
1092                        pass_blame(sb, suspect, opt);
1093                else {
1094                        commit->object.flags |= UNINTERESTING;
1095                        if (commit->object.parsed)
1096                                mark_parents_uninteresting(commit);
1097                }
1098                /* treat root commit as boundary */
1099                if (!commit->parents && !show_root)
1100                        commit->object.flags |= UNINTERESTING;
1101
1102                /* Take responsibility for the remaining entries */
1103                for (ent = sb->ent; ent; ent = ent->next)
1104                        if (!cmp_suspect(ent->suspect, suspect))
1105                                ent->guilty = 1;
1106                origin_decref(suspect);
1107
1108                if (DEBUG) /* sanity */
1109                        sanity_check_refcnt(sb);
1110        }
1111}
1112
1113static const char *format_time(unsigned long time, const char *tz_str,
1114                               int show_raw_time)
1115{
1116        static char time_buf[128];
1117        time_t t = time;
1118        int minutes, tz;
1119        struct tm *tm;
1120
1121        if (show_raw_time) {
1122                sprintf(time_buf, "%lu %s", time, tz_str);
1123                return time_buf;
1124        }
1125
1126        tz = atoi(tz_str);
1127        minutes = tz < 0 ? -tz : tz;
1128        minutes = (minutes / 100)*60 + (minutes % 100);
1129        minutes = tz < 0 ? -minutes : minutes;
1130        t = time + minutes * 60;
1131        tm = gmtime(&t);
1132
1133        strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
1134        strcat(time_buf, tz_str);
1135        return time_buf;
1136}
1137
1138struct commit_info
1139{
1140        char *author;
1141        char *author_mail;
1142        unsigned long author_time;
1143        char *author_tz;
1144
1145        /* filled only when asked for details */
1146        char *committer;
1147        char *committer_mail;
1148        unsigned long committer_time;
1149        char *committer_tz;
1150
1151        char *summary;
1152};
1153
1154static void get_ac_line(const char *inbuf, const char *what,
1155                        int bufsz, char *person, char **mail,
1156                        unsigned long *time, char **tz)
1157{
1158        int len;
1159        char *tmp, *endp;
1160
1161        tmp = strstr(inbuf, what);
1162        if (!tmp)
1163                goto error_out;
1164        tmp += strlen(what);
1165        endp = strchr(tmp, '\n');
1166        if (!endp)
1167                len = strlen(tmp);
1168        else
1169                len = endp - tmp;
1170        if (bufsz <= len) {
1171        error_out:
1172                /* Ugh */
1173                person = *mail = *tz = "(unknown)";
1174                *time = 0;
1175                return;
1176        }
1177        memcpy(person, tmp, len);
1178
1179        tmp = person;
1180        tmp += len;
1181        *tmp = 0;
1182        while (*tmp != ' ')
1183                tmp--;
1184        *tz = tmp+1;
1185
1186        *tmp = 0;
1187        while (*tmp != ' ')
1188                tmp--;
1189        *time = strtoul(tmp, NULL, 10);
1190
1191        *tmp = 0;
1192        while (*tmp != ' ')
1193                tmp--;
1194        *mail = tmp + 1;
1195        *tmp = 0;
1196}
1197
1198static void get_commit_info(struct commit *commit,
1199                            struct commit_info *ret,
1200                            int detailed)
1201{
1202        int len;
1203        char *tmp, *endp;
1204        static char author_buf[1024];
1205        static char committer_buf[1024];
1206        static char summary_buf[1024];
1207
1208        /* We've operated without save_commit_buffer, so
1209         * we now need to populate them for output.
1210         */
1211        if (!commit->buffer) {
1212                char type[20];
1213                unsigned long size;
1214                commit->buffer =
1215                        read_sha1_file(commit->object.sha1, type, &size);
1216        }
1217        ret->author = author_buf;
1218        get_ac_line(commit->buffer, "\nauthor ",
1219                    sizeof(author_buf), author_buf, &ret->author_mail,
1220                    &ret->author_time, &ret->author_tz);
1221
1222        if (!detailed)
1223                return;
1224
1225        ret->committer = committer_buf;
1226        get_ac_line(commit->buffer, "\ncommitter ",
1227                    sizeof(committer_buf), committer_buf, &ret->committer_mail,
1228                    &ret->committer_time, &ret->committer_tz);
1229
1230        ret->summary = summary_buf;
1231        tmp = strstr(commit->buffer, "\n\n");
1232        if (!tmp) {
1233        error_out:
1234                sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
1235                return;
1236        }
1237        tmp += 2;
1238        endp = strchr(tmp, '\n');
1239        if (!endp)
1240                goto error_out;
1241        len = endp - tmp;
1242        if (len >= sizeof(summary_buf))
1243                goto error_out;
1244        memcpy(summary_buf, tmp, len);
1245        summary_buf[len] = 0;
1246}
1247
1248#define OUTPUT_ANNOTATE_COMPAT  001
1249#define OUTPUT_LONG_OBJECT_NAME 002
1250#define OUTPUT_RAW_TIMESTAMP    004
1251#define OUTPUT_PORCELAIN        010
1252#define OUTPUT_SHOW_NAME        020
1253#define OUTPUT_SHOW_NUMBER      040
1254#define OUTPUT_SHOW_SCORE      0100
1255
1256static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
1257{
1258        int cnt;
1259        const char *cp;
1260        struct origin *suspect = ent->suspect;
1261        char hex[41];
1262
1263        strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1264        printf("%s%c%d %d %d\n",
1265               hex,
1266               ent->guilty ? ' ' : '*', // purely for debugging
1267               ent->s_lno + 1,
1268               ent->lno + 1,
1269               ent->num_lines);
1270        if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1271                struct commit_info ci;
1272                suspect->commit->object.flags |= METAINFO_SHOWN;
1273                get_commit_info(suspect->commit, &ci, 1);
1274                printf("author %s\n", ci.author);
1275                printf("author-mail %s\n", ci.author_mail);
1276                printf("author-time %lu\n", ci.author_time);
1277                printf("author-tz %s\n", ci.author_tz);
1278                printf("committer %s\n", ci.committer);
1279                printf("committer-mail %s\n", ci.committer_mail);
1280                printf("committer-time %lu\n", ci.committer_time);
1281                printf("committer-tz %s\n", ci.committer_tz);
1282                printf("filename %s\n", suspect->path);
1283                printf("summary %s\n", ci.summary);
1284                if (suspect->commit->object.flags & UNINTERESTING)
1285                        printf("boundary\n");
1286        }
1287        else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1288                printf("filename %s\n", suspect->path);
1289
1290        cp = nth_line(sb, ent->lno);
1291        for (cnt = 0; cnt < ent->num_lines; cnt++) {
1292                char ch;
1293                if (cnt)
1294                        printf("%s %d %d\n", hex,
1295                               ent->s_lno + 1 + cnt,
1296                               ent->lno + 1 + cnt);
1297                putchar('\t');
1298                do {
1299                        ch = *cp++;
1300                        putchar(ch);
1301                } while (ch != '\n' &&
1302                         cp < sb->final_buf + sb->final_buf_size);
1303        }
1304}
1305
1306static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1307{
1308        int cnt;
1309        const char *cp;
1310        struct origin *suspect = ent->suspect;
1311        struct commit_info ci;
1312        char hex[41];
1313        int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1314
1315        get_commit_info(suspect->commit, &ci, 1);
1316        strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1317
1318        cp = nth_line(sb, ent->lno);
1319        for (cnt = 0; cnt < ent->num_lines; cnt++) {
1320                char ch;
1321                int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8;
1322
1323                if (suspect->commit->object.flags & UNINTERESTING) {
1324                        if (!blank_boundary) {
1325                                length--;
1326                                putchar('^');
1327                        }
1328                        else
1329                                memset(hex, ' ', length);
1330                }
1331
1332                printf("%.*s", length, hex);
1333                if (opt & OUTPUT_ANNOTATE_COMPAT)
1334                        printf("\t(%10s\t%10s\t%d)", ci.author,
1335                               format_time(ci.author_time, ci.author_tz,
1336                                           show_raw_time),
1337                               ent->lno + 1 + cnt);
1338                else {
1339                        if (opt & OUTPUT_SHOW_SCORE)
1340                                printf(" %*d %02d",
1341                                       max_score_digits, ent->score,
1342                                       ent->suspect->refcnt);
1343                        if (opt & OUTPUT_SHOW_NAME)
1344                                printf(" %-*.*s", longest_file, longest_file,
1345                                       suspect->path);
1346                        if (opt & OUTPUT_SHOW_NUMBER)
1347                                printf(" %*d", max_orig_digits,
1348                                       ent->s_lno + 1 + cnt);
1349                        printf(" (%-*.*s %10s %*d) ",
1350                               longest_author, longest_author, ci.author,
1351                               format_time(ci.author_time, ci.author_tz,
1352                                           show_raw_time),
1353                               max_digits, ent->lno + 1 + cnt);
1354                }
1355                do {
1356                        ch = *cp++;
1357                        putchar(ch);
1358                } while (ch != '\n' &&
1359                         cp < sb->final_buf + sb->final_buf_size);
1360        }
1361}
1362
1363static void output(struct scoreboard *sb, int option)
1364{
1365        struct blame_entry *ent;
1366
1367        if (option & OUTPUT_PORCELAIN) {
1368                for (ent = sb->ent; ent; ent = ent->next) {
1369                        struct blame_entry *oth;
1370                        struct origin *suspect = ent->suspect;
1371                        struct commit *commit = suspect->commit;
1372                        if (commit->object.flags & MORE_THAN_ONE_PATH)
1373                                continue;
1374                        for (oth = ent->next; oth; oth = oth->next) {
1375                                if ((oth->suspect->commit != commit) ||
1376                                    !strcmp(oth->suspect->path, suspect->path))
1377                                        continue;
1378                                commit->object.flags |= MORE_THAN_ONE_PATH;
1379                                break;
1380                        }
1381                }
1382        }
1383
1384        for (ent = sb->ent; ent; ent = ent->next) {
1385                if (option & OUTPUT_PORCELAIN)
1386                        emit_porcelain(sb, ent);
1387                else {
1388                        emit_other(sb, ent, option);
1389                }
1390        }
1391}
1392
1393static int prepare_lines(struct scoreboard *sb)
1394{
1395        const char *buf = sb->final_buf;
1396        unsigned long len = sb->final_buf_size;
1397        int num = 0, incomplete = 0, bol = 1;
1398
1399        if (len && buf[len-1] != '\n')
1400                incomplete++; /* incomplete line at the end */
1401        while (len--) {
1402                if (bol) {
1403                        sb->lineno = xrealloc(sb->lineno,
1404                                              sizeof(int* ) * (num + 1));
1405                        sb->lineno[num] = buf - sb->final_buf;
1406                        bol = 0;
1407                }
1408                if (*buf++ == '\n') {
1409                        num++;
1410                        bol = 1;
1411                }
1412        }
1413        sb->lineno = xrealloc(sb->lineno,
1414                              sizeof(int* ) * (num + incomplete + 1));
1415        sb->lineno[num + incomplete] = buf - sb->final_buf;
1416        sb->num_lines = num + incomplete;
1417        return sb->num_lines;
1418}
1419
1420static int read_ancestry(const char *graft_file)
1421{
1422        FILE *fp = fopen(graft_file, "r");
1423        char buf[1024];
1424        if (!fp)
1425                return -1;
1426        while (fgets(buf, sizeof(buf), fp)) {
1427                /* The format is just "Commit Parent1 Parent2 ...\n" */
1428                int len = strlen(buf);
1429                struct commit_graft *graft = read_graft_line(buf, len);
1430                if (graft)
1431                        register_commit_graft(graft, 0);
1432        }
1433        fclose(fp);
1434        return 0;
1435}
1436
1437static int lineno_width(int lines)
1438{
1439        int i, width;
1440
1441        for (width = 1, i = 10; i <= lines + 1; width++)
1442                i *= 10;
1443        return width;
1444}
1445
1446static void find_alignment(struct scoreboard *sb, int *option)
1447{
1448        int longest_src_lines = 0;
1449        int longest_dst_lines = 0;
1450        unsigned largest_score = 0;
1451        struct blame_entry *e;
1452
1453        for (e = sb->ent; e; e = e->next) {
1454                struct origin *suspect = e->suspect;
1455                struct commit_info ci;
1456                int num;
1457
1458                if (strcmp(suspect->path, sb->path))
1459                        *option |= OUTPUT_SHOW_NAME;
1460                num = strlen(suspect->path);
1461                if (longest_file < num)
1462                        longest_file = num;
1463                if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1464                        suspect->commit->object.flags |= METAINFO_SHOWN;
1465                        get_commit_info(suspect->commit, &ci, 1);
1466                        num = strlen(ci.author);
1467                        if (longest_author < num)
1468                                longest_author = num;
1469                }
1470                num = e->s_lno + e->num_lines;
1471                if (longest_src_lines < num)
1472                        longest_src_lines = num;
1473                num = e->lno + e->num_lines;
1474                if (longest_dst_lines < num)
1475                        longest_dst_lines = num;
1476                if (largest_score < ent_score(sb, e))
1477                        largest_score = ent_score(sb, e);
1478        }
1479        max_orig_digits = lineno_width(longest_src_lines);
1480        max_digits = lineno_width(longest_dst_lines);
1481        max_score_digits = lineno_width(largest_score);
1482}
1483
1484static void sanity_check_refcnt(struct scoreboard *sb)
1485{
1486        int baa = 0;
1487        struct blame_entry *ent;
1488
1489        for (ent = sb->ent; ent; ent = ent->next) {
1490                /* Nobody should have zero or negative refcnt */
1491                if (ent->suspect->refcnt <= 0) {
1492                        fprintf(stderr, "%s in %s has negative refcnt %d\n",
1493                                ent->suspect->path,
1494                                sha1_to_hex(ent->suspect->commit->object.sha1),
1495                                ent->suspect->refcnt);
1496                        baa = 1;
1497                }
1498        }
1499        for (ent = sb->ent; ent; ent = ent->next) {
1500                /* Mark the ones that haven't been checked */
1501                if (0 < ent->suspect->refcnt)
1502                        ent->suspect->refcnt = -ent->suspect->refcnt;
1503        }
1504        for (ent = sb->ent; ent; ent = ent->next) {
1505                /* then pick each and see if they have the the correct
1506                 * refcnt.
1507                 */
1508                int found;
1509                struct blame_entry *e;
1510                struct origin *suspect = ent->suspect;
1511
1512                if (0 < suspect->refcnt)
1513                        continue;
1514                suspect->refcnt = -suspect->refcnt; /* Unmark */
1515                for (found = 0, e = sb->ent; e; e = e->next) {
1516                        if (e->suspect != suspect)
1517                                continue;
1518                        found++;
1519                }
1520                if (suspect->refcnt != found) {
1521                        fprintf(stderr, "%s in %s has refcnt %d, not %d\n",
1522                                ent->suspect->path,
1523                                sha1_to_hex(ent->suspect->commit->object.sha1),
1524                                ent->suspect->refcnt, found);
1525                        baa = 2;
1526                }
1527        }
1528        if (baa) {
1529                int opt = 0160;
1530                find_alignment(sb, &opt);
1531                output(sb, opt);
1532                die("Baa %d!", baa);
1533        }
1534}
1535
1536static int has_path_in_work_tree(const char *path)
1537{
1538        struct stat st;
1539        return !lstat(path, &st);
1540}
1541
1542static unsigned parse_score(const char *arg)
1543{
1544        char *end;
1545        unsigned long score = strtoul(arg, &end, 10);
1546        if (*end)
1547                return 0;
1548        return score;
1549}
1550
1551static const char *add_prefix(const char *prefix, const char *path)
1552{
1553        if (!prefix || !prefix[0])
1554                return path;
1555        return prefix_path(prefix, strlen(prefix), path);
1556}
1557
1558static const char *parse_loc(const char *spec,
1559                             struct scoreboard *sb, long lno,
1560                             long begin, long *ret)
1561{
1562        char *term;
1563        const char *line;
1564        long num;
1565        int reg_error;
1566        regex_t regexp;
1567        regmatch_t match[1];
1568
1569        /* Allow "-L <something>,+20" to mean starting at <something>
1570         * for 20 lines, or "-L <something>,-5" for 5 lines ending at
1571         * <something>.
1572         */
1573        if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
1574                num = strtol(spec + 1, &term, 10);
1575                if (term != spec + 1) {
1576                        if (spec[0] == '-')
1577                                num = 0 - num;
1578                        if (0 < num)
1579                                *ret = begin + num - 2;
1580                        else if (!num)
1581                                *ret = begin;
1582                        else
1583                                *ret = begin + num;
1584                        return term;
1585                }
1586                return spec;
1587        }
1588        num = strtol(spec, &term, 10);
1589        if (term != spec) {
1590                *ret = num;
1591                return term;
1592        }
1593        if (spec[0] != '/')
1594                return spec;
1595
1596        /* it could be a regexp of form /.../ */
1597        for (term = (char*) spec + 1; *term && *term != '/'; term++) {
1598                if (*term == '\\')
1599                        term++;
1600        }
1601        if (*term != '/')
1602                return spec;
1603
1604        /* try [spec+1 .. term-1] as regexp */
1605        *term = 0;
1606        begin--; /* input is in human terms */
1607        line = nth_line(sb, begin);
1608
1609        if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
1610            !(reg_error = regexec(&regexp, line, 1, match, 0))) {
1611                const char *cp = line + match[0].rm_so;
1612                const char *nline;
1613
1614                while (begin++ < lno) {
1615                        nline = nth_line(sb, begin);
1616                        if (line <= cp && cp < nline)
1617                                break;
1618                        line = nline;
1619                }
1620                *ret = begin;
1621                regfree(&regexp);
1622                *term++ = '/';
1623                return term;
1624        }
1625        else {
1626                char errbuf[1024];
1627                regerror(reg_error, &regexp, errbuf, 1024);
1628                die("-L parameter '%s': %s", spec + 1, errbuf);
1629        }
1630}
1631
1632static void prepare_blame_range(struct scoreboard *sb,
1633                                const char *bottomtop,
1634                                long lno,
1635                                long *bottom, long *top)
1636{
1637        const char *term;
1638
1639        term = parse_loc(bottomtop, sb, lno, 1, bottom);
1640        if (*term == ',') {
1641                term = parse_loc(term + 1, sb, lno, *bottom + 1, top);
1642                if (*term)
1643                        usage(blame_usage);
1644        }
1645        if (*term)
1646                usage(blame_usage);
1647}
1648
1649static int git_blame_config(const char *var, const char *value)
1650{
1651        if (!strcmp(var, "blame.showroot")) {
1652                show_root = git_config_bool(var, value);
1653                return 0;
1654        }
1655        if (!strcmp(var, "blame.blankboundary")) {
1656                blank_boundary = git_config_bool(var, value);
1657                return 0;
1658        }
1659        return git_default_config(var, value);
1660}
1661
1662int cmd_blame(int argc, const char **argv, const char *prefix)
1663{
1664        struct rev_info revs;
1665        const char *path;
1666        struct scoreboard sb;
1667        struct origin *o;
1668        struct blame_entry *ent;
1669        int i, seen_dashdash, unk, opt;
1670        long bottom, top, lno;
1671        int output_option = 0;
1672        const char *revs_file = NULL;
1673        const char *final_commit_name = NULL;
1674        char type[10];
1675        const char *bottomtop = NULL;
1676
1677        git_config(git_blame_config);
1678        save_commit_buffer = 0;
1679
1680        opt = 0;
1681        seen_dashdash = 0;
1682        for (unk = i = 1; i < argc; i++) {
1683                const char *arg = argv[i];
1684                if (*arg != '-')
1685                        break;
1686                else if (!strcmp("-b", arg))
1687                        blank_boundary = 1;
1688                else if (!strcmp("--root", arg))
1689                        show_root = 1;
1690                else if (!strcmp("-c", arg))
1691                        output_option |= OUTPUT_ANNOTATE_COMPAT;
1692                else if (!strcmp("-t", arg))
1693                        output_option |= OUTPUT_RAW_TIMESTAMP;
1694                else if (!strcmp("-l", arg))
1695                        output_option |= OUTPUT_LONG_OBJECT_NAME;
1696                else if (!strcmp("-S", arg) && ++i < argc)
1697                        revs_file = argv[i];
1698                else if (!strncmp("-M", arg, 2)) {
1699                        opt |= PICKAXE_BLAME_MOVE;
1700                        blame_move_score = parse_score(arg+2);
1701                }
1702                else if (!strncmp("-C", arg, 2)) {
1703                        if (opt & PICKAXE_BLAME_COPY)
1704                                opt |= PICKAXE_BLAME_COPY_HARDER;
1705                        opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
1706                        blame_copy_score = parse_score(arg+2);
1707                }
1708                else if (!strncmp("-L", arg, 2)) {
1709                        if (!arg[2]) {
1710                                if (++i >= argc)
1711                                        usage(blame_usage);
1712                                arg = argv[i];
1713                        }
1714                        else
1715                                arg += 2;
1716                        if (bottomtop)
1717                                die("More than one '-L n,m' option given");
1718                        bottomtop = arg;
1719                }
1720                else if (!strcmp("--score-debug", arg))
1721                        output_option |= OUTPUT_SHOW_SCORE;
1722                else if (!strcmp("-f", arg) ||
1723                         !strcmp("--show-name", arg))
1724                        output_option |= OUTPUT_SHOW_NAME;
1725                else if (!strcmp("-n", arg) ||
1726                         !strcmp("--show-number", arg))
1727                        output_option |= OUTPUT_SHOW_NUMBER;
1728                else if (!strcmp("-p", arg) ||
1729                         !strcmp("--porcelain", arg))
1730                        output_option |= OUTPUT_PORCELAIN;
1731                else if (!strcmp("--", arg)) {
1732                        seen_dashdash = 1;
1733                        i++;
1734                        break;
1735                }
1736                else
1737                        argv[unk++] = arg;
1738        }
1739
1740        if (!blame_move_score)
1741                blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
1742        if (!blame_copy_score)
1743                blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
1744
1745        /* We have collected options unknown to us in argv[1..unk]
1746         * which are to be passed to revision machinery if we are
1747         * going to do the "bottom" procesing.
1748         *
1749         * The remaining are:
1750         *
1751         * (1) if seen_dashdash, its either
1752         *     "-options -- <path>" or
1753         *     "-options -- <path> <rev>".
1754         *     but the latter is allowed only if there is no
1755         *     options that we passed to revision machinery.
1756         *
1757         * (2) otherwise, we may have "--" somewhere later and
1758         *     might be looking at the first one of multiple 'rev'
1759         *     parameters (e.g. " master ^next ^maint -- path").
1760         *     See if there is a dashdash first, and give the
1761         *     arguments before that to revision machinery.
1762         *     After that there must be one 'path'.
1763         *
1764         * (3) otherwise, its one of the three:
1765         *     "-options <path> <rev>"
1766         *     "-options <rev> <path>"
1767         *     "-options <path>"
1768         *     but again the first one is allowed only if
1769         *     there is no options that we passed to revision
1770         *     machinery.
1771         */
1772
1773        if (seen_dashdash) {
1774                /* (1) */
1775                if (argc <= i)
1776                        usage(blame_usage);
1777                path = add_prefix(prefix, argv[i]);
1778                if (i + 1 == argc - 1) {
1779                        if (unk != 1)
1780                                usage(blame_usage);
1781                        argv[unk++] = argv[i + 1];
1782                }
1783                else if (i + 1 != argc)
1784                        /* garbage at end */
1785                        usage(blame_usage);
1786        }
1787        else {
1788                int j;
1789                for (j = i; !seen_dashdash && j < argc; j++)
1790                        if (!strcmp(argv[j], "--"))
1791                                seen_dashdash = j;
1792                if (seen_dashdash) {
1793                        if (seen_dashdash + 1 != argc - 1)
1794                                usage(blame_usage);
1795                        path = add_prefix(prefix, argv[seen_dashdash + 1]);
1796                        for (j = i; j < seen_dashdash; j++)
1797                                argv[unk++] = argv[j];
1798                }
1799                else {
1800                        /* (3) */
1801                        path = add_prefix(prefix, argv[i]);
1802                        if (i + 1 == argc - 1) {
1803                                final_commit_name = argv[i + 1];
1804
1805                                /* if (unk == 1) we could be getting
1806                                 * old-style
1807                                 */
1808                                if (unk == 1 && !has_path_in_work_tree(path)) {
1809                                        path = add_prefix(prefix, argv[i + 1]);
1810                                        final_commit_name = argv[i];
1811                                }
1812                        }
1813                        else if (i != argc - 1)
1814                                usage(blame_usage); /* garbage at end */
1815
1816                        if (!has_path_in_work_tree(path))
1817                                die("cannot stat path %s: %s",
1818                                    path, strerror(errno));
1819                }
1820        }
1821
1822        if (final_commit_name)
1823                argv[unk++] = final_commit_name;
1824
1825        /* Now we got rev and path.  We do not want the path pruning
1826         * but we may want "bottom" processing.
1827         */
1828        argv[unk++] = "--"; /* terminate the rev name */
1829        argv[unk] = NULL;
1830
1831        init_revisions(&revs, NULL);
1832        setup_revisions(unk, argv, &revs, "HEAD");
1833        memset(&sb, 0, sizeof(sb));
1834
1835        /* There must be one and only one positive commit in the
1836         * revs->pending array.
1837         */
1838        for (i = 0; i < revs.pending.nr; i++) {
1839                struct object *obj = revs.pending.objects[i].item;
1840                if (obj->flags & UNINTERESTING)
1841                        continue;
1842                while (obj->type == OBJ_TAG)
1843                        obj = deref_tag(obj, NULL, 0);
1844                if (obj->type != OBJ_COMMIT)
1845                        die("Non commit %s?",
1846                            revs.pending.objects[i].name);
1847                if (sb.final)
1848                        die("More than one commit to dig from %s and %s?",
1849                            revs.pending.objects[i].name,
1850                            final_commit_name);
1851                sb.final = (struct commit *) obj;
1852                final_commit_name = revs.pending.objects[i].name;
1853        }
1854
1855        if (!sb.final) {
1856                /* "--not A B -- path" without anything positive */
1857                unsigned char head_sha1[20];
1858
1859                final_commit_name = "HEAD";
1860                if (get_sha1(final_commit_name, head_sha1))
1861                        die("No such ref: HEAD");
1862                sb.final = lookup_commit_reference(head_sha1);
1863                add_pending_object(&revs, &(sb.final->object), "HEAD");
1864        }
1865
1866        /* If we have bottom, this will mark the ancestors of the
1867         * bottom commits we would reach while traversing as
1868         * uninteresting.
1869         */
1870        prepare_revision_walk(&revs);
1871
1872        o = get_origin(&sb, sb.final, path);
1873        if (fill_blob_sha1(o))
1874                die("no such path %s in %s", path, final_commit_name);
1875
1876        sb.final_buf = read_sha1_file(o->blob_sha1, type, &sb.final_buf_size);
1877        num_read_blob++;
1878        lno = prepare_lines(&sb);
1879
1880        bottom = top = 0;
1881        if (bottomtop)
1882                prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);
1883        if (bottom && top && top < bottom) {
1884                long tmp;
1885                tmp = top; top = bottom; bottom = tmp;
1886        }
1887        if (bottom < 1)
1888                bottom = 1;
1889        if (top < 1)
1890                top = lno;
1891        bottom--;
1892        if (lno < top)
1893                die("file %s has only %lu lines", path, lno);
1894
1895        ent = xcalloc(1, sizeof(*ent));
1896        ent->lno = bottom;
1897        ent->num_lines = top - bottom;
1898        ent->suspect = o;
1899        ent->s_lno = bottom;
1900
1901        sb.ent = ent;
1902        sb.path = path;
1903
1904        if (revs_file && read_ancestry(revs_file))
1905                die("reading graft file %s failed: %s",
1906                    revs_file, strerror(errno));
1907
1908        assign_blame(&sb, &revs, opt);
1909
1910        coalesce(&sb);
1911
1912        if (!(output_option & OUTPUT_PORCELAIN))
1913                find_alignment(&sb, &output_option);
1914
1915        output(&sb, output_option);
1916        free((void *)sb.final_buf);
1917        for (ent = sb.ent; ent; ) {
1918                struct blame_entry *e = ent->next;
1919                free(ent);
1920                ent = e;
1921        }
1922
1923        if (DEBUG) {
1924                printf("num read blob: %d\n", num_read_blob);
1925                printf("num get patch: %d\n", num_get_patch);
1926                printf("num commits: %d\n", num_commits);
1927        }
1928        return 0;
1929}