332e6a2e3c9ba3a91e3e1969be49907c588b0444
   1/*
   2 * Pickaxe
   3 *
   4 * Copyright (c) 2006, Junio C Hamano
   5 */
   6
   7#include "cache.h"
   8#include "builtin.h"
   9#include "blob.h"
  10#include "commit.h"
  11#include "tag.h"
  12#include "tree-walk.h"
  13#include "diff.h"
  14#include "diffcore.h"
  15#include "revision.h"
  16#include "xdiff-interface.h"
  17
  18#include <time.h>
  19#include <sys/time.h>
  20
  21static char pickaxe_usage[] =
  22"git-pickaxe [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [commit] [--] file\n"
  23"  -c, --compatibility Use the same output mode as git-annotate (Default: off)\n"
  24"  -l, --long          Show long commit SHA1 (Default: off)\n"
  25"  -t, --time          Show raw timestamp (Default: off)\n"
  26"  -f, --show-name     Show original filename (Default: auto)\n"
  27"  -n, --show-number   Show original linenumber (Default: off)\n"
  28"  -p, --porcelain     Show in a format designed for machine consumption\n"
  29"  -L n,m              Process only line range n,m, counting from 1\n"
  30"  -M, -C              Find line movements within and across files\n"
  31"  -S revs-file        Use revisions from revs-file instead of calling git-rev-list\n";
  32
  33static int longest_file;
  34static int longest_author;
  35static int max_orig_digits;
  36static int max_digits;
  37static int max_score_digits;
  38
  39#ifndef DEBUG
  40#define DEBUG 0
  41#endif
  42
  43#define PICKAXE_BLAME_MOVE              01
  44#define PICKAXE_BLAME_COPY              02
  45#define PICKAXE_BLAME_COPY_HARDER       04
  46
  47/*
  48 * blame for a blame_entry with score lower than these thresholds
  49 * is not passed to the parent using move/copy logic.
  50 */
  51static unsigned blame_move_score;
  52static unsigned blame_copy_score;
  53#define BLAME_DEFAULT_MOVE_SCORE        20
  54#define BLAME_DEFAULT_COPY_SCORE        40
  55
  56/* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
  57#define METAINFO_SHOWN          (1u<<12)
  58#define MORE_THAN_ONE_PATH      (1u<<13)
  59
  60/*
  61 * One blob in a commit that is being suspected
  62 */
  63struct origin {
  64        int refcnt;
  65        struct commit *commit;
  66        unsigned char blob_sha1[20];
  67        char path[FLEX_ARRAY];
  68};
  69
  70static inline struct origin *origin_incref(struct origin *o)
  71{
  72        if (o)
  73                o->refcnt++;
  74        return o;
  75}
  76
  77static void origin_decref(struct origin *o)
  78{
  79        if (o && --o->refcnt <= 0) {
  80                memset(o, 0, sizeof(*o));
  81                free(o);
  82        }
  83}
  84
  85struct blame_entry {
  86        struct blame_entry *prev;
  87        struct blame_entry *next;
  88
  89        /* the first line of this group in the final image;
  90         * internally all line numbers are 0 based.
  91         */
  92        int lno;
  93
  94        /* how many lines this group has */
  95        int num_lines;
  96
  97        /* the commit that introduced this group into the final image */
  98        struct origin *suspect;
  99
 100        /* true if the suspect is truly guilty; false while we have not
 101         * checked if the group came from one of its parents.
 102         */
 103        char guilty;
 104
 105        /* the line number of the first line of this group in the
 106         * suspect's file; internally all line numbers are 0 based.
 107         */
 108        int s_lno;
 109
 110        /* how significant this entry is -- cached to avoid
 111         * scanning the lines over and over
 112         */
 113        unsigned score;
 114};
 115
 116struct scoreboard {
 117        /* the final commit (i.e. where we started digging from) */
 118        struct commit *final;
 119
 120        const char *path;
 121
 122        /* the contents in the final; pointed into by buf pointers of
 123         * blame_entries
 124         */
 125        const char *final_buf;
 126        unsigned long final_buf_size;
 127
 128        /* linked list of blames */
 129        struct blame_entry *ent;
 130
 131        /* look-up a line in the final buffer */
 132        int num_lines;
 133        int *lineno;
 134};
 135
 136static int cmp_suspect(struct origin *a, struct origin *b)
 137{
 138        int cmp = hashcmp(a->commit->object.sha1, b->commit->object.sha1);
 139        if (cmp)
 140                return cmp;
 141        return strcmp(a->path, b->path);
 142}
 143
 144#define cmp_suspect(a, b) ( ((a)==(b)) ? 0 : cmp_suspect(a,b) )
 145
 146static void sanity_check_refcnt(struct scoreboard *);
 147
 148static void coalesce(struct scoreboard *sb)
 149{
 150        struct blame_entry *ent, *next;
 151
 152        for (ent = sb->ent; ent && (next = ent->next); ent = next) {
 153                if (!cmp_suspect(ent->suspect, next->suspect) &&
 154                    ent->guilty == next->guilty &&
 155                    ent->s_lno + ent->num_lines == next->s_lno) {
 156                        ent->num_lines += next->num_lines;
 157                        ent->next = next->next;
 158                        if (ent->next)
 159                                ent->next->prev = ent;
 160                        origin_decref(next->suspect);
 161                        free(next);
 162                        ent->score = 0;
 163                        next = ent; /* again */
 164                }
 165        }
 166
 167        if (DEBUG) /* sanity */
 168                sanity_check_refcnt(sb);
 169}
 170
 171static struct origin *make_origin(struct commit *commit, const char *path)
 172{
 173        struct origin *o;
 174        o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
 175        o->commit = commit;
 176        o->refcnt = 1;
 177        strcpy(o->path, path);
 178        return o;
 179}
 180
 181static struct origin *get_origin(struct scoreboard *sb,
 182                                 struct commit *commit,
 183                                 const char *path)
 184{
 185        struct blame_entry *e;
 186
 187        for (e = sb->ent; e; e = e->next) {
 188                if (e->suspect->commit == commit &&
 189                    !strcmp(e->suspect->path, path))
 190                        return origin_incref(e->suspect);
 191        }
 192        return make_origin(commit, path);
 193}
 194
 195static int fill_blob_sha1(struct origin *origin)
 196{
 197        unsigned mode;
 198        char type[10];
 199
 200        if (!is_null_sha1(origin->blob_sha1))
 201                return 0;
 202        if (get_tree_entry(origin->commit->object.sha1,
 203                           origin->path,
 204                           origin->blob_sha1, &mode))
 205                goto error_out;
 206        if (sha1_object_info(origin->blob_sha1, type, NULL) ||
 207            strcmp(type, blob_type))
 208                goto error_out;
 209        return 0;
 210 error_out:
 211        hashclr(origin->blob_sha1);
 212        return -1;
 213}
 214
 215static struct origin *find_origin(struct scoreboard *sb,
 216                                  struct commit *parent,
 217                                  struct origin *origin)
 218{
 219        struct origin *porigin = NULL;
 220        struct diff_options diff_opts;
 221        const char *paths[2];
 222
 223        if (parent->util) {
 224                /* This is a freestanding copy of origin and not
 225                 * refcounted.
 226                 */
 227                struct origin *cached = parent->util;
 228                if (!strcmp(cached->path, origin->path)) {
 229                        porigin = get_origin(sb, parent, cached->path);
 230                        if (porigin->refcnt == 1)
 231                                hashcpy(porigin->blob_sha1, cached->blob_sha1);
 232                        return porigin;
 233                }
 234                /* otherwise it was not very useful; free it */
 235                free(parent->util);
 236                parent->util = NULL;
 237        }
 238
 239        /* See if the origin->path is different between parent
 240         * and origin first.  Most of the time they are the
 241         * same and diff-tree is fairly efficient about this.
 242         */
 243        diff_setup(&diff_opts);
 244        diff_opts.recursive = 1;
 245        diff_opts.detect_rename = 0;
 246        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 247        paths[0] = origin->path;
 248        paths[1] = NULL;
 249
 250        diff_tree_setup_paths(paths, &diff_opts);
 251        if (diff_setup_done(&diff_opts) < 0)
 252                die("diff-setup");
 253        diff_tree_sha1(parent->tree->object.sha1,
 254                       origin->commit->tree->object.sha1,
 255                       "", &diff_opts);
 256        diffcore_std(&diff_opts);
 257
 258        /* It is either one entry that says "modified", or "created",
 259         * or nothing.
 260         */
 261        if (!diff_queued_diff.nr) {
 262                /* The path is the same as parent */
 263                porigin = get_origin(sb, parent, origin->path);
 264                hashcpy(porigin->blob_sha1, origin->blob_sha1);
 265        }
 266        else if (diff_queued_diff.nr != 1)
 267                die("internal error in pickaxe::find_origin");
 268        else {
 269                struct diff_filepair *p = diff_queued_diff.queue[0];
 270                switch (p->status) {
 271                default:
 272                        die("internal error in pickaxe::find_origin (%c)",
 273                            p->status);
 274                case 'M':
 275                        porigin = get_origin(sb, parent, origin->path);
 276                        hashcpy(porigin->blob_sha1, p->one->sha1);
 277                        break;
 278                case 'A':
 279                case 'T':
 280                        /* Did not exist in parent, or type changed */
 281                        break;
 282                }
 283        }
 284        diff_flush(&diff_opts);
 285        if (porigin) {
 286                struct origin *cached;
 287                cached = make_origin(porigin->commit, porigin->path);
 288                hashcpy(cached->blob_sha1, porigin->blob_sha1);
 289                parent->util = cached;
 290        }
 291        return porigin;
 292}
 293
 294static struct origin *find_rename(struct scoreboard *sb,
 295                                  struct commit *parent,
 296                                  struct origin *origin)
 297{
 298        struct origin *porigin = NULL;
 299        struct diff_options diff_opts;
 300        int i;
 301        const char *paths[2];
 302
 303        diff_setup(&diff_opts);
 304        diff_opts.recursive = 1;
 305        diff_opts.detect_rename = DIFF_DETECT_RENAME;
 306        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 307        diff_opts.single_follow = origin->path;
 308        paths[0] = NULL;
 309        diff_tree_setup_paths(paths, &diff_opts);
 310        if (diff_setup_done(&diff_opts) < 0)
 311                die("diff-setup");
 312        diff_tree_sha1(parent->tree->object.sha1,
 313                       origin->commit->tree->object.sha1,
 314                       "", &diff_opts);
 315        diffcore_std(&diff_opts);
 316
 317        for (i = 0; i < diff_queued_diff.nr; i++) {
 318                struct diff_filepair *p = diff_queued_diff.queue[i];
 319                if ((p->status == 'R' || p->status == 'C') &&
 320                    !strcmp(p->two->path, origin->path)) {
 321                        porigin = get_origin(sb, parent, p->one->path);
 322                        hashcpy(porigin->blob_sha1, p->one->sha1);
 323                        break;
 324                }
 325        }
 326        diff_flush(&diff_opts);
 327        return porigin;
 328}
 329
 330struct chunk {
 331        /* line number in postimage; up to but not including this
 332         * line is the same as preimage
 333         */
 334        int same;
 335
 336        /* preimage line number after this chunk */
 337        int p_next;
 338
 339        /* postimage line number after this chunk */
 340        int t_next;
 341};
 342
 343struct patch {
 344        struct chunk *chunks;
 345        int num;
 346};
 347
 348struct blame_diff_state {
 349        struct xdiff_emit_state xm;
 350        struct patch *ret;
 351        unsigned hunk_post_context;
 352        unsigned hunk_in_pre_context : 1;
 353};
 354
 355static void process_u_diff(void *state_, char *line, unsigned long len)
 356{
 357        struct blame_diff_state *state = state_;
 358        struct chunk *chunk;
 359        int off1, off2, len1, len2, num;
 360
 361        num = state->ret->num;
 362        if (len < 4 || line[0] != '@' || line[1] != '@') {
 363                if (state->hunk_in_pre_context && line[0] == ' ')
 364                        state->ret->chunks[num - 1].same++;
 365                else {
 366                        state->hunk_in_pre_context = 0;
 367                        if (line[0] == ' ')
 368                                state->hunk_post_context++;
 369                        else
 370                                state->hunk_post_context = 0;
 371                }
 372                return;
 373        }
 374
 375        if (num && state->hunk_post_context) {
 376                chunk = &state->ret->chunks[num - 1];
 377                chunk->p_next -= state->hunk_post_context;
 378                chunk->t_next -= state->hunk_post_context;
 379        }
 380        state->ret->num = ++num;
 381        state->ret->chunks = xrealloc(state->ret->chunks,
 382                                      sizeof(struct chunk) * num);
 383        chunk = &state->ret->chunks[num - 1];
 384        if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
 385                state->ret->num--;
 386                return;
 387        }
 388
 389        /* Line numbers in patch output are one based. */
 390        off1--;
 391        off2--;
 392
 393        chunk->same = len2 ? off2 : (off2 + 1);
 394
 395        chunk->p_next = off1 + (len1 ? len1 : 1);
 396        chunk->t_next = chunk->same + len2;
 397        state->hunk_in_pre_context = 1;
 398        state->hunk_post_context = 0;
 399}
 400
 401static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
 402                                    int context)
 403{
 404        struct blame_diff_state state;
 405        xpparam_t xpp;
 406        xdemitconf_t xecfg;
 407        xdemitcb_t ecb;
 408
 409        xpp.flags = XDF_NEED_MINIMAL;
 410        xecfg.ctxlen = context;
 411        xecfg.flags = 0;
 412        ecb.outf = xdiff_outf;
 413        ecb.priv = &state;
 414        memset(&state, 0, sizeof(state));
 415        state.xm.consume = process_u_diff;
 416        state.ret = xmalloc(sizeof(struct patch));
 417        state.ret->chunks = NULL;
 418        state.ret->num = 0;
 419
 420        xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);
 421
 422        if (state.ret->num) {
 423                struct chunk *chunk;
 424                chunk = &state.ret->chunks[state.ret->num - 1];
 425                chunk->p_next -= state.hunk_post_context;
 426                chunk->t_next -= state.hunk_post_context;
 427        }
 428        return state.ret;
 429}
 430
 431static struct patch *get_patch(struct origin *parent, struct origin *origin)
 432{
 433        mmfile_t file_p, file_o;
 434        char type[10];
 435        char *blob_p, *blob_o;
 436        struct patch *patch;
 437
 438        blob_p = read_sha1_file(parent->blob_sha1, type,
 439                                (unsigned long *) &file_p.size);
 440        blob_o = read_sha1_file(origin->blob_sha1, type,
 441                                (unsigned long *) &file_o.size);
 442        file_p.ptr = blob_p;
 443        file_o.ptr = blob_o;
 444        if (!file_p.ptr || !file_o.ptr) {
 445                free(blob_p);
 446                free(blob_o);
 447                return NULL;
 448        }
 449
 450        patch = compare_buffer(&file_p, &file_o, 0);
 451        free(blob_p);
 452        free(blob_o);
 453        return patch;
 454}
 455
 456static void free_patch(struct patch *p)
 457{
 458        free(p->chunks);
 459        free(p);
 460}
 461
 462static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
 463{
 464        struct blame_entry *ent, *prev = NULL;
 465
 466        origin_incref(e->suspect);
 467
 468        for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
 469                prev = ent;
 470
 471        /* prev, if not NULL, is the last one that is below e */
 472        e->prev = prev;
 473        if (prev) {
 474                e->next = prev->next;
 475                prev->next = e;
 476        }
 477        else {
 478                e->next = sb->ent;
 479                sb->ent = e;
 480        }
 481        if (e->next)
 482                e->next->prev = e;
 483}
 484
 485static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
 486{
 487        struct blame_entry *p, *n;
 488
 489        p = dst->prev;
 490        n = dst->next;
 491        origin_incref(src->suspect);
 492        origin_decref(dst->suspect);
 493        memcpy(dst, src, sizeof(*src));
 494        dst->prev = p;
 495        dst->next = n;
 496        dst->score = 0;
 497}
 498
 499static const char *nth_line(struct scoreboard *sb, int lno)
 500{
 501        return sb->final_buf + sb->lineno[lno];
 502}
 503
 504static void split_overlap(struct blame_entry *split,
 505                          struct blame_entry *e,
 506                          int tlno, int plno, int same,
 507                          struct origin *parent)
 508{
 509        /* it is known that lines between tlno to same came from
 510         * parent, and e has an overlap with that range.  it also is
 511         * known that parent's line plno corresponds to e's line tlno.
 512         *
 513         *                <---- e ----->
 514         *                   <------>
 515         *                   <------------>
 516         *             <------------>
 517         *             <------------------>
 518         *
 519         * Potentially we need to split e into three parts; before
 520         * this chunk, the chunk to be blamed for parent, and after
 521         * that portion.
 522         */
 523        int chunk_end_lno;
 524        memset(split, 0, sizeof(struct blame_entry [3]));
 525
 526        if (e->s_lno < tlno) {
 527                /* there is a pre-chunk part not blamed on parent */
 528                split[0].suspect = origin_incref(e->suspect);
 529                split[0].lno = e->lno;
 530                split[0].s_lno = e->s_lno;
 531                split[0].num_lines = tlno - e->s_lno;
 532                split[1].lno = e->lno + tlno - e->s_lno;
 533                split[1].s_lno = plno;
 534        }
 535        else {
 536                split[1].lno = e->lno;
 537                split[1].s_lno = plno + (e->s_lno - tlno);
 538        }
 539
 540        if (same < e->s_lno + e->num_lines) {
 541                /* there is a post-chunk part not blamed on parent */
 542                split[2].suspect = origin_incref(e->suspect);
 543                split[2].lno = e->lno + (same - e->s_lno);
 544                split[2].s_lno = e->s_lno + (same - e->s_lno);
 545                split[2].num_lines = e->s_lno + e->num_lines - same;
 546                chunk_end_lno = split[2].lno;
 547        }
 548        else
 549                chunk_end_lno = e->lno + e->num_lines;
 550        split[1].num_lines = chunk_end_lno - split[1].lno;
 551
 552        if (split[1].num_lines < 1)
 553                return;
 554        split[1].suspect = origin_incref(parent);
 555}
 556
 557static void split_blame(struct scoreboard *sb,
 558                        struct blame_entry *split,
 559                        struct blame_entry *e)
 560{
 561        struct blame_entry *new_entry;
 562
 563        if (split[0].suspect && split[2].suspect) {
 564                /* we need to split e into two and add another for parent */
 565                dup_entry(e, &split[0]);
 566
 567                new_entry = xmalloc(sizeof(*new_entry));
 568                memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
 569                add_blame_entry(sb, new_entry);
 570
 571                new_entry = xmalloc(sizeof(*new_entry));
 572                memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
 573                add_blame_entry(sb, new_entry);
 574        }
 575        else if (!split[0].suspect && !split[2].suspect)
 576                /* parent covers the entire area */
 577                dup_entry(e, &split[1]);
 578        else if (split[0].suspect) {
 579                dup_entry(e, &split[0]);
 580
 581                new_entry = xmalloc(sizeof(*new_entry));
 582                memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
 583                add_blame_entry(sb, new_entry);
 584        }
 585        else {
 586                dup_entry(e, &split[1]);
 587
 588                new_entry = xmalloc(sizeof(*new_entry));
 589                memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
 590                add_blame_entry(sb, new_entry);
 591        }
 592
 593        if (DEBUG) { /* sanity */
 594                struct blame_entry *ent;
 595                int lno = sb->ent->lno, corrupt = 0;
 596
 597                for (ent = sb->ent; ent; ent = ent->next) {
 598                        if (lno != ent->lno)
 599                                corrupt = 1;
 600                        if (ent->s_lno < 0)
 601                                corrupt = 1;
 602                        lno += ent->num_lines;
 603                }
 604                if (corrupt) {
 605                        lno = sb->ent->lno;
 606                        for (ent = sb->ent; ent; ent = ent->next) {
 607                                printf("L %8d l %8d n %8d\n",
 608                                       lno, ent->lno, ent->num_lines);
 609                                lno = ent->lno + ent->num_lines;
 610                        }
 611                        die("oops");
 612                }
 613        }
 614}
 615
 616static void decref_split(struct blame_entry *split)
 617{
 618        int i;
 619
 620        for (i = 0; i < 3; i++)
 621                origin_decref(split[i].suspect);
 622}
 623
 624static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
 625                          int tlno, int plno, int same,
 626                          struct origin *parent)
 627{
 628        struct blame_entry split[3];
 629
 630        split_overlap(split, e, tlno, plno, same, parent);
 631        if (split[1].suspect)
 632                split_blame(sb, split, e);
 633        decref_split(split);
 634}
 635
 636static int find_last_in_target(struct scoreboard *sb, struct origin *target)
 637{
 638        struct blame_entry *e;
 639        int last_in_target = -1;
 640
 641        for (e = sb->ent; e; e = e->next) {
 642                if (e->guilty || cmp_suspect(e->suspect, target))
 643                        continue;
 644                if (last_in_target < e->s_lno + e->num_lines)
 645                        last_in_target = e->s_lno + e->num_lines;
 646        }
 647        return last_in_target;
 648}
 649
 650static void blame_chunk(struct scoreboard *sb,
 651                        int tlno, int plno, int same,
 652                        struct origin *target, struct origin *parent)
 653{
 654        struct blame_entry *e;
 655
 656        for (e = sb->ent; e; e = e->next) {
 657                if (e->guilty || cmp_suspect(e->suspect, target))
 658                        continue;
 659                if (same <= e->s_lno)
 660                        continue;
 661                if (tlno < e->s_lno + e->num_lines)
 662                        blame_overlap(sb, e, tlno, plno, same, parent);
 663        }
 664}
 665
 666static int pass_blame_to_parent(struct scoreboard *sb,
 667                                struct origin *target,
 668                                struct origin *parent)
 669{
 670        int i, last_in_target, plno, tlno;
 671        struct patch *patch;
 672
 673        last_in_target = find_last_in_target(sb, target);
 674        if (last_in_target < 0)
 675                return 1; /* nothing remains for this target */
 676
 677        patch = get_patch(parent, target);
 678        plno = tlno = 0;
 679        for (i = 0; i < patch->num; i++) {
 680                struct chunk *chunk = &patch->chunks[i];
 681
 682                blame_chunk(sb, tlno, plno, chunk->same, target, parent);
 683                plno = chunk->p_next;
 684                tlno = chunk->t_next;
 685        }
 686        /* rest (i.e. anything above tlno) are the same as parent */
 687        blame_chunk(sb, tlno, plno, last_in_target, target, parent);
 688
 689        free_patch(patch);
 690        return 0;
 691}
 692
 693static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
 694{
 695        unsigned score;
 696        const char *cp, *ep;
 697
 698        if (e->score)
 699                return e->score;
 700
 701        score = 1;
 702        cp = nth_line(sb, e->lno);
 703        ep = nth_line(sb, e->lno + e->num_lines);
 704        while (cp < ep) {
 705                unsigned ch = *((unsigned char *)cp);
 706                if (isalnum(ch))
 707                        score++;
 708                cp++;
 709        }
 710        e->score = score;
 711        return score;
 712}
 713
 714static void copy_split_if_better(struct scoreboard *sb,
 715                                 struct blame_entry *best_so_far,
 716                                 struct blame_entry *this)
 717{
 718        int i;
 719
 720        if (!this[1].suspect)
 721                return;
 722        if (best_so_far[1].suspect) {
 723                if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
 724                        return;
 725        }
 726
 727        for (i = 0; i < 3; i++)
 728                origin_incref(this[i].suspect);
 729        decref_split(best_so_far);
 730        memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
 731}
 732
 733static void find_copy_in_blob(struct scoreboard *sb,
 734                              struct blame_entry *ent,
 735                              struct origin *parent,
 736                              struct blame_entry *split,
 737                              mmfile_t *file_p)
 738{
 739        const char *cp;
 740        int cnt;
 741        mmfile_t file_o;
 742        struct patch *patch;
 743        int i, plno, tlno;
 744
 745        cp = nth_line(sb, ent->lno);
 746        file_o.ptr = (char*) cp;
 747        cnt = ent->num_lines;
 748
 749        while (cnt && cp < sb->final_buf + sb->final_buf_size) {
 750                if (*cp++ == '\n')
 751                        cnt--;
 752        }
 753        file_o.size = cp - file_o.ptr;
 754
 755        patch = compare_buffer(file_p, &file_o, 1);
 756
 757        memset(split, 0, sizeof(struct blame_entry [3]));
 758        plno = tlno = 0;
 759        for (i = 0; i < patch->num; i++) {
 760                struct chunk *chunk = &patch->chunks[i];
 761
 762                /* tlno to chunk->same are the same as ent */
 763                if (ent->num_lines <= tlno)
 764                        break;
 765                if (tlno < chunk->same) {
 766                        struct blame_entry this[3];
 767                        split_overlap(this, ent,
 768                                      tlno + ent->s_lno, plno,
 769                                      chunk->same + ent->s_lno,
 770                                      parent);
 771                        copy_split_if_better(sb, split, this);
 772                        decref_split(this);
 773                }
 774                plno = chunk->p_next;
 775                tlno = chunk->t_next;
 776        }
 777        free_patch(patch);
 778}
 779
 780static int find_move_in_parent(struct scoreboard *sb,
 781                               struct origin *target,
 782                               struct origin *parent)
 783{
 784        int last_in_target, made_progress;
 785        struct blame_entry *e, split[3];
 786        mmfile_t file_p;
 787        char type[10];
 788        char *blob_p;
 789
 790        last_in_target = find_last_in_target(sb, target);
 791        if (last_in_target < 0)
 792                return 1; /* nothing remains for this target */
 793
 794        blob_p = read_sha1_file(parent->blob_sha1, type,
 795                                (unsigned long *) &file_p.size);
 796        file_p.ptr = blob_p;
 797        if (!file_p.ptr) {
 798                free(blob_p);
 799                return 0;
 800        }
 801
 802        made_progress = 1;
 803        while (made_progress) {
 804                made_progress = 0;
 805                for (e = sb->ent; e; e = e->next) {
 806                        if (e->guilty || cmp_suspect(e->suspect, target))
 807                                continue;
 808                        find_copy_in_blob(sb, e, parent, split, &file_p);
 809                        if (split[1].suspect &&
 810                            blame_move_score < ent_score(sb, &split[1])) {
 811                                split_blame(sb, split, e);
 812                                made_progress = 1;
 813                        }
 814                        decref_split(split);
 815                }
 816        }
 817        free(blob_p);
 818        return 0;
 819}
 820
 821
 822struct blame_list {
 823        struct blame_entry *ent;
 824        struct blame_entry split[3];
 825};
 826
 827static struct blame_list *setup_blame_list(struct scoreboard *sb,
 828                                           struct origin *target,
 829                                           int *num_ents_p)
 830{
 831        struct blame_entry *e;
 832        int num_ents, i;
 833        struct blame_list *blame_list = NULL;
 834
 835        /* Count the number of entries the target is suspected for,
 836         * and prepare a list of entry and the best split.
 837         */
 838        for (e = sb->ent, num_ents = 0; e; e = e->next)
 839                if (!e->guilty && !cmp_suspect(e->suspect, target))
 840                        num_ents++;
 841        if (num_ents) {
 842                blame_list = xcalloc(num_ents, sizeof(struct blame_list));
 843                for (e = sb->ent, i = 0; e; e = e->next)
 844                        if (!e->guilty && !cmp_suspect(e->suspect, target))
 845                                blame_list[i++].ent = e;
 846        }
 847        *num_ents_p = num_ents;
 848        return blame_list;
 849}
 850
 851static int find_copy_in_parent(struct scoreboard *sb,
 852                               struct origin *target,
 853                               struct commit *parent,
 854                               struct origin *porigin,
 855                               int opt)
 856{
 857        struct diff_options diff_opts;
 858        const char *paths[1];
 859        int i, j;
 860        int retval;
 861        struct blame_list *blame_list;
 862        int num_ents;
 863
 864        blame_list = setup_blame_list(sb, target, &num_ents);
 865        if (!blame_list)
 866                return 1; /* nothing remains for this target */
 867
 868        diff_setup(&diff_opts);
 869        diff_opts.recursive = 1;
 870        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 871
 872        paths[0] = NULL;
 873        diff_tree_setup_paths(paths, &diff_opts);
 874        if (diff_setup_done(&diff_opts) < 0)
 875                die("diff-setup");
 876
 877        /* Try "find copies harder" on new path if requested;
 878         * we do not want to use diffcore_rename() actually to
 879         * match things up; find_copies_harder is set only to
 880         * force diff_tree_sha1() to feed all filepairs to diff_queue,
 881         * and this code needs to be after diff_setup_done(), which
 882         * usually makes find-copies-harder imply copy detection.
 883         */
 884        if ((opt & PICKAXE_BLAME_COPY_HARDER) &&
 885            (!porigin || strcmp(target->path, porigin->path)))
 886                diff_opts.find_copies_harder = 1;
 887
 888        diff_tree_sha1(parent->tree->object.sha1,
 889                       target->commit->tree->object.sha1,
 890                       "", &diff_opts);
 891
 892        if (!diff_opts.find_copies_harder)
 893                diffcore_std(&diff_opts);
 894
 895        retval = 0;
 896        while (1) {
 897                int made_progress = 0;
 898
 899                for (i = 0; i < diff_queued_diff.nr; i++) {
 900                        struct diff_filepair *p = diff_queued_diff.queue[i];
 901                        struct origin *norigin;
 902                        mmfile_t file_p;
 903                        char type[10];
 904                        char *blob;
 905                        struct blame_entry this[3];
 906
 907                        if (!DIFF_FILE_VALID(p->one))
 908                                continue; /* does not exist in parent */
 909                        if (porigin && !strcmp(p->one->path, porigin->path))
 910                                /* find_move already dealt with this path */
 911                                continue;
 912
 913                        norigin = get_origin(sb, parent, p->one->path);
 914                        hashcpy(norigin->blob_sha1, p->one->sha1);
 915                        blob = read_sha1_file(norigin->blob_sha1, type,
 916                                              (unsigned long *) &file_p.size);
 917                        file_p.ptr = blob;
 918                        if (!file_p.ptr)
 919                                continue;
 920
 921                        for (j = 0; j < num_ents; j++) {
 922                                find_copy_in_blob(sb, blame_list[j].ent,
 923                                                  norigin, this, &file_p);
 924                                copy_split_if_better(sb, blame_list[j].split,
 925                                                     this);
 926                                decref_split(this);
 927                        }
 928                        free(blob);
 929                        origin_decref(norigin);
 930                }
 931
 932                for (j = 0; j < num_ents; j++) {
 933                        struct blame_entry *split = blame_list[j].split;
 934                        if (split[1].suspect &&
 935                            blame_copy_score < ent_score(sb, &split[1])) {
 936                                split_blame(sb, split, blame_list[j].ent);
 937                                made_progress = 1;
 938                        }
 939                        decref_split(split);
 940                }
 941                free(blame_list);
 942
 943                if (!made_progress)
 944                        break;
 945                blame_list = setup_blame_list(sb, target, &num_ents);
 946                if (!blame_list) {
 947                        retval = 1;
 948                        break;
 949                }
 950        }
 951        diff_flush(&diff_opts);
 952
 953        return retval;
 954}
 955
 956#define MAXPARENT 16
 957
 958static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
 959{
 960        int i, pass;
 961        struct commit *commit = origin->commit;
 962        struct commit_list *parent;
 963        struct origin *parent_origin[MAXPARENT], *porigin;
 964
 965        memset(parent_origin, 0, sizeof(parent_origin));
 966
 967        /* The first pass looks for unrenamed path to optimize for
 968         * common cases, then we look for renames in the second pass.
 969         */
 970        for (pass = 0; pass < 2; pass++) {
 971                struct origin *(*find)(struct scoreboard *,
 972                                       struct commit *, struct origin *);
 973                find = pass ? find_rename : find_origin;
 974
 975                for (i = 0, parent = commit->parents;
 976                     i < MAXPARENT && parent;
 977                     parent = parent->next, i++) {
 978                        struct commit *p = parent->item;
 979                        int j, same;
 980
 981                        if (parent_origin[i])
 982                                continue;
 983                        if (parse_commit(p))
 984                                continue;
 985                        porigin = find(sb, p, origin);
 986                        if (!porigin)
 987                                continue;
 988                        if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
 989                                struct blame_entry *e;
 990                                for (e = sb->ent; e; e = e->next)
 991                                        if (e->suspect == origin) {
 992                                                origin_incref(porigin);
 993                                                origin_decref(e->suspect);
 994                                                e->suspect = porigin;
 995                                        }
 996                                origin_decref(porigin);
 997                                goto finish;
 998                        }
 999                        for (j = same = 0; j < i; j++)
1000                                if (parent_origin[j] &&
1001                                    !hashcmp(parent_origin[j]->blob_sha1,
1002                                             porigin->blob_sha1)) {
1003                                        same = 1;
1004                                        break;
1005                                }
1006                        if (!same)
1007                                parent_origin[i] = porigin;
1008                        else
1009                                origin_decref(porigin);
1010                }
1011        }
1012
1013        for (i = 0, parent = commit->parents;
1014             i < MAXPARENT && parent;
1015             parent = parent->next, i++) {
1016                struct origin *porigin = parent_origin[i];
1017                if (!porigin)
1018                        continue;
1019                if (pass_blame_to_parent(sb, origin, porigin))
1020                        goto finish;
1021        }
1022
1023        /*
1024         * Optionally run "miff" to find moves in parents' files here.
1025         */
1026        if (opt & PICKAXE_BLAME_MOVE)
1027                for (i = 0, parent = commit->parents;
1028                     i < MAXPARENT && parent;
1029                     parent = parent->next, i++) {
1030                        struct origin *porigin = parent_origin[i];
1031                        if (!porigin)
1032                                continue;
1033                        if (find_move_in_parent(sb, origin, porigin))
1034                                goto finish;
1035                }
1036
1037        /*
1038         * Optionally run "ciff" to find copies from parents' files here.
1039         */
1040        if (opt & PICKAXE_BLAME_COPY)
1041                for (i = 0, parent = commit->parents;
1042                     i < MAXPARENT && parent;
1043                     parent = parent->next, i++) {
1044                        struct origin *porigin = parent_origin[i];
1045                        if (find_copy_in_parent(sb, origin, parent->item,
1046                                                porigin, opt))
1047                                goto finish;
1048                }
1049
1050 finish:
1051        for (i = 0; i < MAXPARENT; i++)
1052                origin_decref(parent_origin[i]);
1053}
1054
1055static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
1056{
1057        while (1) {
1058                struct blame_entry *ent;
1059                struct commit *commit;
1060                struct origin *suspect = NULL;
1061
1062                /* find one suspect to break down */
1063                for (ent = sb->ent; !suspect && ent; ent = ent->next)
1064                        if (!ent->guilty)
1065                                suspect = ent->suspect;
1066                if (!suspect)
1067                        return; /* all done */
1068
1069                origin_incref(suspect);
1070                commit = suspect->commit;
1071                parse_commit(commit);
1072                if (!(commit->object.flags & UNINTERESTING) &&
1073                    !(revs->max_age != -1 && commit->date  < revs->max_age))
1074                        pass_blame(sb, suspect, opt);
1075
1076                /* Take responsibility for the remaining entries */
1077                for (ent = sb->ent; ent; ent = ent->next)
1078                        if (!cmp_suspect(ent->suspect, suspect))
1079                                ent->guilty = 1;
1080                origin_decref(suspect);
1081
1082                if (DEBUG) /* sanity */
1083                        sanity_check_refcnt(sb);
1084        }
1085}
1086
1087static const char *format_time(unsigned long time, const char *tz_str,
1088                               int show_raw_time)
1089{
1090        static char time_buf[128];
1091        time_t t = time;
1092        int minutes, tz;
1093        struct tm *tm;
1094
1095        if (show_raw_time) {
1096                sprintf(time_buf, "%lu %s", time, tz_str);
1097                return time_buf;
1098        }
1099
1100        tz = atoi(tz_str);
1101        minutes = tz < 0 ? -tz : tz;
1102        minutes = (minutes / 100)*60 + (minutes % 100);
1103        minutes = tz < 0 ? -minutes : minutes;
1104        t = time + minutes * 60;
1105        tm = gmtime(&t);
1106
1107        strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
1108        strcat(time_buf, tz_str);
1109        return time_buf;
1110}
1111
1112struct commit_info
1113{
1114        char *author;
1115        char *author_mail;
1116        unsigned long author_time;
1117        char *author_tz;
1118
1119        /* filled only when asked for details */
1120        char *committer;
1121        char *committer_mail;
1122        unsigned long committer_time;
1123        char *committer_tz;
1124
1125        char *summary;
1126};
1127
1128static void get_ac_line(const char *inbuf, const char *what,
1129                        int bufsz, char *person, char **mail,
1130                        unsigned long *time, char **tz)
1131{
1132        int len;
1133        char *tmp, *endp;
1134
1135        tmp = strstr(inbuf, what);
1136        if (!tmp)
1137                goto error_out;
1138        tmp += strlen(what);
1139        endp = strchr(tmp, '\n');
1140        if (!endp)
1141                len = strlen(tmp);
1142        else
1143                len = endp - tmp;
1144        if (bufsz <= len) {
1145        error_out:
1146                /* Ugh */
1147                person = *mail = *tz = "(unknown)";
1148                *time = 0;
1149                return;
1150        }
1151        memcpy(person, tmp, len);
1152
1153        tmp = person;
1154        tmp += len;
1155        *tmp = 0;
1156        while (*tmp != ' ')
1157                tmp--;
1158        *tz = tmp+1;
1159
1160        *tmp = 0;
1161        while (*tmp != ' ')
1162                tmp--;
1163        *time = strtoul(tmp, NULL, 10);
1164
1165        *tmp = 0;
1166        while (*tmp != ' ')
1167                tmp--;
1168        *mail = tmp + 1;
1169        *tmp = 0;
1170}
1171
1172static void get_commit_info(struct commit *commit,
1173                            struct commit_info *ret,
1174                            int detailed)
1175{
1176        int len;
1177        char *tmp, *endp;
1178        static char author_buf[1024];
1179        static char committer_buf[1024];
1180        static char summary_buf[1024];
1181
1182        /* We've operated without save_commit_buffer, so
1183         * we now need to populate them for output.
1184         */
1185        if (!commit->buffer) {
1186                char type[20];
1187                unsigned long size;
1188                commit->buffer =
1189                        read_sha1_file(commit->object.sha1, type, &size);
1190        }
1191        ret->author = author_buf;
1192        get_ac_line(commit->buffer, "\nauthor ",
1193                    sizeof(author_buf), author_buf, &ret->author_mail,
1194                    &ret->author_time, &ret->author_tz);
1195
1196        if (!detailed)
1197                return;
1198
1199        ret->committer = committer_buf;
1200        get_ac_line(commit->buffer, "\ncommitter ",
1201                    sizeof(committer_buf), committer_buf, &ret->committer_mail,
1202                    &ret->committer_time, &ret->committer_tz);
1203
1204        ret->summary = summary_buf;
1205        tmp = strstr(commit->buffer, "\n\n");
1206        if (!tmp) {
1207        error_out:
1208                sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
1209                return;
1210        }
1211        tmp += 2;
1212        endp = strchr(tmp, '\n');
1213        if (!endp)
1214                goto error_out;
1215        len = endp - tmp;
1216        if (len >= sizeof(summary_buf))
1217                goto error_out;
1218        memcpy(summary_buf, tmp, len);
1219        summary_buf[len] = 0;
1220}
1221
1222#define OUTPUT_ANNOTATE_COMPAT  001
1223#define OUTPUT_LONG_OBJECT_NAME 002
1224#define OUTPUT_RAW_TIMESTAMP    004
1225#define OUTPUT_PORCELAIN        010
1226#define OUTPUT_SHOW_NAME        020
1227#define OUTPUT_SHOW_NUMBER      040
1228#define OUTPUT_SHOW_SCORE      0100
1229
1230static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
1231{
1232        int cnt;
1233        const char *cp;
1234        struct origin *suspect = ent->suspect;
1235        char hex[41];
1236
1237        strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1238        printf("%s%c%d %d %d\n",
1239               hex,
1240               ent->guilty ? ' ' : '*', // purely for debugging
1241               ent->s_lno + 1,
1242               ent->lno + 1,
1243               ent->num_lines);
1244        if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1245                struct commit_info ci;
1246                suspect->commit->object.flags |= METAINFO_SHOWN;
1247                get_commit_info(suspect->commit, &ci, 1);
1248                printf("author %s\n", ci.author);
1249                printf("author-mail %s\n", ci.author_mail);
1250                printf("author-time %lu\n", ci.author_time);
1251                printf("author-tz %s\n", ci.author_tz);
1252                printf("committer %s\n", ci.committer);
1253                printf("committer-mail %s\n", ci.committer_mail);
1254                printf("committer-time %lu\n", ci.committer_time);
1255                printf("committer-tz %s\n", ci.committer_tz);
1256                printf("filename %s\n", suspect->path);
1257                printf("summary %s\n", ci.summary);
1258        }
1259        else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1260                printf("filename %s\n", suspect->path);
1261
1262        cp = nth_line(sb, ent->lno);
1263        for (cnt = 0; cnt < ent->num_lines; cnt++) {
1264                char ch;
1265                if (cnt)
1266                        printf("%s %d %d\n", hex,
1267                               ent->s_lno + 1 + cnt,
1268                               ent->lno + 1 + cnt);
1269                putchar('\t');
1270                do {
1271                        ch = *cp++;
1272                        putchar(ch);
1273                } while (ch != '\n' &&
1274                         cp < sb->final_buf + sb->final_buf_size);
1275        }
1276}
1277
1278static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1279{
1280        int cnt;
1281        const char *cp;
1282        struct origin *suspect = ent->suspect;
1283        struct commit_info ci;
1284        char hex[41];
1285        int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1286
1287        get_commit_info(suspect->commit, &ci, 1);
1288        strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1289
1290        cp = nth_line(sb, ent->lno);
1291        for (cnt = 0; cnt < ent->num_lines; cnt++) {
1292                char ch;
1293
1294                printf("%.*s", (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8, hex);
1295                if (opt & OUTPUT_ANNOTATE_COMPAT)
1296                        printf("\t(%10s\t%10s\t%d)", ci.author,
1297                               format_time(ci.author_time, ci.author_tz,
1298                                           show_raw_time),
1299                               ent->lno + 1 + cnt);
1300                else {
1301                        if (opt & OUTPUT_SHOW_SCORE)
1302                                printf(" %*d %02d",
1303                                       max_score_digits, ent->score,
1304                                       ent->suspect->refcnt);
1305                        if (opt & OUTPUT_SHOW_NAME)
1306                                printf(" %-*.*s", longest_file, longest_file,
1307                                       suspect->path);
1308                        if (opt & OUTPUT_SHOW_NUMBER)
1309                                printf(" %*d", max_orig_digits,
1310                                       ent->s_lno + 1 + cnt);
1311                        printf(" (%-*.*s %10s %*d) ",
1312                               longest_author, longest_author, ci.author,
1313                               format_time(ci.author_time, ci.author_tz,
1314                                           show_raw_time),
1315                               max_digits, ent->lno + 1 + cnt);
1316                }
1317                do {
1318                        ch = *cp++;
1319                        putchar(ch);
1320                } while (ch != '\n' &&
1321                         cp < sb->final_buf + sb->final_buf_size);
1322        }
1323}
1324
1325static void output(struct scoreboard *sb, int option)
1326{
1327        struct blame_entry *ent;
1328
1329        if (option & OUTPUT_PORCELAIN) {
1330                for (ent = sb->ent; ent; ent = ent->next) {
1331                        struct blame_entry *oth;
1332                        struct origin *suspect = ent->suspect;
1333                        struct commit *commit = suspect->commit;
1334                        if (commit->object.flags & MORE_THAN_ONE_PATH)
1335                                continue;
1336                        for (oth = ent->next; oth; oth = oth->next) {
1337                                if ((oth->suspect->commit != commit) ||
1338                                    !strcmp(oth->suspect->path, suspect->path))
1339                                        continue;
1340                                commit->object.flags |= MORE_THAN_ONE_PATH;
1341                                break;
1342                        }
1343                }
1344        }
1345
1346        for (ent = sb->ent; ent; ent = ent->next) {
1347                if (option & OUTPUT_PORCELAIN)
1348                        emit_porcelain(sb, ent);
1349                else {
1350                        emit_other(sb, ent, option);
1351                }
1352        }
1353}
1354
1355static int prepare_lines(struct scoreboard *sb)
1356{
1357        const char *buf = sb->final_buf;
1358        unsigned long len = sb->final_buf_size;
1359        int num = 0, incomplete = 0, bol = 1;
1360
1361        if (len && buf[len-1] != '\n')
1362                incomplete++; /* incomplete line at the end */
1363        while (len--) {
1364                if (bol) {
1365                        sb->lineno = xrealloc(sb->lineno,
1366                                              sizeof(int* ) * (num + 1));
1367                        sb->lineno[num] = buf - sb->final_buf;
1368                        bol = 0;
1369                }
1370                if (*buf++ == '\n') {
1371                        num++;
1372                        bol = 1;
1373                }
1374        }
1375        sb->lineno = xrealloc(sb->lineno,
1376                              sizeof(int* ) * (num + incomplete + 1));
1377        sb->lineno[num + incomplete] = buf - sb->final_buf;
1378        sb->num_lines = num + incomplete;
1379        return sb->num_lines;
1380}
1381
1382static int read_ancestry(const char *graft_file)
1383{
1384        FILE *fp = fopen(graft_file, "r");
1385        char buf[1024];
1386        if (!fp)
1387                return -1;
1388        while (fgets(buf, sizeof(buf), fp)) {
1389                /* The format is just "Commit Parent1 Parent2 ...\n" */
1390                int len = strlen(buf);
1391                struct commit_graft *graft = read_graft_line(buf, len);
1392                register_commit_graft(graft, 0);
1393        }
1394        fclose(fp);
1395        return 0;
1396}
1397
1398static int lineno_width(int lines)
1399{
1400        int i, width;
1401
1402        for (width = 1, i = 10; i <= lines + 1; width++)
1403                i *= 10;
1404        return width;
1405}
1406
1407static void find_alignment(struct scoreboard *sb, int *option)
1408{
1409        int longest_src_lines = 0;
1410        int longest_dst_lines = 0;
1411        unsigned largest_score = 0;
1412        struct blame_entry *e;
1413
1414        for (e = sb->ent; e; e = e->next) {
1415                struct origin *suspect = e->suspect;
1416                struct commit_info ci;
1417                int num;
1418
1419                if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1420                        suspect->commit->object.flags |= METAINFO_SHOWN;
1421                        get_commit_info(suspect->commit, &ci, 1);
1422                        if (strcmp(suspect->path, sb->path))
1423                                *option |= OUTPUT_SHOW_NAME;
1424                        num = strlen(suspect->path);
1425                        if (longest_file < num)
1426                                longest_file = num;
1427                        num = strlen(ci.author);
1428                        if (longest_author < num)
1429                                longest_author = num;
1430                }
1431                num = e->s_lno + e->num_lines;
1432                if (longest_src_lines < num)
1433                        longest_src_lines = num;
1434                num = e->lno + e->num_lines;
1435                if (longest_dst_lines < num)
1436                        longest_dst_lines = num;
1437                if (largest_score < ent_score(sb, e))
1438                        largest_score = ent_score(sb, e);
1439        }
1440        max_orig_digits = lineno_width(longest_src_lines);
1441        max_digits = lineno_width(longest_dst_lines);
1442        max_score_digits = lineno_width(largest_score);
1443}
1444
1445static void sanity_check_refcnt(struct scoreboard *sb)
1446{
1447        int baa = 0;
1448        struct blame_entry *ent;
1449
1450        for (ent = sb->ent; ent; ent = ent->next) {
1451                /* Nobody should have zero or negative refcnt */
1452                if (ent->suspect->refcnt <= 0) {
1453                        fprintf(stderr, "%s in %s has negative refcnt %d\n",
1454                                ent->suspect->path,
1455                                sha1_to_hex(ent->suspect->commit->object.sha1),
1456                                ent->suspect->refcnt);
1457                        baa = 1;
1458                }
1459        }
1460        for (ent = sb->ent; ent; ent = ent->next) {
1461                /* Mark the ones that haven't been checked */
1462                if (0 < ent->suspect->refcnt)
1463                        ent->suspect->refcnt = -ent->suspect->refcnt;
1464        }
1465        for (ent = sb->ent; ent; ent = ent->next) {
1466                /* then pick each and see if they have the the correct
1467                 * refcnt.
1468                 */
1469                int found;
1470                struct blame_entry *e;
1471                struct origin *suspect = ent->suspect;
1472
1473                if (0 < suspect->refcnt)
1474                        continue;
1475                suspect->refcnt = -suspect->refcnt; /* Unmark */
1476                for (found = 0, e = sb->ent; e; e = e->next) {
1477                        if (e->suspect != suspect)
1478                                continue;
1479                        found++;
1480                }
1481                if (suspect->refcnt != found) {
1482                        fprintf(stderr, "%s in %s has refcnt %d, not %d\n",
1483                                ent->suspect->path,
1484                                sha1_to_hex(ent->suspect->commit->object.sha1),
1485                                ent->suspect->refcnt, found);
1486                        baa = 2;
1487                }
1488        }
1489        if (baa) {
1490                int opt = 0160;
1491                find_alignment(sb, &opt);
1492                output(sb, opt);
1493                die("Baa %d!", baa);
1494        }
1495}
1496
1497static int has_path_in_work_tree(const char *path)
1498{
1499        struct stat st;
1500        return !lstat(path, &st);
1501}
1502
1503static unsigned parse_score(const char *arg)
1504{
1505        char *end;
1506        unsigned long score = strtoul(arg, &end, 10);
1507        if (*end)
1508                return 0;
1509        return score;
1510}
1511
1512static const char *add_prefix(const char *prefix, const char *path)
1513{
1514        if (!prefix || !prefix[0])
1515                return path;
1516        return prefix_path(prefix, strlen(prefix), path);
1517}
1518
1519int cmd_pickaxe(int argc, const char **argv, const char *prefix)
1520{
1521        struct rev_info revs;
1522        const char *path;
1523        struct scoreboard sb;
1524        struct origin *o;
1525        struct blame_entry *ent;
1526        int i, seen_dashdash, unk, opt;
1527        long bottom, top, lno;
1528        int output_option = 0;
1529        const char *revs_file = NULL;
1530        const char *final_commit_name = NULL;
1531        char type[10];
1532
1533        save_commit_buffer = 0;
1534
1535        opt = 0;
1536        bottom = top = 0;
1537        seen_dashdash = 0;
1538        for (unk = i = 1; i < argc; i++) {
1539                const char *arg = argv[i];
1540                if (*arg != '-')
1541                        break;
1542                else if (!strcmp("-c", arg))
1543                        output_option |= OUTPUT_ANNOTATE_COMPAT;
1544                else if (!strcmp("-t", arg))
1545                        output_option |= OUTPUT_RAW_TIMESTAMP;
1546                else if (!strcmp("-l", arg))
1547                        output_option |= OUTPUT_LONG_OBJECT_NAME;
1548                else if (!strcmp("-S", arg) && ++i < argc)
1549                        revs_file = argv[i];
1550                else if (!strncmp("-M", arg, 2)) {
1551                        opt |= PICKAXE_BLAME_MOVE;
1552                        blame_move_score = parse_score(arg+2);
1553                }
1554                else if (!strncmp("-C", arg, 2)) {
1555                        if (opt & PICKAXE_BLAME_COPY)
1556                                opt |= PICKAXE_BLAME_COPY_HARDER;
1557                        opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
1558                        blame_copy_score = parse_score(arg+2);
1559                }
1560                else if (!strncmp("-L", arg, 2)) {
1561                        char *term;
1562                        if (!arg[2]) {
1563                                if (++i >= argc)
1564                                        usage(pickaxe_usage);
1565                                arg = argv[i];
1566                        }
1567                        else
1568                                arg += 2;
1569                        if (bottom || top)
1570                                die("More than one '-L n,m' option given");
1571                        bottom = strtol(arg, &term, 10);
1572                        if (*term == ',') {
1573                                top = strtol(term + 1, &term, 10);
1574                                if (*term)
1575                                        usage(pickaxe_usage);
1576                        }
1577                        if (bottom && top && top < bottom) {
1578                                unsigned long tmp;
1579                                tmp = top; top = bottom; bottom = tmp;
1580                        }
1581                }
1582                else if (!strcmp("--score-debug", arg))
1583                        output_option |= OUTPUT_SHOW_SCORE;
1584                else if (!strcmp("-f", arg) ||
1585                         !strcmp("--show-name", arg))
1586                        output_option |= OUTPUT_SHOW_NAME;
1587                else if (!strcmp("-n", arg) ||
1588                         !strcmp("--show-number", arg))
1589                        output_option |= OUTPUT_SHOW_NUMBER;
1590                else if (!strcmp("-p", arg) ||
1591                         !strcmp("--porcelain", arg))
1592                        output_option |= OUTPUT_PORCELAIN;
1593                else if (!strcmp("--", arg)) {
1594                        seen_dashdash = 1;
1595                        i++;
1596                        break;
1597                }
1598                else
1599                        argv[unk++] = arg;
1600        }
1601
1602        if (!blame_move_score)
1603                blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
1604        if (!blame_copy_score)
1605                blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
1606
1607        /* We have collected options unknown to us in argv[1..unk]
1608         * which are to be passed to revision machinery if we are
1609         * going to do the "bottom" procesing.
1610         *
1611         * The remaining are:
1612         *
1613         * (1) if seen_dashdash, its either
1614         *     "-options -- <path>" or
1615         *     "-options -- <path> <rev>".
1616         *     but the latter is allowed only if there is no
1617         *     options that we passed to revision machinery.
1618         *
1619         * (2) otherwise, we may have "--" somewhere later and
1620         *     might be looking at the first one of multiple 'rev'
1621         *     parameters (e.g. " master ^next ^maint -- path").
1622         *     See if there is a dashdash first, and give the
1623         *     arguments before that to revision machinery.
1624         *     After that there must be one 'path'.
1625         *
1626         * (3) otherwise, its one of the three:
1627         *     "-options <path> <rev>"
1628         *     "-options <rev> <path>"
1629         *     "-options <path>"
1630         *     but again the first one is allowed only if
1631         *     there is no options that we passed to revision
1632         *     machinery.
1633         */
1634
1635        if (seen_dashdash) {
1636                /* (1) */
1637                if (argc <= i)
1638                        usage(pickaxe_usage);
1639                path = add_prefix(prefix, argv[i]);
1640                if (i + 1 == argc - 1) {
1641                        if (unk != 1)
1642                                usage(pickaxe_usage);
1643                        argv[unk++] = argv[i + 1];
1644                }
1645                else if (i + 1 != argc)
1646                        /* garbage at end */
1647                        usage(pickaxe_usage);
1648        }
1649        else {
1650                int j;
1651                for (j = i; !seen_dashdash && j < argc; j++)
1652                        if (!strcmp(argv[j], "--"))
1653                                seen_dashdash = j;
1654                if (seen_dashdash) {
1655                        if (seen_dashdash + 1 != argc - 1)
1656                                usage(pickaxe_usage);
1657                        path = add_prefix(prefix, argv[seen_dashdash + 1]);
1658                        for (j = i; j < seen_dashdash; j++)
1659                                argv[unk++] = argv[j];
1660                }
1661                else {
1662                        /* (3) */
1663                        path = add_prefix(prefix, argv[i]);
1664                        if (i + 1 == argc - 1) {
1665                                final_commit_name = argv[i + 1];
1666
1667                                /* if (unk == 1) we could be getting
1668                                 * old-style
1669                                 */
1670                                if (unk == 1 && !has_path_in_work_tree(path)) {
1671                                        path = add_prefix(prefix, argv[i + 1]);
1672                                        final_commit_name = argv[i];
1673                                }
1674                        }
1675                        else if (i != argc - 1)
1676                                usage(pickaxe_usage); /* garbage at end */
1677
1678                        if (!has_path_in_work_tree(path))
1679                                die("cannot stat path %s: %s",
1680                                    path, strerror(errno));
1681                }
1682        }
1683
1684        if (final_commit_name)
1685                argv[unk++] = final_commit_name;
1686
1687        /* Now we got rev and path.  We do not want the path pruning
1688         * but we may want "bottom" processing.
1689         */
1690        argv[unk] = NULL;
1691
1692        init_revisions(&revs, NULL);
1693        setup_revisions(unk, argv, &revs, "HEAD");
1694        memset(&sb, 0, sizeof(sb));
1695
1696        /* There must be one and only one positive commit in the
1697         * revs->pending array.
1698         */
1699        for (i = 0; i < revs.pending.nr; i++) {
1700                struct object *obj = revs.pending.objects[i].item;
1701                if (obj->flags & UNINTERESTING)
1702                        continue;
1703                while (obj->type == OBJ_TAG)
1704                        obj = deref_tag(obj, NULL, 0);
1705                if (obj->type != OBJ_COMMIT)
1706                        die("Non commit %s?",
1707                            revs.pending.objects[i].name);
1708                if (sb.final)
1709                        die("More than one commit to dig from %s and %s?",
1710                            revs.pending.objects[i].name,
1711                            final_commit_name);
1712                sb.final = (struct commit *) obj;
1713                final_commit_name = revs.pending.objects[i].name;
1714        }
1715
1716        if (!sb.final) {
1717                /* "--not A B -- path" without anything positive */
1718                unsigned char head_sha1[20];
1719
1720                final_commit_name = "HEAD";
1721                if (get_sha1(final_commit_name, head_sha1))
1722                        die("No such ref: HEAD");
1723                sb.final = lookup_commit_reference(head_sha1);
1724                add_pending_object(&revs, &(sb.final->object), "HEAD");
1725        }
1726
1727        /* If we have bottom, this will mark the ancestors of the
1728         * bottom commits we would reach while traversing as
1729         * uninteresting.
1730         */
1731        prepare_revision_walk(&revs);
1732
1733        o = get_origin(&sb, sb.final, path);
1734        if (fill_blob_sha1(o))
1735                die("no such path %s in %s", path, final_commit_name);
1736
1737        sb.final_buf = read_sha1_file(o->blob_sha1, type, &sb.final_buf_size);
1738        lno = prepare_lines(&sb);
1739
1740        if (bottom < 1)
1741                bottom = 1;
1742        if (top < 1)
1743                top = lno;
1744        bottom--;
1745        if (lno < top)
1746                die("file %s has only %lu lines", path, lno);
1747
1748        ent = xcalloc(1, sizeof(*ent));
1749        ent->lno = bottom;
1750        ent->num_lines = top - bottom;
1751        ent->suspect = o;
1752        ent->s_lno = bottom;
1753
1754        sb.ent = ent;
1755        sb.path = path;
1756
1757        if (revs_file && read_ancestry(revs_file))
1758                die("reading graft file %s failed: %s",
1759                    revs_file, strerror(errno));
1760
1761        assign_blame(&sb, &revs, opt);
1762
1763        coalesce(&sb);
1764
1765        if (!(output_option & OUTPUT_PORCELAIN))
1766                find_alignment(&sb, &output_option);
1767
1768        output(&sb, output_option);
1769        free((void *)sb.final_buf);
1770        for (ent = sb.ent; ent; ) {
1771                struct blame_entry *e = ent->next;
1772                free(ent);
1773                ent = e;
1774        }
1775        return 0;
1776}