1/* 2 * Pickaxe 3 * 4 * Copyright (c) 2006, Junio C Hamano 5 */ 6 7#include"cache.h" 8#include"builtin.h" 9#include"blob.h" 10#include"commit.h" 11#include"tag.h" 12#include"tree-walk.h" 13#include"diff.h" 14#include"diffcore.h" 15#include"revision.h" 16#include"quote.h" 17#include"xdiff-interface.h" 18#include"cache-tree.h" 19 20static char blame_usage[] = 21"git-blame [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [--contents <filename>] [--incremental] [commit] [--] file\n" 22" -c Use the same output mode as git-annotate (Default: off)\n" 23" -b Show blank SHA-1 for boundary commits (Default: off)\n" 24" -l Show long commit SHA1 (Default: off)\n" 25" --root Do not treat root commits as boundaries (Default: off)\n" 26" -t Show raw timestamp (Default: off)\n" 27" -f, --show-name Show original filename (Default: auto)\n" 28" -n, --show-number Show original linenumber (Default: off)\n" 29" -p, --porcelain Show in a format designed for machine consumption\n" 30" -L n,m Process only line range n,m, counting from 1\n" 31" -M, -C Find line movements within and across files\n" 32" --incremental Show blame entries as we find them, incrementally\n" 33" --contents file Use <file>'s contents as the final image\n" 34" -S revs-file Use revisions from revs-file instead of calling git-rev-list\n"; 35 36static int longest_file; 37static int longest_author; 38static int max_orig_digits; 39static int max_digits; 40static int max_score_digits; 41static int show_root; 42static int blank_boundary; 43static int incremental; 44static int cmd_is_annotate; 45 46#ifndef DEBUG 47#define DEBUG 0 48#endif 49 50/* stats */ 51static int num_read_blob; 52static int num_get_patch; 53static int num_commits; 54 55#define PICKAXE_BLAME_MOVE 01 56#define PICKAXE_BLAME_COPY 02 57#define PICKAXE_BLAME_COPY_HARDER 04 58#define PICKAXE_BLAME_COPY_HARDEST 010 59 60/* 61 * blame for a blame_entry with score lower than these thresholds 62 * is not passed to the parent using move/copy logic. 63 */ 64static unsigned blame_move_score; 65static unsigned blame_copy_score; 66#define BLAME_DEFAULT_MOVE_SCORE 20 67#define BLAME_DEFAULT_COPY_SCORE 40 68 69/* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */ 70#define METAINFO_SHOWN (1u<<12) 71#define MORE_THAN_ONE_PATH (1u<<13) 72 73/* 74 * One blob in a commit that is being suspected 75 */ 76struct origin { 77int refcnt; 78struct commit *commit; 79 mmfile_t file; 80unsigned char blob_sha1[20]; 81char path[FLEX_ARRAY]; 82}; 83 84/* 85 * Given an origin, prepare mmfile_t structure to be used by the 86 * diff machinery 87 */ 88static char*fill_origin_blob(struct origin *o, mmfile_t *file) 89{ 90if(!o->file.ptr) { 91enum object_type type; 92 num_read_blob++; 93 file->ptr =read_sha1_file(o->blob_sha1, &type, 94(unsigned long*)(&(file->size))); 95 o->file = *file; 96} 97else 98*file = o->file; 99return file->ptr; 100} 101 102/* 103 * Origin is refcounted and usually we keep the blob contents to be 104 * reused. 105 */ 106staticinlinestruct origin *origin_incref(struct origin *o) 107{ 108if(o) 109 o->refcnt++; 110return o; 111} 112 113static voidorigin_decref(struct origin *o) 114{ 115if(o && --o->refcnt <=0) { 116if(o->file.ptr) 117free(o->file.ptr); 118memset(o,0,sizeof(*o)); 119free(o); 120} 121} 122 123/* 124 * Each group of lines is described by a blame_entry; it can be split 125 * as we pass blame to the parents. They form a linked list in the 126 * scoreboard structure, sorted by the target line number. 127 */ 128struct blame_entry { 129struct blame_entry *prev; 130struct blame_entry *next; 131 132/* the first line of this group in the final image; 133 * internally all line numbers are 0 based. 134 */ 135int lno; 136 137/* how many lines this group has */ 138int num_lines; 139 140/* the commit that introduced this group into the final image */ 141struct origin *suspect; 142 143/* true if the suspect is truly guilty; false while we have not 144 * checked if the group came from one of its parents. 145 */ 146char guilty; 147 148/* the line number of the first line of this group in the 149 * suspect's file; internally all line numbers are 0 based. 150 */ 151int s_lno; 152 153/* how significant this entry is -- cached to avoid 154 * scanning the lines over and over. 155 */ 156unsigned score; 157}; 158 159/* 160 * The current state of the blame assignment. 161 */ 162struct scoreboard { 163/* the final commit (i.e. where we started digging from) */ 164struct commit *final; 165 166const char*path; 167 168/* 169 * The contents in the final image. 170 * Used by many functions to obtain contents of the nth line, 171 * indexed with scoreboard.lineno[blame_entry.lno]. 172 */ 173const char*final_buf; 174unsigned long final_buf_size; 175 176/* linked list of blames */ 177struct blame_entry *ent; 178 179/* look-up a line in the final buffer */ 180int num_lines; 181int*lineno; 182}; 183 184staticinlineintsame_suspect(struct origin *a,struct origin *b) 185{ 186if(a == b) 187return1; 188if(a->commit != b->commit) 189return0; 190return!strcmp(a->path, b->path); 191} 192 193static voidsanity_check_refcnt(struct scoreboard *); 194 195/* 196 * If two blame entries that are next to each other came from 197 * contiguous lines in the same origin (i.e. <commit, path> pair), 198 * merge them together. 199 */ 200static voidcoalesce(struct scoreboard *sb) 201{ 202struct blame_entry *ent, *next; 203 204for(ent = sb->ent; ent && (next = ent->next); ent = next) { 205if(same_suspect(ent->suspect, next->suspect) && 206 ent->guilty == next->guilty && 207 ent->s_lno + ent->num_lines == next->s_lno) { 208 ent->num_lines += next->num_lines; 209 ent->next = next->next; 210if(ent->next) 211 ent->next->prev = ent; 212origin_decref(next->suspect); 213free(next); 214 ent->score =0; 215 next = ent;/* again */ 216} 217} 218 219if(DEBUG)/* sanity */ 220sanity_check_refcnt(sb); 221} 222 223/* 224 * Given a commit and a path in it, create a new origin structure. 225 * The callers that add blame to the scoreboard should use 226 * get_origin() to obtain shared, refcounted copy instead of calling 227 * this function directly. 228 */ 229static struct origin *make_origin(struct commit *commit,const char*path) 230{ 231struct origin *o; 232 o =xcalloc(1,sizeof(*o) +strlen(path) +1); 233 o->commit = commit; 234 o->refcnt =1; 235strcpy(o->path, path); 236return o; 237} 238 239/* 240 * Locate an existing origin or create a new one. 241 */ 242static struct origin *get_origin(struct scoreboard *sb, 243struct commit *commit, 244const char*path) 245{ 246struct blame_entry *e; 247 248for(e = sb->ent; e; e = e->next) { 249if(e->suspect->commit == commit && 250!strcmp(e->suspect->path, path)) 251returnorigin_incref(e->suspect); 252} 253returnmake_origin(commit, path); 254} 255 256/* 257 * Fill the blob_sha1 field of an origin if it hasn't, so that later 258 * call to fill_origin_blob() can use it to locate the data. blob_sha1 259 * for an origin is also used to pass the blame for the entire file to 260 * the parent to detect the case where a child's blob is identical to 261 * that of its parent's. 262 */ 263static intfill_blob_sha1(struct origin *origin) 264{ 265unsigned mode; 266 267if(!is_null_sha1(origin->blob_sha1)) 268return0; 269if(get_tree_entry(origin->commit->object.sha1, 270 origin->path, 271 origin->blob_sha1, &mode)) 272goto error_out; 273if(sha1_object_info(origin->blob_sha1, NULL) != OBJ_BLOB) 274goto error_out; 275return0; 276 error_out: 277hashclr(origin->blob_sha1); 278return-1; 279} 280 281/* 282 * We have an origin -- check if the same path exists in the 283 * parent and return an origin structure to represent it. 284 */ 285static struct origin *find_origin(struct scoreboard *sb, 286struct commit *parent, 287struct origin *origin) 288{ 289struct origin *porigin = NULL; 290struct diff_options diff_opts; 291const char*paths[2]; 292 293if(parent->util) { 294/* 295 * Each commit object can cache one origin in that 296 * commit. This is a freestanding copy of origin and 297 * not refcounted. 298 */ 299struct origin *cached = parent->util; 300if(!strcmp(cached->path, origin->path)) { 301/* 302 * The same path between origin and its parent 303 * without renaming -- the most common case. 304 */ 305 porigin =get_origin(sb, parent, cached->path); 306 307/* 308 * If the origin was newly created (i.e. get_origin 309 * would call make_origin if none is found in the 310 * scoreboard), it does not know the blob_sha1, 311 * so copy it. Otherwise porigin was in the 312 * scoreboard and already knows blob_sha1. 313 */ 314if(porigin->refcnt ==1) 315hashcpy(porigin->blob_sha1, cached->blob_sha1); 316return porigin; 317} 318/* otherwise it was not very useful; free it */ 319free(parent->util); 320 parent->util = NULL; 321} 322 323/* See if the origin->path is different between parent 324 * and origin first. Most of the time they are the 325 * same and diff-tree is fairly efficient about this. 326 */ 327diff_setup(&diff_opts); 328 diff_opts.recursive =1; 329 diff_opts.detect_rename =0; 330 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; 331 paths[0] = origin->path; 332 paths[1] = NULL; 333 334diff_tree_setup_paths(paths, &diff_opts); 335if(diff_setup_done(&diff_opts) <0) 336die("diff-setup"); 337 338if(is_null_sha1(origin->commit->object.sha1)) 339do_diff_cache(parent->tree->object.sha1, &diff_opts); 340else 341diff_tree_sha1(parent->tree->object.sha1, 342 origin->commit->tree->object.sha1, 343"", &diff_opts); 344diffcore_std(&diff_opts); 345 346/* It is either one entry that says "modified", or "created", 347 * or nothing. 348 */ 349if(!diff_queued_diff.nr) { 350/* The path is the same as parent */ 351 porigin =get_origin(sb, parent, origin->path); 352hashcpy(porigin->blob_sha1, origin->blob_sha1); 353} 354else if(diff_queued_diff.nr !=1) 355die("internal error in blame::find_origin"); 356else{ 357struct diff_filepair *p = diff_queued_diff.queue[0]; 358switch(p->status) { 359default: 360die("internal error in blame::find_origin (%c)", 361 p->status); 362case'M': 363 porigin =get_origin(sb, parent, origin->path); 364hashcpy(porigin->blob_sha1, p->one->sha1); 365break; 366case'A': 367case'T': 368/* Did not exist in parent, or type changed */ 369break; 370} 371} 372diff_flush(&diff_opts); 373if(porigin) { 374/* 375 * Create a freestanding copy that is not part of 376 * the refcounted origin found in the scoreboard, and 377 * cache it in the commit. 378 */ 379struct origin *cached; 380 381 cached =make_origin(porigin->commit, porigin->path); 382hashcpy(cached->blob_sha1, porigin->blob_sha1); 383 parent->util = cached; 384} 385return porigin; 386} 387 388/* 389 * We have an origin -- find the path that corresponds to it in its 390 * parent and return an origin structure to represent it. 391 */ 392static struct origin *find_rename(struct scoreboard *sb, 393struct commit *parent, 394struct origin *origin) 395{ 396struct origin *porigin = NULL; 397struct diff_options diff_opts; 398int i; 399const char*paths[2]; 400 401diff_setup(&diff_opts); 402 diff_opts.recursive =1; 403 diff_opts.detect_rename = DIFF_DETECT_RENAME; 404 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; 405 diff_opts.single_follow = origin->path; 406 paths[0] = NULL; 407diff_tree_setup_paths(paths, &diff_opts); 408if(diff_setup_done(&diff_opts) <0) 409die("diff-setup"); 410 411if(is_null_sha1(origin->commit->object.sha1)) 412do_diff_cache(parent->tree->object.sha1, &diff_opts); 413else 414diff_tree_sha1(parent->tree->object.sha1, 415 origin->commit->tree->object.sha1, 416"", &diff_opts); 417diffcore_std(&diff_opts); 418 419for(i =0; i < diff_queued_diff.nr; i++) { 420struct diff_filepair *p = diff_queued_diff.queue[i]; 421if((p->status =='R'|| p->status =='C') && 422!strcmp(p->two->path, origin->path)) { 423 porigin =get_origin(sb, parent, p->one->path); 424hashcpy(porigin->blob_sha1, p->one->sha1); 425break; 426} 427} 428diff_flush(&diff_opts); 429return porigin; 430} 431 432/* 433 * Parsing of patch chunks... 434 */ 435struct chunk { 436/* line number in postimage; up to but not including this 437 * line is the same as preimage 438 */ 439int same; 440 441/* preimage line number after this chunk */ 442int p_next; 443 444/* postimage line number after this chunk */ 445int t_next; 446}; 447 448struct patch { 449struct chunk *chunks; 450int num; 451}; 452 453struct blame_diff_state { 454struct xdiff_emit_state xm; 455struct patch *ret; 456unsigned hunk_post_context; 457unsigned hunk_in_pre_context :1; 458}; 459 460static voidprocess_u_diff(void*state_,char*line,unsigned long len) 461{ 462struct blame_diff_state *state = state_; 463struct chunk *chunk; 464int off1, off2, len1, len2, num; 465 466 num = state->ret->num; 467if(len <4|| line[0] !='@'|| line[1] !='@') { 468if(state->hunk_in_pre_context && line[0] ==' ') 469 state->ret->chunks[num -1].same++; 470else{ 471 state->hunk_in_pre_context =0; 472if(line[0] ==' ') 473 state->hunk_post_context++; 474else 475 state->hunk_post_context =0; 476} 477return; 478} 479 480if(num && state->hunk_post_context) { 481 chunk = &state->ret->chunks[num -1]; 482 chunk->p_next -= state->hunk_post_context; 483 chunk->t_next -= state->hunk_post_context; 484} 485 state->ret->num = ++num; 486 state->ret->chunks =xrealloc(state->ret->chunks, 487sizeof(struct chunk) * num); 488 chunk = &state->ret->chunks[num -1]; 489if(parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) { 490 state->ret->num--; 491return; 492} 493 494/* Line numbers in patch output are one based. */ 495 off1--; 496 off2--; 497 498 chunk->same = len2 ? off2 : (off2 +1); 499 500 chunk->p_next = off1 + (len1 ? len1 :1); 501 chunk->t_next = chunk->same + len2; 502 state->hunk_in_pre_context =1; 503 state->hunk_post_context =0; 504} 505 506static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o, 507int context) 508{ 509struct blame_diff_state state; 510 xpparam_t xpp; 511 xdemitconf_t xecfg; 512 xdemitcb_t ecb; 513 514 xpp.flags = XDF_NEED_MINIMAL; 515 xecfg.ctxlen = context; 516 xecfg.flags =0; 517 ecb.outf = xdiff_outf; 518 ecb.priv = &state; 519memset(&state,0,sizeof(state)); 520 state.xm.consume = process_u_diff; 521 state.ret =xmalloc(sizeof(struct patch)); 522 state.ret->chunks = NULL; 523 state.ret->num =0; 524 525xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb); 526 527if(state.ret->num) { 528struct chunk *chunk; 529 chunk = &state.ret->chunks[state.ret->num -1]; 530 chunk->p_next -= state.hunk_post_context; 531 chunk->t_next -= state.hunk_post_context; 532} 533return state.ret; 534} 535 536/* 537 * Run diff between two origins and grab the patch output, so that 538 * we can pass blame for lines origin is currently suspected for 539 * to its parent. 540 */ 541static struct patch *get_patch(struct origin *parent,struct origin *origin) 542{ 543 mmfile_t file_p, file_o; 544struct patch *patch; 545 546fill_origin_blob(parent, &file_p); 547fill_origin_blob(origin, &file_o); 548if(!file_p.ptr || !file_o.ptr) 549return NULL; 550 patch =compare_buffer(&file_p, &file_o,0); 551 num_get_patch++; 552return patch; 553} 554 555static voidfree_patch(struct patch *p) 556{ 557free(p->chunks); 558free(p); 559} 560 561/* 562 * Link in a new blame entry to the scoreboard. Entries that cover the 563 * same line range have been removed from the scoreboard previously. 564 */ 565static voidadd_blame_entry(struct scoreboard *sb,struct blame_entry *e) 566{ 567struct blame_entry *ent, *prev = NULL; 568 569origin_incref(e->suspect); 570 571for(ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next) 572 prev = ent; 573 574/* prev, if not NULL, is the last one that is below e */ 575 e->prev = prev; 576if(prev) { 577 e->next = prev->next; 578 prev->next = e; 579} 580else{ 581 e->next = sb->ent; 582 sb->ent = e; 583} 584if(e->next) 585 e->next->prev = e; 586} 587 588/* 589 * src typically is on-stack; we want to copy the information in it to 590 * an malloced blame_entry that is already on the linked list of the 591 * scoreboard. The origin of dst loses a refcnt while the origin of src 592 * gains one. 593 */ 594static voiddup_entry(struct blame_entry *dst,struct blame_entry *src) 595{ 596struct blame_entry *p, *n; 597 598 p = dst->prev; 599 n = dst->next; 600origin_incref(src->suspect); 601origin_decref(dst->suspect); 602memcpy(dst, src,sizeof(*src)); 603 dst->prev = p; 604 dst->next = n; 605 dst->score =0; 606} 607 608static const char*nth_line(struct scoreboard *sb,int lno) 609{ 610return sb->final_buf + sb->lineno[lno]; 611} 612 613/* 614 * It is known that lines between tlno to same came from parent, and e 615 * has an overlap with that range. it also is known that parent's 616 * line plno corresponds to e's line tlno. 617 * 618 * <---- e -----> 619 * <------> 620 * <------------> 621 * <------------> 622 * <------------------> 623 * 624 * Split e into potentially three parts; before this chunk, the chunk 625 * to be blamed for the parent, and after that portion. 626 */ 627static voidsplit_overlap(struct blame_entry *split, 628struct blame_entry *e, 629int tlno,int plno,int same, 630struct origin *parent) 631{ 632int chunk_end_lno; 633memset(split,0,sizeof(struct blame_entry [3])); 634 635if(e->s_lno < tlno) { 636/* there is a pre-chunk part not blamed on parent */ 637 split[0].suspect =origin_incref(e->suspect); 638 split[0].lno = e->lno; 639 split[0].s_lno = e->s_lno; 640 split[0].num_lines = tlno - e->s_lno; 641 split[1].lno = e->lno + tlno - e->s_lno; 642 split[1].s_lno = plno; 643} 644else{ 645 split[1].lno = e->lno; 646 split[1].s_lno = plno + (e->s_lno - tlno); 647} 648 649if(same < e->s_lno + e->num_lines) { 650/* there is a post-chunk part not blamed on parent */ 651 split[2].suspect =origin_incref(e->suspect); 652 split[2].lno = e->lno + (same - e->s_lno); 653 split[2].s_lno = e->s_lno + (same - e->s_lno); 654 split[2].num_lines = e->s_lno + e->num_lines - same; 655 chunk_end_lno = split[2].lno; 656} 657else 658 chunk_end_lno = e->lno + e->num_lines; 659 split[1].num_lines = chunk_end_lno - split[1].lno; 660 661/* 662 * if it turns out there is nothing to blame the parent for, 663 * forget about the splitting. !split[1].suspect signals this. 664 */ 665if(split[1].num_lines <1) 666return; 667 split[1].suspect =origin_incref(parent); 668} 669 670/* 671 * split_overlap() divided an existing blame e into up to three parts 672 * in split. Adjust the linked list of blames in the scoreboard to 673 * reflect the split. 674 */ 675static voidsplit_blame(struct scoreboard *sb, 676struct blame_entry *split, 677struct blame_entry *e) 678{ 679struct blame_entry *new_entry; 680 681if(split[0].suspect && split[2].suspect) { 682/* The first part (reuse storage for the existing entry e) */ 683dup_entry(e, &split[0]); 684 685/* The last part -- me */ 686 new_entry =xmalloc(sizeof(*new_entry)); 687memcpy(new_entry, &(split[2]),sizeof(struct blame_entry)); 688add_blame_entry(sb, new_entry); 689 690/* ... and the middle part -- parent */ 691 new_entry =xmalloc(sizeof(*new_entry)); 692memcpy(new_entry, &(split[1]),sizeof(struct blame_entry)); 693add_blame_entry(sb, new_entry); 694} 695else if(!split[0].suspect && !split[2].suspect) 696/* 697 * The parent covers the entire area; reuse storage for 698 * e and replace it with the parent. 699 */ 700dup_entry(e, &split[1]); 701else if(split[0].suspect) { 702/* me and then parent */ 703dup_entry(e, &split[0]); 704 705 new_entry =xmalloc(sizeof(*new_entry)); 706memcpy(new_entry, &(split[1]),sizeof(struct blame_entry)); 707add_blame_entry(sb, new_entry); 708} 709else{ 710/* parent and then me */ 711dup_entry(e, &split[1]); 712 713 new_entry =xmalloc(sizeof(*new_entry)); 714memcpy(new_entry, &(split[2]),sizeof(struct blame_entry)); 715add_blame_entry(sb, new_entry); 716} 717 718if(DEBUG) {/* sanity */ 719struct blame_entry *ent; 720int lno = sb->ent->lno, corrupt =0; 721 722for(ent = sb->ent; ent; ent = ent->next) { 723if(lno != ent->lno) 724 corrupt =1; 725if(ent->s_lno <0) 726 corrupt =1; 727 lno += ent->num_lines; 728} 729if(corrupt) { 730 lno = sb->ent->lno; 731for(ent = sb->ent; ent; ent = ent->next) { 732printf("L%8d l%8d n%8d\n", 733 lno, ent->lno, ent->num_lines); 734 lno = ent->lno + ent->num_lines; 735} 736die("oops"); 737} 738} 739} 740 741/* 742 * After splitting the blame, the origins used by the 743 * on-stack blame_entry should lose one refcnt each. 744 */ 745static voiddecref_split(struct blame_entry *split) 746{ 747int i; 748 749for(i =0; i <3; i++) 750origin_decref(split[i].suspect); 751} 752 753/* 754 * Helper for blame_chunk(). blame_entry e is known to overlap with 755 * the patch hunk; split it and pass blame to the parent. 756 */ 757static voidblame_overlap(struct scoreboard *sb,struct blame_entry *e, 758int tlno,int plno,int same, 759struct origin *parent) 760{ 761struct blame_entry split[3]; 762 763split_overlap(split, e, tlno, plno, same, parent); 764if(split[1].suspect) 765split_blame(sb, split, e); 766decref_split(split); 767} 768 769/* 770 * Find the line number of the last line the target is suspected for. 771 */ 772static intfind_last_in_target(struct scoreboard *sb,struct origin *target) 773{ 774struct blame_entry *e; 775int last_in_target = -1; 776 777for(e = sb->ent; e; e = e->next) { 778if(e->guilty || !same_suspect(e->suspect, target)) 779continue; 780if(last_in_target < e->s_lno + e->num_lines) 781 last_in_target = e->s_lno + e->num_lines; 782} 783return last_in_target; 784} 785 786/* 787 * Process one hunk from the patch between the current suspect for 788 * blame_entry e and its parent. Find and split the overlap, and 789 * pass blame to the overlapping part to the parent. 790 */ 791static voidblame_chunk(struct scoreboard *sb, 792int tlno,int plno,int same, 793struct origin *target,struct origin *parent) 794{ 795struct blame_entry *e; 796 797for(e = sb->ent; e; e = e->next) { 798if(e->guilty || !same_suspect(e->suspect, target)) 799continue; 800if(same <= e->s_lno) 801continue; 802if(tlno < e->s_lno + e->num_lines) 803blame_overlap(sb, e, tlno, plno, same, parent); 804} 805} 806 807/* 808 * We are looking at the origin 'target' and aiming to pass blame 809 * for the lines it is suspected to its parent. Run diff to find 810 * which lines came from parent and pass blame for them. 811 */ 812static intpass_blame_to_parent(struct scoreboard *sb, 813struct origin *target, 814struct origin *parent) 815{ 816int i, last_in_target, plno, tlno; 817struct patch *patch; 818 819 last_in_target =find_last_in_target(sb, target); 820if(last_in_target <0) 821return1;/* nothing remains for this target */ 822 823 patch =get_patch(parent, target); 824 plno = tlno =0; 825for(i =0; i < patch->num; i++) { 826struct chunk *chunk = &patch->chunks[i]; 827 828blame_chunk(sb, tlno, plno, chunk->same, target, parent); 829 plno = chunk->p_next; 830 tlno = chunk->t_next; 831} 832/* The rest (i.e. anything after tlno) are the same as the parent */ 833blame_chunk(sb, tlno, plno, last_in_target, target, parent); 834 835free_patch(patch); 836return0; 837} 838 839/* 840 * The lines in blame_entry after splitting blames many times can become 841 * very small and trivial, and at some point it becomes pointless to 842 * blame the parents. E.g. "\t\t}\n\t}\n\n" appears everywhere in any 843 * ordinary C program, and it is not worth to say it was copied from 844 * totally unrelated file in the parent. 845 * 846 * Compute how trivial the lines in the blame_entry are. 847 */ 848static unsignedent_score(struct scoreboard *sb,struct blame_entry *e) 849{ 850unsigned score; 851const char*cp, *ep; 852 853if(e->score) 854return e->score; 855 856 score =1; 857 cp =nth_line(sb, e->lno); 858 ep =nth_line(sb, e->lno + e->num_lines); 859while(cp < ep) { 860unsigned ch = *((unsigned char*)cp); 861if(isalnum(ch)) 862 score++; 863 cp++; 864} 865 e->score = score; 866return score; 867} 868 869/* 870 * best_so_far[] and this[] are both a split of an existing blame_entry 871 * that passes blame to the parent. Maintain best_so_far the best split 872 * so far, by comparing this and best_so_far and copying this into 873 * bst_so_far as needed. 874 */ 875static voidcopy_split_if_better(struct scoreboard *sb, 876struct blame_entry *best_so_far, 877struct blame_entry *this) 878{ 879int i; 880 881if(!this[1].suspect) 882return; 883if(best_so_far[1].suspect) { 884if(ent_score(sb, &this[1]) <ent_score(sb, &best_so_far[1])) 885return; 886} 887 888for(i =0; i <3; i++) 889origin_incref(this[i].suspect); 890decref_split(best_so_far); 891memcpy(best_so_far,this,sizeof(struct blame_entry [3])); 892} 893 894/* 895 * We are looking at a part of the final image represented by 896 * ent (tlno and same are offset by ent->s_lno). 897 * tlno is where we are looking at in the final image. 898 * up to (but not including) same match preimage. 899 * plno is where we are looking at in the preimage. 900 * 901 * <-------------- final image ----------------------> 902 * <------ent------> 903 * ^tlno ^same 904 * <---------preimage-----> 905 * ^plno 906 * 907 * All line numbers are 0-based. 908 */ 909static voidhandle_split(struct scoreboard *sb, 910struct blame_entry *ent, 911int tlno,int plno,int same, 912struct origin *parent, 913struct blame_entry *split) 914{ 915if(ent->num_lines <= tlno) 916return; 917if(tlno < same) { 918struct blame_entry this[3]; 919 tlno += ent->s_lno; 920 same += ent->s_lno; 921split_overlap(this, ent, tlno, plno, same, parent); 922copy_split_if_better(sb, split,this); 923decref_split(this); 924} 925} 926 927/* 928 * Find the lines from parent that are the same as ent so that 929 * we can pass blames to it. file_p has the blob contents for 930 * the parent. 931 */ 932static voidfind_copy_in_blob(struct scoreboard *sb, 933struct blame_entry *ent, 934struct origin *parent, 935struct blame_entry *split, 936 mmfile_t *file_p) 937{ 938const char*cp; 939int cnt; 940 mmfile_t file_o; 941struct patch *patch; 942int i, plno, tlno; 943 944/* 945 * Prepare mmfile that contains only the lines in ent. 946 */ 947 cp =nth_line(sb, ent->lno); 948 file_o.ptr = (char*) cp; 949 cnt = ent->num_lines; 950 951while(cnt && cp < sb->final_buf + sb->final_buf_size) { 952if(*cp++ =='\n') 953 cnt--; 954} 955 file_o.size = cp - file_o.ptr; 956 957 patch =compare_buffer(file_p, &file_o,1); 958 959/* 960 * file_o is a part of final image we are annotating. 961 * file_p partially may match that image. 962 */ 963memset(split,0,sizeof(struct blame_entry [3])); 964 plno = tlno =0; 965for(i =0; i < patch->num; i++) { 966struct chunk *chunk = &patch->chunks[i]; 967 968handle_split(sb, ent, tlno, plno, chunk->same, parent, split); 969 plno = chunk->p_next; 970 tlno = chunk->t_next; 971} 972/* remainder, if any, all match the preimage */ 973handle_split(sb, ent, tlno, plno, ent->num_lines, parent, split); 974free_patch(patch); 975} 976 977/* 978 * See if lines currently target is suspected for can be attributed to 979 * parent. 980 */ 981static intfind_move_in_parent(struct scoreboard *sb, 982struct origin *target, 983struct origin *parent) 984{ 985int last_in_target, made_progress; 986struct blame_entry *e, split[3]; 987 mmfile_t file_p; 988 989 last_in_target =find_last_in_target(sb, target); 990if(last_in_target <0) 991return1;/* nothing remains for this target */ 992 993fill_origin_blob(parent, &file_p); 994if(!file_p.ptr) 995return0; 996 997 made_progress =1; 998while(made_progress) { 999 made_progress =0;1000for(e = sb->ent; e; e = e->next) {1001if(e->guilty || !same_suspect(e->suspect, target))1002continue;1003find_copy_in_blob(sb, e, parent, split, &file_p);1004if(split[1].suspect &&1005 blame_move_score <ent_score(sb, &split[1])) {1006split_blame(sb, split, e);1007 made_progress =1;1008}1009decref_split(split);1010}1011}1012return0;1013}10141015struct blame_list {1016struct blame_entry *ent;1017struct blame_entry split[3];1018};10191020/*1021 * Count the number of entries the target is suspected for,1022 * and prepare a list of entry and the best split.1023 */1024static struct blame_list *setup_blame_list(struct scoreboard *sb,1025struct origin *target,1026int*num_ents_p)1027{1028struct blame_entry *e;1029int num_ents, i;1030struct blame_list *blame_list = NULL;10311032for(e = sb->ent, num_ents =0; e; e = e->next)1033if(!e->guilty &&same_suspect(e->suspect, target))1034 num_ents++;1035if(num_ents) {1036 blame_list =xcalloc(num_ents,sizeof(struct blame_list));1037for(e = sb->ent, i =0; e; e = e->next)1038if(!e->guilty &&same_suspect(e->suspect, target))1039 blame_list[i++].ent = e;1040}1041*num_ents_p = num_ents;1042return blame_list;1043}10441045/*1046 * For lines target is suspected for, see if we can find code movement1047 * across file boundary from the parent commit. porigin is the path1048 * in the parent we already tried.1049 */1050static intfind_copy_in_parent(struct scoreboard *sb,1051struct origin *target,1052struct commit *parent,1053struct origin *porigin,1054int opt)1055{1056struct diff_options diff_opts;1057const char*paths[1];1058int i, j;1059int retval;1060struct blame_list *blame_list;1061int num_ents;10621063 blame_list =setup_blame_list(sb, target, &num_ents);1064if(!blame_list)1065return1;/* nothing remains for this target */10661067diff_setup(&diff_opts);1068 diff_opts.recursive =1;1069 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;10701071 paths[0] = NULL;1072diff_tree_setup_paths(paths, &diff_opts);1073if(diff_setup_done(&diff_opts) <0)1074die("diff-setup");10751076/* Try "find copies harder" on new path if requested;1077 * we do not want to use diffcore_rename() actually to1078 * match things up; find_copies_harder is set only to1079 * force diff_tree_sha1() to feed all filepairs to diff_queue,1080 * and this code needs to be after diff_setup_done(), which1081 * usually makes find-copies-harder imply copy detection.1082 */1083if((opt & PICKAXE_BLAME_COPY_HARDEST)1084|| ((opt & PICKAXE_BLAME_COPY_HARDER)1085&& (!porigin ||strcmp(target->path, porigin->path))))1086 diff_opts.find_copies_harder =1;10871088if(is_null_sha1(target->commit->object.sha1))1089do_diff_cache(parent->tree->object.sha1, &diff_opts);1090else1091diff_tree_sha1(parent->tree->object.sha1,1092 target->commit->tree->object.sha1,1093"", &diff_opts);10941095if(!diff_opts.find_copies_harder)1096diffcore_std(&diff_opts);10971098 retval =0;1099while(1) {1100int made_progress =0;11011102for(i =0; i < diff_queued_diff.nr; i++) {1103struct diff_filepair *p = diff_queued_diff.queue[i];1104struct origin *norigin;1105 mmfile_t file_p;1106struct blame_entry this[3];11071108if(!DIFF_FILE_VALID(p->one))1109continue;/* does not exist in parent */1110if(porigin && !strcmp(p->one->path, porigin->path))1111/* find_move already dealt with this path */1112continue;11131114 norigin =get_origin(sb, parent, p->one->path);1115hashcpy(norigin->blob_sha1, p->one->sha1);1116fill_origin_blob(norigin, &file_p);1117if(!file_p.ptr)1118continue;11191120for(j =0; j < num_ents; j++) {1121find_copy_in_blob(sb, blame_list[j].ent,1122 norigin,this, &file_p);1123copy_split_if_better(sb, blame_list[j].split,1124this);1125decref_split(this);1126}1127origin_decref(norigin);1128}11291130for(j =0; j < num_ents; j++) {1131struct blame_entry *split = blame_list[j].split;1132if(split[1].suspect &&1133 blame_copy_score <ent_score(sb, &split[1])) {1134split_blame(sb, split, blame_list[j].ent);1135 made_progress =1;1136}1137decref_split(split);1138}1139free(blame_list);11401141if(!made_progress)1142break;1143 blame_list =setup_blame_list(sb, target, &num_ents);1144if(!blame_list) {1145 retval =1;1146break;1147}1148}1149diff_flush(&diff_opts);11501151return retval;1152}11531154/*1155 * The blobs of origin and porigin exactly match, so everything1156 * origin is suspected for can be blamed on the parent.1157 */1158static voidpass_whole_blame(struct scoreboard *sb,1159struct origin *origin,struct origin *porigin)1160{1161struct blame_entry *e;11621163if(!porigin->file.ptr && origin->file.ptr) {1164/* Steal its file */1165 porigin->file = origin->file;1166 origin->file.ptr = NULL;1167}1168for(e = sb->ent; e; e = e->next) {1169if(!same_suspect(e->suspect, origin))1170continue;1171origin_incref(porigin);1172origin_decref(e->suspect);1173 e->suspect = porigin;1174}1175}11761177#define MAXPARENT 1611781179static voidpass_blame(struct scoreboard *sb,struct origin *origin,int opt)1180{1181int i, pass;1182struct commit *commit = origin->commit;1183struct commit_list *parent;1184struct origin *parent_origin[MAXPARENT], *porigin;11851186memset(parent_origin,0,sizeof(parent_origin));11871188/* The first pass looks for unrenamed path to optimize for1189 * common cases, then we look for renames in the second pass.1190 */1191for(pass =0; pass <2; pass++) {1192struct origin *(*find)(struct scoreboard *,1193struct commit *,struct origin *);1194 find = pass ? find_rename : find_origin;11951196for(i =0, parent = commit->parents;1197 i < MAXPARENT && parent;1198 parent = parent->next, i++) {1199struct commit *p = parent->item;1200int j, same;12011202if(parent_origin[i])1203continue;1204if(parse_commit(p))1205continue;1206 porigin =find(sb, p, origin);1207if(!porigin)1208continue;1209if(!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {1210pass_whole_blame(sb, origin, porigin);1211origin_decref(porigin);1212goto finish;1213}1214for(j = same =0; j < i; j++)1215if(parent_origin[j] &&1216!hashcmp(parent_origin[j]->blob_sha1,1217 porigin->blob_sha1)) {1218 same =1;1219break;1220}1221if(!same)1222 parent_origin[i] = porigin;1223else1224origin_decref(porigin);1225}1226}12271228 num_commits++;1229for(i =0, parent = commit->parents;1230 i < MAXPARENT && parent;1231 parent = parent->next, i++) {1232struct origin *porigin = parent_origin[i];1233if(!porigin)1234continue;1235if(pass_blame_to_parent(sb, origin, porigin))1236goto finish;1237}12381239/*1240 * Optionally find moves in parents' files.1241 */1242if(opt & PICKAXE_BLAME_MOVE)1243for(i =0, parent = commit->parents;1244 i < MAXPARENT && parent;1245 parent = parent->next, i++) {1246struct origin *porigin = parent_origin[i];1247if(!porigin)1248continue;1249if(find_move_in_parent(sb, origin, porigin))1250goto finish;1251}12521253/*1254 * Optionally find copies from parents' files.1255 */1256if(opt & PICKAXE_BLAME_COPY)1257for(i =0, parent = commit->parents;1258 i < MAXPARENT && parent;1259 parent = parent->next, i++) {1260struct origin *porigin = parent_origin[i];1261if(find_copy_in_parent(sb, origin, parent->item,1262 porigin, opt))1263goto finish;1264}12651266 finish:1267for(i =0; i < MAXPARENT; i++)1268origin_decref(parent_origin[i]);1269}12701271/*1272 * Information on commits, used for output.1273 */1274struct commit_info1275{1276const char*author;1277const char*author_mail;1278unsigned long author_time;1279const char*author_tz;12801281/* filled only when asked for details */1282const char*committer;1283const char*committer_mail;1284unsigned long committer_time;1285const char*committer_tz;12861287const char*summary;1288};12891290/*1291 * Parse author/committer line in the commit object buffer1292 */1293static voidget_ac_line(const char*inbuf,const char*what,1294int bufsz,char*person,const char**mail,1295unsigned long*time,const char**tz)1296{1297int len;1298char*tmp, *endp;12991300 tmp =strstr(inbuf, what);1301if(!tmp)1302goto error_out;1303 tmp +=strlen(what);1304 endp =strchr(tmp,'\n');1305if(!endp)1306 len =strlen(tmp);1307else1308 len = endp - tmp;1309if(bufsz <= len) {1310 error_out:1311/* Ugh */1312*mail = *tz ="(unknown)";1313*time =0;1314return;1315}1316memcpy(person, tmp, len);13171318 tmp = person;1319 tmp += len;1320*tmp =0;1321while(*tmp !=' ')1322 tmp--;1323*tz = tmp+1;13241325*tmp =0;1326while(*tmp !=' ')1327 tmp--;1328*time =strtoul(tmp, NULL,10);13291330*tmp =0;1331while(*tmp !=' ')1332 tmp--;1333*mail = tmp +1;1334*tmp =0;1335}13361337static voidget_commit_info(struct commit *commit,1338struct commit_info *ret,1339int detailed)1340{1341int len;1342char*tmp, *endp;1343static char author_buf[1024];1344static char committer_buf[1024];1345static char summary_buf[1024];13461347/*1348 * We've operated without save_commit_buffer, so1349 * we now need to populate them for output.1350 */1351if(!commit->buffer) {1352enum object_type type;1353unsigned long size;1354 commit->buffer =1355read_sha1_file(commit->object.sha1, &type, &size);1356}1357 ret->author = author_buf;1358get_ac_line(commit->buffer,"\nauthor ",1359sizeof(author_buf), author_buf, &ret->author_mail,1360&ret->author_time, &ret->author_tz);13611362if(!detailed)1363return;13641365 ret->committer = committer_buf;1366get_ac_line(commit->buffer,"\ncommitter ",1367sizeof(committer_buf), committer_buf, &ret->committer_mail,1368&ret->committer_time, &ret->committer_tz);13691370 ret->summary = summary_buf;1371 tmp =strstr(commit->buffer,"\n\n");1372if(!tmp) {1373 error_out:1374sprintf(summary_buf,"(%s)",sha1_to_hex(commit->object.sha1));1375return;1376}1377 tmp +=2;1378 endp =strchr(tmp,'\n');1379if(!endp)1380 endp = tmp +strlen(tmp);1381 len = endp - tmp;1382if(len >=sizeof(summary_buf) || len ==0)1383goto error_out;1384memcpy(summary_buf, tmp, len);1385 summary_buf[len] =0;1386}13871388/*1389 * To allow LF and other nonportable characters in pathnames,1390 * they are c-style quoted as needed.1391 */1392static voidwrite_filename_info(const char*path)1393{1394printf("filename ");1395write_name_quoted(NULL,0, path,1, stdout);1396putchar('\n');1397}13981399/*1400 * The blame_entry is found to be guilty for the range. Mark it1401 * as such, and show it in incremental output.1402 */1403static voidfound_guilty_entry(struct blame_entry *ent)1404{1405if(ent->guilty)1406return;1407 ent->guilty =1;1408if(incremental) {1409struct origin *suspect = ent->suspect;14101411printf("%s %d %d %d\n",1412sha1_to_hex(suspect->commit->object.sha1),1413 ent->s_lno +1, ent->lno +1, ent->num_lines);1414if(!(suspect->commit->object.flags & METAINFO_SHOWN)) {1415struct commit_info ci;1416 suspect->commit->object.flags |= METAINFO_SHOWN;1417get_commit_info(suspect->commit, &ci,1);1418printf("author%s\n", ci.author);1419printf("author-mail%s\n", ci.author_mail);1420printf("author-time%lu\n", ci.author_time);1421printf("author-tz%s\n", ci.author_tz);1422printf("committer%s\n", ci.committer);1423printf("committer-mail%s\n", ci.committer_mail);1424printf("committer-time%lu\n", ci.committer_time);1425printf("committer-tz%s\n", ci.committer_tz);1426printf("summary%s\n", ci.summary);1427if(suspect->commit->object.flags & UNINTERESTING)1428printf("boundary\n");1429}1430write_filename_info(suspect->path);1431}1432}14331434/*1435 * The main loop -- while the scoreboard has lines whose true origin1436 * is still unknown, pick one blame_entry, and allow its current1437 * suspect to pass blames to its parents.1438 */1439static voidassign_blame(struct scoreboard *sb,struct rev_info *revs,int opt)1440{1441while(1) {1442struct blame_entry *ent;1443struct commit *commit;1444struct origin *suspect = NULL;14451446/* find one suspect to break down */1447for(ent = sb->ent; !suspect && ent; ent = ent->next)1448if(!ent->guilty)1449 suspect = ent->suspect;1450if(!suspect)1451return;/* all done */14521453/*1454 * We will use this suspect later in the loop,1455 * so hold onto it in the meantime.1456 */1457origin_incref(suspect);1458 commit = suspect->commit;1459if(!commit->object.parsed)1460parse_commit(commit);1461if(!(commit->object.flags & UNINTERESTING) &&1462!(revs->max_age != -1&& commit->date < revs->max_age))1463pass_blame(sb, suspect, opt);1464else{1465 commit->object.flags |= UNINTERESTING;1466if(commit->object.parsed)1467mark_parents_uninteresting(commit);1468}1469/* treat root commit as boundary */1470if(!commit->parents && !show_root)1471 commit->object.flags |= UNINTERESTING;14721473/* Take responsibility for the remaining entries */1474for(ent = sb->ent; ent; ent = ent->next)1475if(same_suspect(ent->suspect, suspect))1476found_guilty_entry(ent);1477origin_decref(suspect);14781479if(DEBUG)/* sanity */1480sanity_check_refcnt(sb);1481}1482}14831484static const char*format_time(unsigned long time,const char*tz_str,1485int show_raw_time)1486{1487static char time_buf[128];1488time_t t = time;1489int minutes, tz;1490struct tm *tm;14911492if(show_raw_time) {1493sprintf(time_buf,"%lu%s", time, tz_str);1494return time_buf;1495}14961497 tz =atoi(tz_str);1498 minutes = tz <0? -tz : tz;1499 minutes = (minutes /100)*60+ (minutes %100);1500 minutes = tz <0? -minutes : minutes;1501 t = time + minutes *60;1502 tm =gmtime(&t);15031504strftime(time_buf,sizeof(time_buf),"%Y-%m-%d %H:%M:%S", tm);1505strcat(time_buf, tz_str);1506return time_buf;1507}15081509#define OUTPUT_ANNOTATE_COMPAT 0011510#define OUTPUT_LONG_OBJECT_NAME 0021511#define OUTPUT_RAW_TIMESTAMP 0041512#define OUTPUT_PORCELAIN 0101513#define OUTPUT_SHOW_NAME 0201514#define OUTPUT_SHOW_NUMBER 0401515#define OUTPUT_SHOW_SCORE 010015161517static voidemit_porcelain(struct scoreboard *sb,struct blame_entry *ent)1518{1519int cnt;1520const char*cp;1521struct origin *suspect = ent->suspect;1522char hex[41];15231524strcpy(hex,sha1_to_hex(suspect->commit->object.sha1));1525printf("%s%c%d %d %d\n",1526 hex,1527 ent->guilty ?' ':'*',// purely for debugging1528 ent->s_lno +1,1529 ent->lno +1,1530 ent->num_lines);1531if(!(suspect->commit->object.flags & METAINFO_SHOWN)) {1532struct commit_info ci;1533 suspect->commit->object.flags |= METAINFO_SHOWN;1534get_commit_info(suspect->commit, &ci,1);1535printf("author%s\n", ci.author);1536printf("author-mail%s\n", ci.author_mail);1537printf("author-time%lu\n", ci.author_time);1538printf("author-tz%s\n", ci.author_tz);1539printf("committer%s\n", ci.committer);1540printf("committer-mail%s\n", ci.committer_mail);1541printf("committer-time%lu\n", ci.committer_time);1542printf("committer-tz%s\n", ci.committer_tz);1543write_filename_info(suspect->path);1544printf("summary%s\n", ci.summary);1545if(suspect->commit->object.flags & UNINTERESTING)1546printf("boundary\n");1547}1548else if(suspect->commit->object.flags & MORE_THAN_ONE_PATH)1549write_filename_info(suspect->path);15501551 cp =nth_line(sb, ent->lno);1552for(cnt =0; cnt < ent->num_lines; cnt++) {1553char ch;1554if(cnt)1555printf("%s %d %d\n", hex,1556 ent->s_lno +1+ cnt,1557 ent->lno +1+ cnt);1558putchar('\t');1559do{1560 ch = *cp++;1561putchar(ch);1562}while(ch !='\n'&&1563 cp < sb->final_buf + sb->final_buf_size);1564}1565}15661567static voidemit_other(struct scoreboard *sb,struct blame_entry *ent,int opt)1568{1569int cnt;1570const char*cp;1571struct origin *suspect = ent->suspect;1572struct commit_info ci;1573char hex[41];1574int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);15751576get_commit_info(suspect->commit, &ci,1);1577strcpy(hex,sha1_to_hex(suspect->commit->object.sha1));15781579 cp =nth_line(sb, ent->lno);1580for(cnt =0; cnt < ent->num_lines; cnt++) {1581char ch;1582int length = (opt & OUTPUT_LONG_OBJECT_NAME) ?40:8;15831584if(suspect->commit->object.flags & UNINTERESTING) {1585if(blank_boundary)1586memset(hex,' ', length);1587else if(!cmd_is_annotate) {1588 length--;1589putchar('^');1590}1591}15921593printf("%.*s", length, hex);1594if(opt & OUTPUT_ANNOTATE_COMPAT)1595printf("\t(%10s\t%10s\t%d)", ci.author,1596format_time(ci.author_time, ci.author_tz,1597 show_raw_time),1598 ent->lno +1+ cnt);1599else{1600if(opt & OUTPUT_SHOW_SCORE)1601printf(" %*d%02d",1602 max_score_digits, ent->score,1603 ent->suspect->refcnt);1604if(opt & OUTPUT_SHOW_NAME)1605printf(" %-*.*s", longest_file, longest_file,1606 suspect->path);1607if(opt & OUTPUT_SHOW_NUMBER)1608printf(" %*d", max_orig_digits,1609 ent->s_lno +1+ cnt);1610printf(" (%-*.*s%10s %*d) ",1611 longest_author, longest_author, ci.author,1612format_time(ci.author_time, ci.author_tz,1613 show_raw_time),1614 max_digits, ent->lno +1+ cnt);1615}1616do{1617 ch = *cp++;1618putchar(ch);1619}while(ch !='\n'&&1620 cp < sb->final_buf + sb->final_buf_size);1621}1622}16231624static voidoutput(struct scoreboard *sb,int option)1625{1626struct blame_entry *ent;16271628if(option & OUTPUT_PORCELAIN) {1629for(ent = sb->ent; ent; ent = ent->next) {1630struct blame_entry *oth;1631struct origin *suspect = ent->suspect;1632struct commit *commit = suspect->commit;1633if(commit->object.flags & MORE_THAN_ONE_PATH)1634continue;1635for(oth = ent->next; oth; oth = oth->next) {1636if((oth->suspect->commit != commit) ||1637!strcmp(oth->suspect->path, suspect->path))1638continue;1639 commit->object.flags |= MORE_THAN_ONE_PATH;1640break;1641}1642}1643}16441645for(ent = sb->ent; ent; ent = ent->next) {1646if(option & OUTPUT_PORCELAIN)1647emit_porcelain(sb, ent);1648else{1649emit_other(sb, ent, option);1650}1651}1652}16531654/*1655 * To allow quick access to the contents of nth line in the1656 * final image, prepare an index in the scoreboard.1657 */1658static intprepare_lines(struct scoreboard *sb)1659{1660const char*buf = sb->final_buf;1661unsigned long len = sb->final_buf_size;1662int num =0, incomplete =0, bol =1;16631664if(len && buf[len-1] !='\n')1665 incomplete++;/* incomplete line at the end */1666while(len--) {1667if(bol) {1668 sb->lineno =xrealloc(sb->lineno,1669sizeof(int* ) * (num +1));1670 sb->lineno[num] = buf - sb->final_buf;1671 bol =0;1672}1673if(*buf++ =='\n') {1674 num++;1675 bol =1;1676}1677}1678 sb->lineno =xrealloc(sb->lineno,1679sizeof(int* ) * (num + incomplete +1));1680 sb->lineno[num + incomplete] = buf - sb->final_buf;1681 sb->num_lines = num + incomplete;1682return sb->num_lines;1683}16841685/*1686 * Add phony grafts for use with -S; this is primarily to1687 * support git-cvsserver that wants to give a linear history1688 * to its clients.1689 */1690static intread_ancestry(const char*graft_file)1691{1692FILE*fp =fopen(graft_file,"r");1693char buf[1024];1694if(!fp)1695return-1;1696while(fgets(buf,sizeof(buf), fp)) {1697/* The format is just "Commit Parent1 Parent2 ...\n" */1698int len =strlen(buf);1699struct commit_graft *graft =read_graft_line(buf, len);1700if(graft)1701register_commit_graft(graft,0);1702}1703fclose(fp);1704return0;1705}17061707/*1708 * How many columns do we need to show line numbers in decimal?1709 */1710static intlineno_width(int lines)1711{1712int i, width;17131714for(width =1, i =10; i <= lines +1; width++)1715 i *=10;1716return width;1717}17181719/*1720 * How many columns do we need to show line numbers, authors,1721 * and filenames?1722 */1723static voidfind_alignment(struct scoreboard *sb,int*option)1724{1725int longest_src_lines =0;1726int longest_dst_lines =0;1727unsigned largest_score =0;1728struct blame_entry *e;17291730for(e = sb->ent; e; e = e->next) {1731struct origin *suspect = e->suspect;1732struct commit_info ci;1733int num;17341735if(strcmp(suspect->path, sb->path))1736*option |= OUTPUT_SHOW_NAME;1737 num =strlen(suspect->path);1738if(longest_file < num)1739 longest_file = num;1740if(!(suspect->commit->object.flags & METAINFO_SHOWN)) {1741 suspect->commit->object.flags |= METAINFO_SHOWN;1742get_commit_info(suspect->commit, &ci,1);1743 num =strlen(ci.author);1744if(longest_author < num)1745 longest_author = num;1746}1747 num = e->s_lno + e->num_lines;1748if(longest_src_lines < num)1749 longest_src_lines = num;1750 num = e->lno + e->num_lines;1751if(longest_dst_lines < num)1752 longest_dst_lines = num;1753if(largest_score <ent_score(sb, e))1754 largest_score =ent_score(sb, e);1755}1756 max_orig_digits =lineno_width(longest_src_lines);1757 max_digits =lineno_width(longest_dst_lines);1758 max_score_digits =lineno_width(largest_score);1759}17601761/*1762 * For debugging -- origin is refcounted, and this asserts that1763 * we do not underflow.1764 */1765static voidsanity_check_refcnt(struct scoreboard *sb)1766{1767int baa =0;1768struct blame_entry *ent;17691770for(ent = sb->ent; ent; ent = ent->next) {1771/* Nobody should have zero or negative refcnt */1772if(ent->suspect->refcnt <=0) {1773fprintf(stderr,"%sin%shas negative refcnt%d\n",1774 ent->suspect->path,1775sha1_to_hex(ent->suspect->commit->object.sha1),1776 ent->suspect->refcnt);1777 baa =1;1778}1779}1780for(ent = sb->ent; ent; ent = ent->next) {1781/* Mark the ones that haven't been checked */1782if(0< ent->suspect->refcnt)1783 ent->suspect->refcnt = -ent->suspect->refcnt;1784}1785for(ent = sb->ent; ent; ent = ent->next) {1786/*1787 * ... then pick each and see if they have the the1788 * correct refcnt.1789 */1790int found;1791struct blame_entry *e;1792struct origin *suspect = ent->suspect;17931794if(0< suspect->refcnt)1795continue;1796 suspect->refcnt = -suspect->refcnt;/* Unmark */1797for(found =0, e = sb->ent; e; e = e->next) {1798if(e->suspect != suspect)1799continue;1800 found++;1801}1802if(suspect->refcnt != found) {1803fprintf(stderr,"%sin%shas refcnt%d, not%d\n",1804 ent->suspect->path,1805sha1_to_hex(ent->suspect->commit->object.sha1),1806 ent->suspect->refcnt, found);1807 baa =2;1808}1809}1810if(baa) {1811int opt =0160;1812find_alignment(sb, &opt);1813output(sb, opt);1814die("Baa%d!", baa);1815}1816}18171818/*1819 * Used for the command line parsing; check if the path exists1820 * in the working tree.1821 */1822static inthas_path_in_work_tree(const char*path)1823{1824struct stat st;1825return!lstat(path, &st);1826}18271828static unsignedparse_score(const char*arg)1829{1830char*end;1831unsigned long score =strtoul(arg, &end,10);1832if(*end)1833return0;1834return score;1835}18361837static const char*add_prefix(const char*prefix,const char*path)1838{1839if(!prefix || !prefix[0])1840return path;1841returnprefix_path(prefix,strlen(prefix), path);1842}18431844/*1845 * Parsing of (comma separated) one item in the -L option1846 */1847static const char*parse_loc(const char*spec,1848struct scoreboard *sb,long lno,1849long begin,long*ret)1850{1851char*term;1852const char*line;1853long num;1854int reg_error;1855 regex_t regexp;1856 regmatch_t match[1];18571858/* Allow "-L <something>,+20" to mean starting at <something>1859 * for 20 lines, or "-L <something>,-5" for 5 lines ending at1860 * <something>.1861 */1862if(1< begin && (spec[0] =='+'|| spec[0] =='-')) {1863 num =strtol(spec +1, &term,10);1864if(term != spec +1) {1865if(spec[0] =='-')1866 num =0- num;1867if(0< num)1868*ret = begin + num -2;1869else if(!num)1870*ret = begin;1871else1872*ret = begin + num;1873return term;1874}1875return spec;1876}1877 num =strtol(spec, &term,10);1878if(term != spec) {1879*ret = num;1880return term;1881}1882if(spec[0] !='/')1883return spec;18841885/* it could be a regexp of form /.../ */1886for(term = (char*) spec +1; *term && *term !='/'; term++) {1887if(*term =='\\')1888 term++;1889}1890if(*term !='/')1891return spec;18921893/* try [spec+1 .. term-1] as regexp */1894*term =0;1895 begin--;/* input is in human terms */1896 line =nth_line(sb, begin);18971898if(!(reg_error =regcomp(®exp, spec +1, REG_NEWLINE)) &&1899!(reg_error =regexec(®exp, line,1, match,0))) {1900const char*cp = line + match[0].rm_so;1901const char*nline;19021903while(begin++ < lno) {1904 nline =nth_line(sb, begin);1905if(line <= cp && cp < nline)1906break;1907 line = nline;1908}1909*ret = begin;1910regfree(®exp);1911*term++ ='/';1912return term;1913}1914else{1915char errbuf[1024];1916regerror(reg_error, ®exp, errbuf,1024);1917die("-L parameter '%s':%s", spec +1, errbuf);1918}1919}19201921/*1922 * Parsing of -L option1923 */1924static voidprepare_blame_range(struct scoreboard *sb,1925const char*bottomtop,1926long lno,1927long*bottom,long*top)1928{1929const char*term;19301931 term =parse_loc(bottomtop, sb, lno,1, bottom);1932if(*term ==',') {1933 term =parse_loc(term +1, sb, lno, *bottom +1, top);1934if(*term)1935usage(blame_usage);1936}1937if(*term)1938usage(blame_usage);1939}19401941static intgit_blame_config(const char*var,const char*value)1942{1943if(!strcmp(var,"blame.showroot")) {1944 show_root =git_config_bool(var, value);1945return0;1946}1947if(!strcmp(var,"blame.blankboundary")) {1948 blank_boundary =git_config_bool(var, value);1949return0;1950}1951returngit_default_config(var, value);1952}19531954static struct commit *fake_working_tree_commit(const char*path,const char*contents_from)1955{1956struct commit *commit;1957struct origin *origin;1958unsigned char head_sha1[20];1959char*buf;1960const char*ident;1961int fd;1962time_t now;1963unsigned long fin_size;1964int size, len;1965struct cache_entry *ce;1966unsigned mode;19671968if(get_sha1("HEAD", head_sha1))1969die("No such ref: HEAD");19701971time(&now);1972 commit =xcalloc(1,sizeof(*commit));1973 commit->parents =xcalloc(1,sizeof(*commit->parents));1974 commit->parents->item =lookup_commit_reference(head_sha1);1975 commit->object.parsed =1;1976 commit->date = now;1977 commit->object.type = OBJ_COMMIT;19781979 origin =make_origin(commit, path);19801981if(!contents_from ||strcmp("-", contents_from)) {1982struct stat st;1983const char*read_from;19841985if(contents_from) {1986if(stat(contents_from, &st) <0)1987die("Cannot stat%s", contents_from);1988 read_from = contents_from;1989}1990else{1991if(lstat(path, &st) <0)1992die("Cannot lstat%s", path);1993 read_from = path;1994}1995 fin_size =xsize_t(st.st_size);1996 buf =xmalloc(fin_size+1);1997 mode =canon_mode(st.st_mode);1998switch(st.st_mode & S_IFMT) {1999case S_IFREG:2000 fd =open(read_from, O_RDONLY);2001if(fd <0)2002die("cannot open%s", read_from);2003if(read_in_full(fd, buf, fin_size) != fin_size)2004die("cannot read%s", read_from);2005break;2006case S_IFLNK:2007if(readlink(read_from, buf, fin_size+1) != fin_size)2008die("cannot readlink%s", read_from);2009break;2010default:2011die("unsupported file type%s", read_from);2012}2013}2014else{2015/* Reading from stdin */2016 contents_from ="standard input";2017 buf = NULL;2018 fin_size =0;2019 mode =0;2020while(1) {2021 ssize_t cnt =8192;2022 buf =xrealloc(buf, fin_size + cnt);2023 cnt =xread(0, buf + fin_size, cnt);2024if(cnt <0)2025die("read error%sfrom stdin",2026strerror(errno));2027if(!cnt)2028break;2029 fin_size += cnt;2030}2031 buf =xrealloc(buf, fin_size +1);2032}2033 buf[fin_size] =0;2034 origin->file.ptr = buf;2035 origin->file.size = fin_size;2036pretend_sha1_file(buf, fin_size, OBJ_BLOB, origin->blob_sha1);2037 commit->util = origin;20382039/*2040 * Read the current index, replace the path entry with2041 * origin->blob_sha1 without mucking with its mode or type2042 * bits; we are not going to write this index out -- we just2043 * want to run "diff-index --cached".2044 */2045discard_cache();2046read_cache();20472048 len =strlen(path);2049if(!mode) {2050int pos =cache_name_pos(path, len);2051if(0<= pos)2052 mode =ntohl(active_cache[pos]->ce_mode);2053else2054/* Let's not bother reading from HEAD tree */2055 mode = S_IFREG |0644;2056}2057 size =cache_entry_size(len);2058 ce =xcalloc(1, size);2059hashcpy(ce->sha1, origin->blob_sha1);2060memcpy(ce->name, path, len);2061 ce->ce_flags =create_ce_flags(len,0);2062 ce->ce_mode =create_ce_mode(mode);2063add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);20642065/*2066 * We are not going to write this out, so this does not matter2067 * right now, but someday we might optimize diff-index --cached2068 * with cache-tree information.2069 */2070cache_tree_invalidate_path(active_cache_tree, path);20712072 commit->buffer =xmalloc(400);2073 ident =fmt_ident("Not Committed Yet","not.committed.yet", NULL,0);2074snprintf(commit->buffer,400,2075"tree 0000000000000000000000000000000000000000\n"2076"parent%s\n"2077"author%s\n"2078"committer%s\n\n"2079"Version of%sfrom%s\n",2080sha1_to_hex(head_sha1),2081 ident, ident, path, contents_from ? contents_from : path);2082return commit;2083}20842085intcmd_blame(int argc,const char**argv,const char*prefix)2086{2087struct rev_info revs;2088const char*path;2089struct scoreboard sb;2090struct origin *o;2091struct blame_entry *ent;2092int i, seen_dashdash, unk, opt;2093long bottom, top, lno;2094int output_option =0;2095int show_stats =0;2096const char*revs_file = NULL;2097const char*final_commit_name = NULL;2098enum object_type type;2099const char*bottomtop = NULL;2100const char*contents_from = NULL;21012102 cmd_is_annotate = !strcmp(argv[0],"annotate");21032104git_config(git_blame_config);2105 save_commit_buffer =0;21062107 opt =0;2108 seen_dashdash =0;2109for(unk = i =1; i < argc; i++) {2110const char*arg = argv[i];2111if(*arg !='-')2112break;2113else if(!strcmp("-b", arg))2114 blank_boundary =1;2115else if(!strcmp("--root", arg))2116 show_root =1;2117else if(!strcmp(arg,"--show-stats"))2118 show_stats =1;2119else if(!strcmp("-c", arg))2120 output_option |= OUTPUT_ANNOTATE_COMPAT;2121else if(!strcmp("-t", arg))2122 output_option |= OUTPUT_RAW_TIMESTAMP;2123else if(!strcmp("-l", arg))2124 output_option |= OUTPUT_LONG_OBJECT_NAME;2125else if(!strcmp("-S", arg) && ++i < argc)2126 revs_file = argv[i];2127else if(!prefixcmp(arg,"-M")) {2128 opt |= PICKAXE_BLAME_MOVE;2129 blame_move_score =parse_score(arg+2);2130}2131else if(!prefixcmp(arg,"-C")) {2132/*2133 * -C enables copy from removed files;2134 * -C -C enables copy from existing files, but only2135 * when blaming a new file;2136 * -C -C -C enables copy from existing files for2137 * everybody2138 */2139if(opt & PICKAXE_BLAME_COPY_HARDER)2140 opt |= PICKAXE_BLAME_COPY_HARDEST;2141if(opt & PICKAXE_BLAME_COPY)2142 opt |= PICKAXE_BLAME_COPY_HARDER;2143 opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;2144 blame_copy_score =parse_score(arg+2);2145}2146else if(!prefixcmp(arg,"-L")) {2147if(!arg[2]) {2148if(++i >= argc)2149usage(blame_usage);2150 arg = argv[i];2151}2152else2153 arg +=2;2154if(bottomtop)2155die("More than one '-L n,m' option given");2156 bottomtop = arg;2157}2158else if(!strcmp("--contents", arg)) {2159if(++i >= argc)2160usage(blame_usage);2161 contents_from = argv[i];2162}2163else if(!strcmp("--incremental", arg))2164 incremental =1;2165else if(!strcmp("--score-debug", arg))2166 output_option |= OUTPUT_SHOW_SCORE;2167else if(!strcmp("-f", arg) ||2168!strcmp("--show-name", arg))2169 output_option |= OUTPUT_SHOW_NAME;2170else if(!strcmp("-n", arg) ||2171!strcmp("--show-number", arg))2172 output_option |= OUTPUT_SHOW_NUMBER;2173else if(!strcmp("-p", arg) ||2174!strcmp("--porcelain", arg))2175 output_option |= OUTPUT_PORCELAIN;2176else if(!strcmp("--", arg)) {2177 seen_dashdash =1;2178 i++;2179break;2180}2181else2182 argv[unk++] = arg;2183}21842185if(!incremental)2186setup_pager();21872188if(!blame_move_score)2189 blame_move_score = BLAME_DEFAULT_MOVE_SCORE;2190if(!blame_copy_score)2191 blame_copy_score = BLAME_DEFAULT_COPY_SCORE;21922193/*2194 * We have collected options unknown to us in argv[1..unk]2195 * which are to be passed to revision machinery if we are2196 * going to do the "bottom" processing.2197 *2198 * The remaining are:2199 *2200 * (1) if seen_dashdash, its either2201 * "-options -- <path>" or2202 * "-options -- <path> <rev>".2203 * but the latter is allowed only if there is no2204 * options that we passed to revision machinery.2205 *2206 * (2) otherwise, we may have "--" somewhere later and2207 * might be looking at the first one of multiple 'rev'2208 * parameters (e.g. " master ^next ^maint -- path").2209 * See if there is a dashdash first, and give the2210 * arguments before that to revision machinery.2211 * After that there must be one 'path'.2212 *2213 * (3) otherwise, its one of the three:2214 * "-options <path> <rev>"2215 * "-options <rev> <path>"2216 * "-options <path>"2217 * but again the first one is allowed only if2218 * there is no options that we passed to revision2219 * machinery.2220 */22212222if(seen_dashdash) {2223/* (1) */2224if(argc <= i)2225usage(blame_usage);2226 path =add_prefix(prefix, argv[i]);2227if(i +1== argc -1) {2228if(unk !=1)2229usage(blame_usage);2230 argv[unk++] = argv[i +1];2231}2232else if(i +1!= argc)2233/* garbage at end */2234usage(blame_usage);2235}2236else{2237int j;2238for(j = i; !seen_dashdash && j < argc; j++)2239if(!strcmp(argv[j],"--"))2240 seen_dashdash = j;2241if(seen_dashdash) {2242/* (2) */2243if(seen_dashdash +1!= argc -1)2244usage(blame_usage);2245 path =add_prefix(prefix, argv[seen_dashdash +1]);2246for(j = i; j < seen_dashdash; j++)2247 argv[unk++] = argv[j];2248}2249else{2250/* (3) */2251if(argc <= i)2252usage(blame_usage);2253 path =add_prefix(prefix, argv[i]);2254if(i +1== argc -1) {2255 final_commit_name = argv[i +1];22562257/* if (unk == 1) we could be getting2258 * old-style2259 */2260if(unk ==1&& !has_path_in_work_tree(path)) {2261 path =add_prefix(prefix, argv[i +1]);2262 final_commit_name = argv[i];2263}2264}2265else if(i != argc -1)2266usage(blame_usage);/* garbage at end */22672268if(!has_path_in_work_tree(path))2269die("cannot stat path%s:%s",2270 path,strerror(errno));2271}2272}22732274if(final_commit_name)2275 argv[unk++] = final_commit_name;22762277/*2278 * Now we got rev and path. We do not want the path pruning2279 * but we may want "bottom" processing.2280 */2281 argv[unk++] ="--";/* terminate the rev name */2282 argv[unk] = NULL;22832284init_revisions(&revs, NULL);2285setup_revisions(unk, argv, &revs, NULL);2286memset(&sb,0,sizeof(sb));22872288/*2289 * There must be one and only one positive commit in the2290 * revs->pending array.2291 */2292for(i =0; i < revs.pending.nr; i++) {2293struct object *obj = revs.pending.objects[i].item;2294if(obj->flags & UNINTERESTING)2295continue;2296while(obj->type == OBJ_TAG)2297 obj =deref_tag(obj, NULL,0);2298if(obj->type != OBJ_COMMIT)2299die("Non commit%s?",2300 revs.pending.objects[i].name);2301if(sb.final)2302die("More than one commit to dig from%sand%s?",2303 revs.pending.objects[i].name,2304 final_commit_name);2305 sb.final = (struct commit *) obj;2306 final_commit_name = revs.pending.objects[i].name;2307}23082309if(!sb.final) {2310/*2311 * "--not A B -- path" without anything positive;2312 * do not default to HEAD, but use the working tree2313 * or "--contents".2314 */2315 sb.final =fake_working_tree_commit(path, contents_from);2316add_pending_object(&revs, &(sb.final->object),":");2317}2318else if(contents_from)2319die("Cannot use --contents with final commit object name");23202321/*2322 * If we have bottom, this will mark the ancestors of the2323 * bottom commits we would reach while traversing as2324 * uninteresting.2325 */2326prepare_revision_walk(&revs);23272328if(is_null_sha1(sb.final->object.sha1)) {2329char*buf;2330 o = sb.final->util;2331 buf =xmalloc(o->file.size +1);2332memcpy(buf, o->file.ptr, o->file.size +1);2333 sb.final_buf = buf;2334 sb.final_buf_size = o->file.size;2335}2336else{2337 o =get_origin(&sb, sb.final, path);2338if(fill_blob_sha1(o))2339die("no such path%sin%s", path, final_commit_name);23402341 sb.final_buf =read_sha1_file(o->blob_sha1, &type,2342&sb.final_buf_size);2343}2344 num_read_blob++;2345 lno =prepare_lines(&sb);23462347 bottom = top =0;2348if(bottomtop)2349prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);2350if(bottom && top && top < bottom) {2351long tmp;2352 tmp = top; top = bottom; bottom = tmp;2353}2354if(bottom <1)2355 bottom =1;2356if(top <1)2357 top = lno;2358 bottom--;2359if(lno < top)2360die("file%shas only%lu lines", path, lno);23612362 ent =xcalloc(1,sizeof(*ent));2363 ent->lno = bottom;2364 ent->num_lines = top - bottom;2365 ent->suspect = o;2366 ent->s_lno = bottom;23672368 sb.ent = ent;2369 sb.path = path;23702371if(revs_file &&read_ancestry(revs_file))2372die("reading graft file%sfailed:%s",2373 revs_file,strerror(errno));23742375assign_blame(&sb, &revs, opt);23762377if(incremental)2378return0;23792380coalesce(&sb);23812382if(!(output_option & OUTPUT_PORCELAIN))2383find_alignment(&sb, &output_option);23842385output(&sb, output_option);2386free((void*)sb.final_buf);2387for(ent = sb.ent; ent; ) {2388struct blame_entry *e = ent->next;2389free(ent);2390 ent = e;2391}23922393if(show_stats) {2394printf("num read blob:%d\n", num_read_blob);2395printf("num get patch:%d\n", num_get_patch);2396printf("num commits:%d\n", num_commits);2397}2398return0;2399}