1/* 2 * Pickaxe 3 * 4 * Copyright (c) 2006, Junio C Hamano 5 */ 6 7#include "cache.h" 8#include "builtin.h" 9#include "blob.h" 10#include "commit.h" 11#include "tag.h" 12#include "tree-walk.h" 13#include "diff.h" 14#include "diffcore.h" 15#include "revision.h" 16#include "quote.h" 17#include "xdiff-interface.h" 18 19static char blame_usage[] = 20"git-blame [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [commit] [--] file\n" 21" -c, --compatibility Use the same output mode as git-annotate (Default: off)\n" 22" -b Show blank SHA-1 for boundary commits (Default: off)\n" 23" -l, --long Show long commit SHA1 (Default: off)\n" 24" --root Do not treat root commits as boundaries (Default: off)\n" 25" -t, --time Show raw timestamp (Default: off)\n" 26" -f, --show-name Show original filename (Default: auto)\n" 27" -n, --show-number Show original linenumber (Default: off)\n" 28" -p, --porcelain Show in a format designed for machine consumption\n" 29" -L n,m Process only line range n,m, counting from 1\n" 30" -M, -C Find line movements within and across files\n" 31" --incremental Show blame entries as we find them, incrementally\n" 32" -S revs-file Use revisions from revs-file instead of calling git-rev-list\n"; 33 34static int longest_file; 35static int longest_author; 36static int max_orig_digits; 37static int max_digits; 38static int max_score_digits; 39static int show_root; 40static int blank_boundary; 41static int incremental; 42 43#ifndef DEBUG 44#define DEBUG 0 45#endif 46 47/* stats */ 48static int num_read_blob; 49static int num_get_patch; 50static int num_commits; 51 52#define PICKAXE_BLAME_MOVE 01 53#define PICKAXE_BLAME_COPY 02 54#define PICKAXE_BLAME_COPY_HARDER 04 55 56/* 57 * blame for a blame_entry with score lower than these thresholds 58 * is not passed to the parent using move/copy logic. 59 */ 60static unsigned blame_move_score; 61static unsigned blame_copy_score; 62#define BLAME_DEFAULT_MOVE_SCORE 20 63#define BLAME_DEFAULT_COPY_SCORE 40 64 65/* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */ 66#define METAINFO_SHOWN (1u<<12) 67#define MORE_THAN_ONE_PATH (1u<<13) 68 69/* 70 * One blob in a commit that is being suspected 71 */ 72struct origin { 73 int refcnt; 74 struct commit *commit; 75 mmfile_t file; 76 unsigned char blob_sha1[20]; 77 char path[FLEX_ARRAY]; 78}; 79 80/* 81 * Given an origin, prepare mmfile_t structure to be used by the 82 * diff machinery 83 */ 84static char *fill_origin_blob(struct origin *o, mmfile_t *file) 85{ 86 if (!o->file.ptr) { 87 char type[10]; 88 num_read_blob++; 89 file->ptr = read_sha1_file(o->blob_sha1, type, 90 (unsigned long *)(&(file->size))); 91 o->file = *file; 92 } 93 else 94 *file = o->file; 95 return file->ptr; 96} 97 98/* 99 * Origin is refcounted and usually we keep the blob contents to be 100 * reused. 101 */ 102static inline struct origin *origin_incref(struct origin *o) 103{ 104 if (o) 105 o->refcnt++; 106 return o; 107} 108 109static void origin_decref(struct origin *o) 110{ 111 if (o && --o->refcnt <= 0) { 112 if (o->file.ptr) 113 free(o->file.ptr); 114 memset(o, 0, sizeof(*o)); 115 free(o); 116 } 117} 118 119/* 120 * Each group of lines is described by a blame_entry; it can be split 121 * as we pass blame to the parents. They form a linked list in the 122 * scoreboard structure, sorted by the target line number. 123 */ 124struct blame_entry { 125 struct blame_entry *prev; 126 struct blame_entry *next; 127 128 /* the first line of this group in the final image; 129 * internally all line numbers are 0 based. 130 */ 131 int lno; 132 133 /* how many lines this group has */ 134 int num_lines; 135 136 /* the commit that introduced this group into the final image */ 137 struct origin *suspect; 138 139 /* true if the suspect is truly guilty; false while we have not 140 * checked if the group came from one of its parents. 141 */ 142 char guilty; 143 144 /* the line number of the first line of this group in the 145 * suspect's file; internally all line numbers are 0 based. 146 */ 147 int s_lno; 148 149 /* how significant this entry is -- cached to avoid 150 * scanning the lines over and over. 151 */ 152 unsigned score; 153}; 154 155/* 156 * The current state of the blame assignment. 157 */ 158struct scoreboard { 159 /* the final commit (i.e. where we started digging from) */ 160 struct commit *final; 161 162 const char *path; 163 164 /* 165 * The contents in the final image. 166 * Used by many functions to obtain contents of the nth line, 167 * indexed with scoreboard.lineno[blame_entry.lno]. 168 */ 169 const char *final_buf; 170 unsigned long final_buf_size; 171 172 /* linked list of blames */ 173 struct blame_entry *ent; 174 175 /* look-up a line in the final buffer */ 176 int num_lines; 177 int *lineno; 178}; 179 180static int cmp_suspect(struct origin *a, struct origin *b) 181{ 182 int cmp = hashcmp(a->commit->object.sha1, b->commit->object.sha1); 183 if (cmp) 184 return cmp; 185 return strcmp(a->path, b->path); 186} 187 188#define cmp_suspect(a, b) ( ((a)==(b)) ? 0 : cmp_suspect(a,b) ) 189 190static void sanity_check_refcnt(struct scoreboard *); 191 192/* 193 * If two blame entries that are next to each other came from 194 * contiguous lines in the same origin (i.e. <commit, path> pair), 195 * merge them together. 196 */ 197static void coalesce(struct scoreboard *sb) 198{ 199 struct blame_entry *ent, *next; 200 201 for (ent = sb->ent; ent && (next = ent->next); ent = next) { 202 if (!cmp_suspect(ent->suspect, next->suspect) && 203 ent->guilty == next->guilty && 204 ent->s_lno + ent->num_lines == next->s_lno) { 205 ent->num_lines += next->num_lines; 206 ent->next = next->next; 207 if (ent->next) 208 ent->next->prev = ent; 209 origin_decref(next->suspect); 210 free(next); 211 ent->score = 0; 212 next = ent; /* again */ 213 } 214 } 215 216 if (DEBUG) /* sanity */ 217 sanity_check_refcnt(sb); 218} 219 220/* 221 * Given a commit and a path in it, create a new origin structure. 222 * The callers that add blame to the scoreboard should use 223 * get_origin() to obtain shared, refcounted copy instead of calling 224 * this function directly. 225 */ 226static struct origin *make_origin(struct commit *commit, const char *path) 227{ 228 struct origin *o; 229 o = xcalloc(1, sizeof(*o) + strlen(path) + 1); 230 o->commit = commit; 231 o->refcnt = 1; 232 strcpy(o->path, path); 233 return o; 234} 235 236/* 237 * Locate an existing origin or create a new one. 238 */ 239static struct origin *get_origin(struct scoreboard *sb, 240 struct commit *commit, 241 const char *path) 242{ 243 struct blame_entry *e; 244 245 for (e = sb->ent; e; e = e->next) { 246 if (e->suspect->commit == commit && 247 !strcmp(e->suspect->path, path)) 248 return origin_incref(e->suspect); 249 } 250 return make_origin(commit, path); 251} 252 253/* 254 * Fill the blob_sha1 field of an origin if it hasn't, so that later 255 * call to fill_origin_blob() can use it to locate the data. blob_sha1 256 * for an origin is also used to pass the blame for the entire file to 257 * the parent to detect the case where a child's blob is identical to 258 * that of its parent's. 259 */ 260static int fill_blob_sha1(struct origin *origin) 261{ 262 unsigned mode; 263 char type[10]; 264 265 if (!is_null_sha1(origin->blob_sha1)) 266 return 0; 267 if (get_tree_entry(origin->commit->object.sha1, 268 origin->path, 269 origin->blob_sha1, &mode)) 270 goto error_out; 271 if (sha1_object_info(origin->blob_sha1, type, NULL) || 272 strcmp(type, blob_type)) 273 goto error_out; 274 return 0; 275 error_out: 276 hashclr(origin->blob_sha1); 277 return -1; 278} 279 280/* 281 * We have an origin -- check if the same path exists in the 282 * parent and return an origin structure to represent it. 283 */ 284static struct origin *find_origin(struct scoreboard *sb, 285 struct commit *parent, 286 struct origin *origin) 287{ 288 struct origin *porigin = NULL; 289 struct diff_options diff_opts; 290 const char *paths[2]; 291 292 if (parent->util) { 293 /* 294 * Each commit object can cache one origin in that 295 * commit. This is a freestanding copy of origin and 296 * not refcounted. 297 */ 298 struct origin *cached = parent->util; 299 if (!strcmp(cached->path, origin->path)) { 300 /* 301 * The same path between origin and its parent 302 * without renaming -- the most common case. 303 */ 304 porigin = get_origin(sb, parent, cached->path); 305 306 /* 307 * If the origin was newly created (i.e. get_origin 308 * would call make_origin if none is found in the 309 * scoreboard), it does not know the blob_sha1, 310 * so copy it. Otherwise porigin was in the 311 * scoreboard and already knows blob_sha1. 312 */ 313 if (porigin->refcnt == 1) 314 hashcpy(porigin->blob_sha1, cached->blob_sha1); 315 return porigin; 316 } 317 /* otherwise it was not very useful; free it */ 318 free(parent->util); 319 parent->util = NULL; 320 } 321 322 /* See if the origin->path is different between parent 323 * and origin first. Most of the time they are the 324 * same and diff-tree is fairly efficient about this. 325 */ 326 diff_setup(&diff_opts); 327 diff_opts.recursive = 1; 328 diff_opts.detect_rename = 0; 329 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; 330 paths[0] = origin->path; 331 paths[1] = NULL; 332 333 diff_tree_setup_paths(paths, &diff_opts); 334 if (diff_setup_done(&diff_opts) < 0) 335 die("diff-setup"); 336 diff_tree_sha1(parent->tree->object.sha1, 337 origin->commit->tree->object.sha1, 338 "", &diff_opts); 339 diffcore_std(&diff_opts); 340 341 /* It is either one entry that says "modified", or "created", 342 * or nothing. 343 */ 344 if (!diff_queued_diff.nr) { 345 /* The path is the same as parent */ 346 porigin = get_origin(sb, parent, origin->path); 347 hashcpy(porigin->blob_sha1, origin->blob_sha1); 348 } 349 else if (diff_queued_diff.nr != 1) 350 die("internal error in blame::find_origin"); 351 else { 352 struct diff_filepair *p = diff_queued_diff.queue[0]; 353 switch (p->status) { 354 default: 355 die("internal error in blame::find_origin (%c)", 356 p->status); 357 case 'M': 358 porigin = get_origin(sb, parent, origin->path); 359 hashcpy(porigin->blob_sha1, p->one->sha1); 360 break; 361 case 'A': 362 case 'T': 363 /* Did not exist in parent, or type changed */ 364 break; 365 } 366 } 367 diff_flush(&diff_opts); 368 if (porigin) { 369 /* 370 * Create a freestanding copy that is not part of 371 * the refcounted origin found in the scoreboard, and 372 * cache it in the commit. 373 */ 374 struct origin *cached; 375 376 cached = make_origin(porigin->commit, porigin->path); 377 hashcpy(cached->blob_sha1, porigin->blob_sha1); 378 parent->util = cached; 379 } 380 return porigin; 381} 382 383/* 384 * We have an origin -- find the path that corresponds to it in its 385 * parent and return an origin structure to represent it. 386 */ 387static struct origin *find_rename(struct scoreboard *sb, 388 struct commit *parent, 389 struct origin *origin) 390{ 391 struct origin *porigin = NULL; 392 struct diff_options diff_opts; 393 int i; 394 const char *paths[2]; 395 396 diff_setup(&diff_opts); 397 diff_opts.recursive = 1; 398 diff_opts.detect_rename = DIFF_DETECT_RENAME; 399 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; 400 diff_opts.single_follow = origin->path; 401 paths[0] = NULL; 402 diff_tree_setup_paths(paths, &diff_opts); 403 if (diff_setup_done(&diff_opts) < 0) 404 die("diff-setup"); 405 diff_tree_sha1(parent->tree->object.sha1, 406 origin->commit->tree->object.sha1, 407 "", &diff_opts); 408 diffcore_std(&diff_opts); 409 410 for (i = 0; i < diff_queued_diff.nr; i++) { 411 struct diff_filepair *p = diff_queued_diff.queue[i]; 412 if ((p->status == 'R' || p->status == 'C') && 413 !strcmp(p->two->path, origin->path)) { 414 porigin = get_origin(sb, parent, p->one->path); 415 hashcpy(porigin->blob_sha1, p->one->sha1); 416 break; 417 } 418 } 419 diff_flush(&diff_opts); 420 return porigin; 421} 422 423/* 424 * Parsing of patch chunks... 425 */ 426struct chunk { 427 /* line number in postimage; up to but not including this 428 * line is the same as preimage 429 */ 430 int same; 431 432 /* preimage line number after this chunk */ 433 int p_next; 434 435 /* postimage line number after this chunk */ 436 int t_next; 437}; 438 439struct patch { 440 struct chunk *chunks; 441 int num; 442}; 443 444struct blame_diff_state { 445 struct xdiff_emit_state xm; 446 struct patch *ret; 447 unsigned hunk_post_context; 448 unsigned hunk_in_pre_context : 1; 449}; 450 451static void process_u_diff(void *state_, char *line, unsigned long len) 452{ 453 struct blame_diff_state *state = state_; 454 struct chunk *chunk; 455 int off1, off2, len1, len2, num; 456 457 num = state->ret->num; 458 if (len < 4 || line[0] != '@' || line[1] != '@') { 459 if (state->hunk_in_pre_context && line[0] == ' ') 460 state->ret->chunks[num - 1].same++; 461 else { 462 state->hunk_in_pre_context = 0; 463 if (line[0] == ' ') 464 state->hunk_post_context++; 465 else 466 state->hunk_post_context = 0; 467 } 468 return; 469 } 470 471 if (num && state->hunk_post_context) { 472 chunk = &state->ret->chunks[num - 1]; 473 chunk->p_next -= state->hunk_post_context; 474 chunk->t_next -= state->hunk_post_context; 475 } 476 state->ret->num = ++num; 477 state->ret->chunks = xrealloc(state->ret->chunks, 478 sizeof(struct chunk) * num); 479 chunk = &state->ret->chunks[num - 1]; 480 if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) { 481 state->ret->num--; 482 return; 483 } 484 485 /* Line numbers in patch output are one based. */ 486 off1--; 487 off2--; 488 489 chunk->same = len2 ? off2 : (off2 + 1); 490 491 chunk->p_next = off1 + (len1 ? len1 : 1); 492 chunk->t_next = chunk->same + len2; 493 state->hunk_in_pre_context = 1; 494 state->hunk_post_context = 0; 495} 496 497static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o, 498 int context) 499{ 500 struct blame_diff_state state; 501 xpparam_t xpp; 502 xdemitconf_t xecfg; 503 xdemitcb_t ecb; 504 505 xpp.flags = XDF_NEED_MINIMAL; 506 xecfg.ctxlen = context; 507 xecfg.flags = 0; 508 ecb.outf = xdiff_outf; 509 ecb.priv = &state; 510 memset(&state, 0, sizeof(state)); 511 state.xm.consume = process_u_diff; 512 state.ret = xmalloc(sizeof(struct patch)); 513 state.ret->chunks = NULL; 514 state.ret->num = 0; 515 516 xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb); 517 518 if (state.ret->num) { 519 struct chunk *chunk; 520 chunk = &state.ret->chunks[state.ret->num - 1]; 521 chunk->p_next -= state.hunk_post_context; 522 chunk->t_next -= state.hunk_post_context; 523 } 524 return state.ret; 525} 526 527/* 528 * Run diff between two origins and grab the patch output, so that 529 * we can pass blame for lines origin is currently suspected for 530 * to its parent. 531 */ 532static struct patch *get_patch(struct origin *parent, struct origin *origin) 533{ 534 mmfile_t file_p, file_o; 535 struct patch *patch; 536 537 fill_origin_blob(parent, &file_p); 538 fill_origin_blob(origin, &file_o); 539 if (!file_p.ptr || !file_o.ptr) 540 return NULL; 541 patch = compare_buffer(&file_p, &file_o, 0); 542 num_get_patch++; 543 return patch; 544} 545 546static void free_patch(struct patch *p) 547{ 548 free(p->chunks); 549 free(p); 550} 551 552/* 553 * Link in a new blame entry to the scoreboard. Entries that cover the 554 * same line range have been removed from the scoreboard previously. 555 */ 556static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e) 557{ 558 struct blame_entry *ent, *prev = NULL; 559 560 origin_incref(e->suspect); 561 562 for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next) 563 prev = ent; 564 565 /* prev, if not NULL, is the last one that is below e */ 566 e->prev = prev; 567 if (prev) { 568 e->next = prev->next; 569 prev->next = e; 570 } 571 else { 572 e->next = sb->ent; 573 sb->ent = e; 574 } 575 if (e->next) 576 e->next->prev = e; 577} 578 579/* 580 * src typically is on-stack; we want to copy the information in it to 581 * an malloced blame_entry that is already on the linked list of the 582 * scoreboard. The origin of dst loses a refcnt while the origin of src 583 * gains one. 584 */ 585static void dup_entry(struct blame_entry *dst, struct blame_entry *src) 586{ 587 struct blame_entry *p, *n; 588 589 p = dst->prev; 590 n = dst->next; 591 origin_incref(src->suspect); 592 origin_decref(dst->suspect); 593 memcpy(dst, src, sizeof(*src)); 594 dst->prev = p; 595 dst->next = n; 596 dst->score = 0; 597} 598 599static const char *nth_line(struct scoreboard *sb, int lno) 600{ 601 return sb->final_buf + sb->lineno[lno]; 602} 603 604/* 605 * It is known that lines between tlno to same came from parent, and e 606 * has an overlap with that range. it also is known that parent's 607 * line plno corresponds to e's line tlno. 608 * 609 * <---- e -----> 610 * <------> 611 * <------------> 612 * <------------> 613 * <------------------> 614 * 615 * Split e into potentially three parts; before this chunk, the chunk 616 * to be blamed for the parent, and after that portion. 617 */ 618static void split_overlap(struct blame_entry *split, 619 struct blame_entry *e, 620 int tlno, int plno, int same, 621 struct origin *parent) 622{ 623 int chunk_end_lno; 624 memset(split, 0, sizeof(struct blame_entry [3])); 625 626 if (e->s_lno < tlno) { 627 /* there is a pre-chunk part not blamed on parent */ 628 split[0].suspect = origin_incref(e->suspect); 629 split[0].lno = e->lno; 630 split[0].s_lno = e->s_lno; 631 split[0].num_lines = tlno - e->s_lno; 632 split[1].lno = e->lno + tlno - e->s_lno; 633 split[1].s_lno = plno; 634 } 635 else { 636 split[1].lno = e->lno; 637 split[1].s_lno = plno + (e->s_lno - tlno); 638 } 639 640 if (same < e->s_lno + e->num_lines) { 641 /* there is a post-chunk part not blamed on parent */ 642 split[2].suspect = origin_incref(e->suspect); 643 split[2].lno = e->lno + (same - e->s_lno); 644 split[2].s_lno = e->s_lno + (same - e->s_lno); 645 split[2].num_lines = e->s_lno + e->num_lines - same; 646 chunk_end_lno = split[2].lno; 647 } 648 else 649 chunk_end_lno = e->lno + e->num_lines; 650 split[1].num_lines = chunk_end_lno - split[1].lno; 651 652 /* 653 * if it turns out there is nothing to blame the parent for, 654 * forget about the splitting. !split[1].suspect signals this. 655 */ 656 if (split[1].num_lines < 1) 657 return; 658 split[1].suspect = origin_incref(parent); 659} 660 661/* 662 * split_overlap() divided an existing blame e into up to three parts 663 * in split. Adjust the linked list of blames in the scoreboard to 664 * reflect the split. 665 */ 666static void split_blame(struct scoreboard *sb, 667 struct blame_entry *split, 668 struct blame_entry *e) 669{ 670 struct blame_entry *new_entry; 671 672 if (split[0].suspect && split[2].suspect) { 673 /* The first part (reuse storage for the existing entry e) */ 674 dup_entry(e, &split[0]); 675 676 /* The last part -- me */ 677 new_entry = xmalloc(sizeof(*new_entry)); 678 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry)); 679 add_blame_entry(sb, new_entry); 680 681 /* ... and the middle part -- parent */ 682 new_entry = xmalloc(sizeof(*new_entry)); 683 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry)); 684 add_blame_entry(sb, new_entry); 685 } 686 else if (!split[0].suspect && !split[2].suspect) 687 /* 688 * The parent covers the entire area; reuse storage for 689 * e and replace it with the parent. 690 */ 691 dup_entry(e, &split[1]); 692 else if (split[0].suspect) { 693 /* me and then parent */ 694 dup_entry(e, &split[0]); 695 696 new_entry = xmalloc(sizeof(*new_entry)); 697 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry)); 698 add_blame_entry(sb, new_entry); 699 } 700 else { 701 /* parent and then me */ 702 dup_entry(e, &split[1]); 703 704 new_entry = xmalloc(sizeof(*new_entry)); 705 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry)); 706 add_blame_entry(sb, new_entry); 707 } 708 709 if (DEBUG) { /* sanity */ 710 struct blame_entry *ent; 711 int lno = sb->ent->lno, corrupt = 0; 712 713 for (ent = sb->ent; ent; ent = ent->next) { 714 if (lno != ent->lno) 715 corrupt = 1; 716 if (ent->s_lno < 0) 717 corrupt = 1; 718 lno += ent->num_lines; 719 } 720 if (corrupt) { 721 lno = sb->ent->lno; 722 for (ent = sb->ent; ent; ent = ent->next) { 723 printf("L %8d l %8d n %8d\n", 724 lno, ent->lno, ent->num_lines); 725 lno = ent->lno + ent->num_lines; 726 } 727 die("oops"); 728 } 729 } 730} 731 732/* 733 * After splitting the blame, the origins used by the 734 * on-stack blame_entry should lose one refcnt each. 735 */ 736static void decref_split(struct blame_entry *split) 737{ 738 int i; 739 740 for (i = 0; i < 3; i++) 741 origin_decref(split[i].suspect); 742} 743 744/* 745 * Helper for blame_chunk(). blame_entry e is known to overlap with 746 * the patch hunk; split it and pass blame to the parent. 747 */ 748static void blame_overlap(struct scoreboard *sb, struct blame_entry *e, 749 int tlno, int plno, int same, 750 struct origin *parent) 751{ 752 struct blame_entry split[3]; 753 754 split_overlap(split, e, tlno, plno, same, parent); 755 if (split[1].suspect) 756 split_blame(sb, split, e); 757 decref_split(split); 758} 759 760/* 761 * Find the line number of the last line the target is suspected for. 762 */ 763static int find_last_in_target(struct scoreboard *sb, struct origin *target) 764{ 765 struct blame_entry *e; 766 int last_in_target = -1; 767 768 for (e = sb->ent; e; e = e->next) { 769 if (e->guilty || cmp_suspect(e->suspect, target)) 770 continue; 771 if (last_in_target < e->s_lno + e->num_lines) 772 last_in_target = e->s_lno + e->num_lines; 773 } 774 return last_in_target; 775} 776 777/* 778 * Process one hunk from the patch between the current suspect for 779 * blame_entry e and its parent. Find and split the overlap, and 780 * pass blame to the overlapping part to the parent. 781 */ 782static void blame_chunk(struct scoreboard *sb, 783 int tlno, int plno, int same, 784 struct origin *target, struct origin *parent) 785{ 786 struct blame_entry *e; 787 788 for (e = sb->ent; e; e = e->next) { 789 if (e->guilty || cmp_suspect(e->suspect, target)) 790 continue; 791 if (same <= e->s_lno) 792 continue; 793 if (tlno < e->s_lno + e->num_lines) 794 blame_overlap(sb, e, tlno, plno, same, parent); 795 } 796} 797 798/* 799 * We are looking at the origin 'target' and aiming to pass blame 800 * for the lines it is suspected to its parent. Run diff to find 801 * which lines came from parent and pass blame for them. 802 */ 803static int pass_blame_to_parent(struct scoreboard *sb, 804 struct origin *target, 805 struct origin *parent) 806{ 807 int i, last_in_target, plno, tlno; 808 struct patch *patch; 809 810 last_in_target = find_last_in_target(sb, target); 811 if (last_in_target < 0) 812 return 1; /* nothing remains for this target */ 813 814 patch = get_patch(parent, target); 815 plno = tlno = 0; 816 for (i = 0; i < patch->num; i++) { 817 struct chunk *chunk = &patch->chunks[i]; 818 819 blame_chunk(sb, tlno, plno, chunk->same, target, parent); 820 plno = chunk->p_next; 821 tlno = chunk->t_next; 822 } 823 /* The rest (i.e. anything after tlno) are the same as the parent */ 824 blame_chunk(sb, tlno, plno, last_in_target, target, parent); 825 826 free_patch(patch); 827 return 0; 828} 829 830/* 831 * The lines in blame_entry after splitting blames many times can become 832 * very small and trivial, and at some point it becomes pointless to 833 * blame the parents. E.g. "\t\t}\n\t}\n\n" appears everywhere in any 834 * ordinary C program, and it is not worth to say it was copied from 835 * totally unrelated file in the parent. 836 * 837 * Compute how trivial the lines in the blame_entry are. 838 */ 839static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e) 840{ 841 unsigned score; 842 const char *cp, *ep; 843 844 if (e->score) 845 return e->score; 846 847 score = 1; 848 cp = nth_line(sb, e->lno); 849 ep = nth_line(sb, e->lno + e->num_lines); 850 while (cp < ep) { 851 unsigned ch = *((unsigned char *)cp); 852 if (isalnum(ch)) 853 score++; 854 cp++; 855 } 856 e->score = score; 857 return score; 858} 859 860/* 861 * best_so_far[] and this[] are both a split of an existing blame_entry 862 * that passes blame to the parent. Maintain best_so_far the best split 863 * so far, by comparing this and best_so_far and copying this into 864 * bst_so_far as needed. 865 */ 866static void copy_split_if_better(struct scoreboard *sb, 867 struct blame_entry *best_so_far, 868 struct blame_entry *this) 869{ 870 int i; 871 872 if (!this[1].suspect) 873 return; 874 if (best_so_far[1].suspect) { 875 if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1])) 876 return; 877 } 878 879 for (i = 0; i < 3; i++) 880 origin_incref(this[i].suspect); 881 decref_split(best_so_far); 882 memcpy(best_so_far, this, sizeof(struct blame_entry [3])); 883} 884 885/* 886 * Find the lines from parent that are the same as ent so that 887 * we can pass blames to it. file_p has the blob contents for 888 * the parent. 889 */ 890static void find_copy_in_blob(struct scoreboard *sb, 891 struct blame_entry *ent, 892 struct origin *parent, 893 struct blame_entry *split, 894 mmfile_t *file_p) 895{ 896 const char *cp; 897 int cnt; 898 mmfile_t file_o; 899 struct patch *patch; 900 int i, plno, tlno; 901 902 /* 903 * Prepare mmfile that contains only the lines in ent. 904 */ 905 cp = nth_line(sb, ent->lno); 906 file_o.ptr = (char*) cp; 907 cnt = ent->num_lines; 908 909 while (cnt && cp < sb->final_buf + sb->final_buf_size) { 910 if (*cp++ == '\n') 911 cnt--; 912 } 913 file_o.size = cp - file_o.ptr; 914 915 patch = compare_buffer(file_p, &file_o, 1); 916 917 memset(split, 0, sizeof(struct blame_entry [3])); 918 plno = tlno = 0; 919 for (i = 0; i < patch->num; i++) { 920 struct chunk *chunk = &patch->chunks[i]; 921 922 /* tlno to chunk->same are the same as ent */ 923 if (ent->num_lines <= tlno) 924 break; 925 if (tlno < chunk->same) { 926 struct blame_entry this[3]; 927 split_overlap(this, ent, 928 tlno + ent->s_lno, plno, 929 chunk->same + ent->s_lno, 930 parent); 931 copy_split_if_better(sb, split, this); 932 decref_split(this); 933 } 934 plno = chunk->p_next; 935 tlno = chunk->t_next; 936 } 937 free_patch(patch); 938} 939 940/* 941 * See if lines currently target is suspected for can be attributed to 942 * parent. 943 */ 944static int find_move_in_parent(struct scoreboard *sb, 945 struct origin *target, 946 struct origin *parent) 947{ 948 int last_in_target, made_progress; 949 struct blame_entry *e, split[3]; 950 mmfile_t file_p; 951 952 last_in_target = find_last_in_target(sb, target); 953 if (last_in_target < 0) 954 return 1; /* nothing remains for this target */ 955 956 fill_origin_blob(parent, &file_p); 957 if (!file_p.ptr) 958 return 0; 959 960 made_progress = 1; 961 while (made_progress) { 962 made_progress = 0; 963 for (e = sb->ent; e; e = e->next) { 964 if (e->guilty || cmp_suspect(e->suspect, target)) 965 continue; 966 find_copy_in_blob(sb, e, parent, split, &file_p); 967 if (split[1].suspect && 968 blame_move_score < ent_score(sb, &split[1])) { 969 split_blame(sb, split, e); 970 made_progress = 1; 971 } 972 decref_split(split); 973 } 974 } 975 return 0; 976} 977 978struct blame_list { 979 struct blame_entry *ent; 980 struct blame_entry split[3]; 981}; 982 983/* 984 * Count the number of entries the target is suspected for, 985 * and prepare a list of entry and the best split. 986 */ 987static struct blame_list *setup_blame_list(struct scoreboard *sb, 988 struct origin *target, 989 int *num_ents_p) 990{ 991 struct blame_entry *e; 992 int num_ents, i; 993 struct blame_list *blame_list = NULL; 994 995 for (e = sb->ent, num_ents = 0; e; e = e->next) 996 if (!e->guilty && !cmp_suspect(e->suspect, target)) 997 num_ents++; 998 if (num_ents) { 999 blame_list = xcalloc(num_ents, sizeof(struct blame_list));1000 for (e = sb->ent, i = 0; e; e = e->next)1001 if (!e->guilty && !cmp_suspect(e->suspect, target))1002 blame_list[i++].ent = e;1003 }1004 *num_ents_p = num_ents;1005 return blame_list;1006}10071008/*1009 * For lines target is suspected for, see if we can find code movement1010 * across file boundary from the parent commit. porigin is the path1011 * in the parent we already tried.1012 */1013static int find_copy_in_parent(struct scoreboard *sb,1014 struct origin *target,1015 struct commit *parent,1016 struct origin *porigin,1017 int opt)1018{1019 struct diff_options diff_opts;1020 const char *paths[1];1021 int i, j;1022 int retval;1023 struct blame_list *blame_list;1024 int num_ents;10251026 blame_list = setup_blame_list(sb, target, &num_ents);1027 if (!blame_list)1028 return 1; /* nothing remains for this target */10291030 diff_setup(&diff_opts);1031 diff_opts.recursive = 1;1032 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;10331034 paths[0] = NULL;1035 diff_tree_setup_paths(paths, &diff_opts);1036 if (diff_setup_done(&diff_opts) < 0)1037 die("diff-setup");10381039 /* Try "find copies harder" on new path if requested;1040 * we do not want to use diffcore_rename() actually to1041 * match things up; find_copies_harder is set only to1042 * force diff_tree_sha1() to feed all filepairs to diff_queue,1043 * and this code needs to be after diff_setup_done(), which1044 * usually makes find-copies-harder imply copy detection.1045 */1046 if ((opt & PICKAXE_BLAME_COPY_HARDER) &&1047 (!porigin || strcmp(target->path, porigin->path)))1048 diff_opts.find_copies_harder = 1;10491050 diff_tree_sha1(parent->tree->object.sha1,1051 target->commit->tree->object.sha1,1052 "", &diff_opts);10531054 if (!diff_opts.find_copies_harder)1055 diffcore_std(&diff_opts);10561057 retval = 0;1058 while (1) {1059 int made_progress = 0;10601061 for (i = 0; i < diff_queued_diff.nr; i++) {1062 struct diff_filepair *p = diff_queued_diff.queue[i];1063 struct origin *norigin;1064 mmfile_t file_p;1065 struct blame_entry this[3];10661067 if (!DIFF_FILE_VALID(p->one))1068 continue; /* does not exist in parent */1069 if (porigin && !strcmp(p->one->path, porigin->path))1070 /* find_move already dealt with this path */1071 continue;10721073 norigin = get_origin(sb, parent, p->one->path);1074 hashcpy(norigin->blob_sha1, p->one->sha1);1075 fill_origin_blob(norigin, &file_p);1076 if (!file_p.ptr)1077 continue;10781079 for (j = 0; j < num_ents; j++) {1080 find_copy_in_blob(sb, blame_list[j].ent,1081 norigin, this, &file_p);1082 copy_split_if_better(sb, blame_list[j].split,1083 this);1084 decref_split(this);1085 }1086 origin_decref(norigin);1087 }10881089 for (j = 0; j < num_ents; j++) {1090 struct blame_entry *split = blame_list[j].split;1091 if (split[1].suspect &&1092 blame_copy_score < ent_score(sb, &split[1])) {1093 split_blame(sb, split, blame_list[j].ent);1094 made_progress = 1;1095 }1096 decref_split(split);1097 }1098 free(blame_list);10991100 if (!made_progress)1101 break;1102 blame_list = setup_blame_list(sb, target, &num_ents);1103 if (!blame_list) {1104 retval = 1;1105 break;1106 }1107 }1108 diff_flush(&diff_opts);11091110 return retval;1111}11121113/*1114 * The blobs of origin and porigin exactly match, so everything1115 * origin is suspected for can be blamed on the parent.1116 */1117static void pass_whole_blame(struct scoreboard *sb,1118 struct origin *origin, struct origin *porigin)1119{1120 struct blame_entry *e;11211122 if (!porigin->file.ptr && origin->file.ptr) {1123 /* Steal its file */1124 porigin->file = origin->file;1125 origin->file.ptr = NULL;1126 }1127 for (e = sb->ent; e; e = e->next) {1128 if (cmp_suspect(e->suspect, origin))1129 continue;1130 origin_incref(porigin);1131 origin_decref(e->suspect);1132 e->suspect = porigin;1133 }1134}11351136#define MAXPARENT 1611371138static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)1139{1140 int i, pass;1141 struct commit *commit = origin->commit;1142 struct commit_list *parent;1143 struct origin *parent_origin[MAXPARENT], *porigin;11441145 memset(parent_origin, 0, sizeof(parent_origin));11461147 /* The first pass looks for unrenamed path to optimize for1148 * common cases, then we look for renames in the second pass.1149 */1150 for (pass = 0; pass < 2; pass++) {1151 struct origin *(*find)(struct scoreboard *,1152 struct commit *, struct origin *);1153 find = pass ? find_rename : find_origin;11541155 for (i = 0, parent = commit->parents;1156 i < MAXPARENT && parent;1157 parent = parent->next, i++) {1158 struct commit *p = parent->item;1159 int j, same;11601161 if (parent_origin[i])1162 continue;1163 if (parse_commit(p))1164 continue;1165 porigin = find(sb, p, origin);1166 if (!porigin)1167 continue;1168 if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {1169 pass_whole_blame(sb, origin, porigin);1170 origin_decref(porigin);1171 goto finish;1172 }1173 for (j = same = 0; j < i; j++)1174 if (parent_origin[j] &&1175 !hashcmp(parent_origin[j]->blob_sha1,1176 porigin->blob_sha1)) {1177 same = 1;1178 break;1179 }1180 if (!same)1181 parent_origin[i] = porigin;1182 else1183 origin_decref(porigin);1184 }1185 }11861187 num_commits++;1188 for (i = 0, parent = commit->parents;1189 i < MAXPARENT && parent;1190 parent = parent->next, i++) {1191 struct origin *porigin = parent_origin[i];1192 if (!porigin)1193 continue;1194 if (pass_blame_to_parent(sb, origin, porigin))1195 goto finish;1196 }11971198 /*1199 * Optionally find moves in parents' files.1200 */1201 if (opt & PICKAXE_BLAME_MOVE)1202 for (i = 0, parent = commit->parents;1203 i < MAXPARENT && parent;1204 parent = parent->next, i++) {1205 struct origin *porigin = parent_origin[i];1206 if (!porigin)1207 continue;1208 if (find_move_in_parent(sb, origin, porigin))1209 goto finish;1210 }12111212 /*1213 * Optionally find copies from parents' files.1214 */1215 if (opt & PICKAXE_BLAME_COPY)1216 for (i = 0, parent = commit->parents;1217 i < MAXPARENT && parent;1218 parent = parent->next, i++) {1219 struct origin *porigin = parent_origin[i];1220 if (find_copy_in_parent(sb, origin, parent->item,1221 porigin, opt))1222 goto finish;1223 }12241225 finish:1226 for (i = 0; i < MAXPARENT; i++)1227 origin_decref(parent_origin[i]);1228}12291230/*1231 * Information on commits, used for output.1232 */1233struct commit_info1234{1235 char *author;1236 char *author_mail;1237 unsigned long author_time;1238 char *author_tz;12391240 /* filled only when asked for details */1241 char *committer;1242 char *committer_mail;1243 unsigned long committer_time;1244 char *committer_tz;12451246 char *summary;1247};12481249/*1250 * Parse author/committer line in the commit object buffer1251 */1252static void get_ac_line(const char *inbuf, const char *what,1253 int bufsz, char *person, char **mail,1254 unsigned long *time, char **tz)1255{1256 int len;1257 char *tmp, *endp;12581259 tmp = strstr(inbuf, what);1260 if (!tmp)1261 goto error_out;1262 tmp += strlen(what);1263 endp = strchr(tmp, '\n');1264 if (!endp)1265 len = strlen(tmp);1266 else1267 len = endp - tmp;1268 if (bufsz <= len) {1269 error_out:1270 /* Ugh */1271 person = *mail = *tz = "(unknown)";1272 *time = 0;1273 return;1274 }1275 memcpy(person, tmp, len);12761277 tmp = person;1278 tmp += len;1279 *tmp = 0;1280 while (*tmp != ' ')1281 tmp--;1282 *tz = tmp+1;12831284 *tmp = 0;1285 while (*tmp != ' ')1286 tmp--;1287 *time = strtoul(tmp, NULL, 10);12881289 *tmp = 0;1290 while (*tmp != ' ')1291 tmp--;1292 *mail = tmp + 1;1293 *tmp = 0;1294}12951296static void get_commit_info(struct commit *commit,1297 struct commit_info *ret,1298 int detailed)1299{1300 int len;1301 char *tmp, *endp;1302 static char author_buf[1024];1303 static char committer_buf[1024];1304 static char summary_buf[1024];13051306 /*1307 * We've operated without save_commit_buffer, so1308 * we now need to populate them for output.1309 */1310 if (!commit->buffer) {1311 char type[20];1312 unsigned long size;1313 commit->buffer =1314 read_sha1_file(commit->object.sha1, type, &size);1315 }1316 ret->author = author_buf;1317 get_ac_line(commit->buffer, "\nauthor ",1318 sizeof(author_buf), author_buf, &ret->author_mail,1319 &ret->author_time, &ret->author_tz);13201321 if (!detailed)1322 return;13231324 ret->committer = committer_buf;1325 get_ac_line(commit->buffer, "\ncommitter ",1326 sizeof(committer_buf), committer_buf, &ret->committer_mail,1327 &ret->committer_time, &ret->committer_tz);13281329 ret->summary = summary_buf;1330 tmp = strstr(commit->buffer, "\n\n");1331 if (!tmp) {1332 error_out:1333 sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));1334 return;1335 }1336 tmp += 2;1337 endp = strchr(tmp, '\n');1338 if (!endp)1339 goto error_out;1340 len = endp - tmp;1341 if (len >= sizeof(summary_buf))1342 goto error_out;1343 memcpy(summary_buf, tmp, len);1344 summary_buf[len] = 0;1345}13461347/*1348 * To allow LF and other nonportable characters in pathnames,1349 * they are c-style quoted as needed.1350 */1351static void write_filename_info(const char *path)1352{1353 printf("filename ");1354 write_name_quoted(NULL, 0, path, 1, stdout);1355 putchar('\n');1356}13571358/*1359 * The blame_entry is found to be guilty for the range. Mark it1360 * as such, and show it in incremental output.1361 */1362static void found_guilty_entry(struct blame_entry *ent)1363{1364 if (ent->guilty)1365 return;1366 ent->guilty = 1;1367 if (incremental) {1368 struct origin *suspect = ent->suspect;13691370 printf("%s %d %d %d\n",1371 sha1_to_hex(suspect->commit->object.sha1),1372 ent->s_lno + 1, ent->lno + 1, ent->num_lines);1373 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {1374 struct commit_info ci;1375 suspect->commit->object.flags |= METAINFO_SHOWN;1376 get_commit_info(suspect->commit, &ci, 1);1377 printf("author %s\n", ci.author);1378 printf("author-mail %s\n", ci.author_mail);1379 printf("author-time %lu\n", ci.author_time);1380 printf("author-tz %s\n", ci.author_tz);1381 printf("committer %s\n", ci.committer);1382 printf("committer-mail %s\n", ci.committer_mail);1383 printf("committer-time %lu\n", ci.committer_time);1384 printf("committer-tz %s\n", ci.committer_tz);1385 printf("summary %s\n", ci.summary);1386 if (suspect->commit->object.flags & UNINTERESTING)1387 printf("boundary\n");1388 }1389 write_filename_info(suspect->path);1390 }1391}13921393/*1394 * The main loop -- while the scoreboard has lines whose true origin1395 * is still unknown, pick one blame_entry, and allow its current1396 * suspect to pass blames to its parents.1397 */1398static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)1399{1400 while (1) {1401 struct blame_entry *ent;1402 struct commit *commit;1403 struct origin *suspect = NULL;14041405 /* find one suspect to break down */1406 for (ent = sb->ent; !suspect && ent; ent = ent->next)1407 if (!ent->guilty)1408 suspect = ent->suspect;1409 if (!suspect)1410 return; /* all done */14111412 /*1413 * We will use this suspect later in the loop,1414 * so hold onto it in the meantime.1415 */1416 origin_incref(suspect);1417 commit = suspect->commit;1418 if (!commit->object.parsed)1419 parse_commit(commit);1420 if (!(commit->object.flags & UNINTERESTING) &&1421 !(revs->max_age != -1 && commit->date < revs->max_age))1422 pass_blame(sb, suspect, opt);1423 else {1424 commit->object.flags |= UNINTERESTING;1425 if (commit->object.parsed)1426 mark_parents_uninteresting(commit);1427 }1428 /* treat root commit as boundary */1429 if (!commit->parents && !show_root)1430 commit->object.flags |= UNINTERESTING;14311432 /* Take responsibility for the remaining entries */1433 for (ent = sb->ent; ent; ent = ent->next)1434 if (!cmp_suspect(ent->suspect, suspect))1435 found_guilty_entry(ent);1436 origin_decref(suspect);14371438 if (DEBUG) /* sanity */1439 sanity_check_refcnt(sb);1440 }1441}14421443static const char *format_time(unsigned long time, const char *tz_str,1444 int show_raw_time)1445{1446 static char time_buf[128];1447 time_t t = time;1448 int minutes, tz;1449 struct tm *tm;14501451 if (show_raw_time) {1452 sprintf(time_buf, "%lu %s", time, tz_str);1453 return time_buf;1454 }14551456 tz = atoi(tz_str);1457 minutes = tz < 0 ? -tz : tz;1458 minutes = (minutes / 100)*60 + (minutes % 100);1459 minutes = tz < 0 ? -minutes : minutes;1460 t = time + minutes * 60;1461 tm = gmtime(&t);14621463 strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);1464 strcat(time_buf, tz_str);1465 return time_buf;1466}14671468#define OUTPUT_ANNOTATE_COMPAT 0011469#define OUTPUT_LONG_OBJECT_NAME 0021470#define OUTPUT_RAW_TIMESTAMP 0041471#define OUTPUT_PORCELAIN 0101472#define OUTPUT_SHOW_NAME 0201473#define OUTPUT_SHOW_NUMBER 0401474#define OUTPUT_SHOW_SCORE 010014751476static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)1477{1478 int cnt;1479 const char *cp;1480 struct origin *suspect = ent->suspect;1481 char hex[41];14821483 strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));1484 printf("%s%c%d %d %d\n",1485 hex,1486 ent->guilty ? ' ' : '*', // purely for debugging1487 ent->s_lno + 1,1488 ent->lno + 1,1489 ent->num_lines);1490 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {1491 struct commit_info ci;1492 suspect->commit->object.flags |= METAINFO_SHOWN;1493 get_commit_info(suspect->commit, &ci, 1);1494 printf("author %s\n", ci.author);1495 printf("author-mail %s\n", ci.author_mail);1496 printf("author-time %lu\n", ci.author_time);1497 printf("author-tz %s\n", ci.author_tz);1498 printf("committer %s\n", ci.committer);1499 printf("committer-mail %s\n", ci.committer_mail);1500 printf("committer-time %lu\n", ci.committer_time);1501 printf("committer-tz %s\n", ci.committer_tz);1502 write_filename_info(suspect->path);1503 printf("summary %s\n", ci.summary);1504 if (suspect->commit->object.flags & UNINTERESTING)1505 printf("boundary\n");1506 }1507 else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)1508 write_filename_info(suspect->path);15091510 cp = nth_line(sb, ent->lno);1511 for (cnt = 0; cnt < ent->num_lines; cnt++) {1512 char ch;1513 if (cnt)1514 printf("%s %d %d\n", hex,1515 ent->s_lno + 1 + cnt,1516 ent->lno + 1 + cnt);1517 putchar('\t');1518 do {1519 ch = *cp++;1520 putchar(ch);1521 } while (ch != '\n' &&1522 cp < sb->final_buf + sb->final_buf_size);1523 }1524}15251526static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)1527{1528 int cnt;1529 const char *cp;1530 struct origin *suspect = ent->suspect;1531 struct commit_info ci;1532 char hex[41];1533 int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);15341535 get_commit_info(suspect->commit, &ci, 1);1536 strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));15371538 cp = nth_line(sb, ent->lno);1539 for (cnt = 0; cnt < ent->num_lines; cnt++) {1540 char ch;1541 int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8;15421543 if (suspect->commit->object.flags & UNINTERESTING) {1544 if (!blank_boundary) {1545 length--;1546 putchar('^');1547 }1548 else1549 memset(hex, ' ', length);1550 }15511552 printf("%.*s", length, hex);1553 if (opt & OUTPUT_ANNOTATE_COMPAT)1554 printf("\t(%10s\t%10s\t%d)", ci.author,1555 format_time(ci.author_time, ci.author_tz,1556 show_raw_time),1557 ent->lno + 1 + cnt);1558 else {1559 if (opt & OUTPUT_SHOW_SCORE)1560 printf(" %*d %02d",1561 max_score_digits, ent->score,1562 ent->suspect->refcnt);1563 if (opt & OUTPUT_SHOW_NAME)1564 printf(" %-*.*s", longest_file, longest_file,1565 suspect->path);1566 if (opt & OUTPUT_SHOW_NUMBER)1567 printf(" %*d", max_orig_digits,1568 ent->s_lno + 1 + cnt);1569 printf(" (%-*.*s %10s %*d) ",1570 longest_author, longest_author, ci.author,1571 format_time(ci.author_time, ci.author_tz,1572 show_raw_time),1573 max_digits, ent->lno + 1 + cnt);1574 }1575 do {1576 ch = *cp++;1577 putchar(ch);1578 } while (ch != '\n' &&1579 cp < sb->final_buf + sb->final_buf_size);1580 }1581}15821583static void output(struct scoreboard *sb, int option)1584{1585 struct blame_entry *ent;15861587 if (option & OUTPUT_PORCELAIN) {1588 for (ent = sb->ent; ent; ent = ent->next) {1589 struct blame_entry *oth;1590 struct origin *suspect = ent->suspect;1591 struct commit *commit = suspect->commit;1592 if (commit->object.flags & MORE_THAN_ONE_PATH)1593 continue;1594 for (oth = ent->next; oth; oth = oth->next) {1595 if ((oth->suspect->commit != commit) ||1596 !strcmp(oth->suspect->path, suspect->path))1597 continue;1598 commit->object.flags |= MORE_THAN_ONE_PATH;1599 break;1600 }1601 }1602 }16031604 for (ent = sb->ent; ent; ent = ent->next) {1605 if (option & OUTPUT_PORCELAIN)1606 emit_porcelain(sb, ent);1607 else {1608 emit_other(sb, ent, option);1609 }1610 }1611}16121613/*1614 * To allow quick access to the contents of nth line in the1615 * final image, prepare an index in the scoreboard.1616 */1617static int prepare_lines(struct scoreboard *sb)1618{1619 const char *buf = sb->final_buf;1620 unsigned long len = sb->final_buf_size;1621 int num = 0, incomplete = 0, bol = 1;16221623 if (len && buf[len-1] != '\n')1624 incomplete++; /* incomplete line at the end */1625 while (len--) {1626 if (bol) {1627 sb->lineno = xrealloc(sb->lineno,1628 sizeof(int* ) * (num + 1));1629 sb->lineno[num] = buf - sb->final_buf;1630 bol = 0;1631 }1632 if (*buf++ == '\n') {1633 num++;1634 bol = 1;1635 }1636 }1637 sb->lineno = xrealloc(sb->lineno,1638 sizeof(int* ) * (num + incomplete + 1));1639 sb->lineno[num + incomplete] = buf - sb->final_buf;1640 sb->num_lines = num + incomplete;1641 return sb->num_lines;1642}16431644/*1645 * Add phony grafts for use with -S; this is primarily to1646 * support git-cvsserver that wants to give a linear history1647 * to its clients.1648 */1649static int read_ancestry(const char *graft_file)1650{1651 FILE *fp = fopen(graft_file, "r");1652 char buf[1024];1653 if (!fp)1654 return -1;1655 while (fgets(buf, sizeof(buf), fp)) {1656 /* The format is just "Commit Parent1 Parent2 ...\n" */1657 int len = strlen(buf);1658 struct commit_graft *graft = read_graft_line(buf, len);1659 if (graft)1660 register_commit_graft(graft, 0);1661 }1662 fclose(fp);1663 return 0;1664}16651666/*1667 * How many columns do we need to show line numbers in decimal?1668 */1669static int lineno_width(int lines)1670{1671 int i, width;16721673 for (width = 1, i = 10; i <= lines + 1; width++)1674 i *= 10;1675 return width;1676}16771678/*1679 * How many columns do we need to show line numbers, authors,1680 * and filenames?1681 */1682static void find_alignment(struct scoreboard *sb, int *option)1683{1684 int longest_src_lines = 0;1685 int longest_dst_lines = 0;1686 unsigned largest_score = 0;1687 struct blame_entry *e;16881689 for (e = sb->ent; e; e = e->next) {1690 struct origin *suspect = e->suspect;1691 struct commit_info ci;1692 int num;16931694 if (strcmp(suspect->path, sb->path))1695 *option |= OUTPUT_SHOW_NAME;1696 num = strlen(suspect->path);1697 if (longest_file < num)1698 longest_file = num;1699 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {1700 suspect->commit->object.flags |= METAINFO_SHOWN;1701 get_commit_info(suspect->commit, &ci, 1);1702 num = strlen(ci.author);1703 if (longest_author < num)1704 longest_author = num;1705 }1706 num = e->s_lno + e->num_lines;1707 if (longest_src_lines < num)1708 longest_src_lines = num;1709 num = e->lno + e->num_lines;1710 if (longest_dst_lines < num)1711 longest_dst_lines = num;1712 if (largest_score < ent_score(sb, e))1713 largest_score = ent_score(sb, e);1714 }1715 max_orig_digits = lineno_width(longest_src_lines);1716 max_digits = lineno_width(longest_dst_lines);1717 max_score_digits = lineno_width(largest_score);1718}17191720/*1721 * For debugging -- origin is refcounted, and this asserts that1722 * we do not underflow.1723 */1724static void sanity_check_refcnt(struct scoreboard *sb)1725{1726 int baa = 0;1727 struct blame_entry *ent;17281729 for (ent = sb->ent; ent; ent = ent->next) {1730 /* Nobody should have zero or negative refcnt */1731 if (ent->suspect->refcnt <= 0) {1732 fprintf(stderr, "%s in %s has negative refcnt %d\n",1733 ent->suspect->path,1734 sha1_to_hex(ent->suspect->commit->object.sha1),1735 ent->suspect->refcnt);1736 baa = 1;1737 }1738 }1739 for (ent = sb->ent; ent; ent = ent->next) {1740 /* Mark the ones that haven't been checked */1741 if (0 < ent->suspect->refcnt)1742 ent->suspect->refcnt = -ent->suspect->refcnt;1743 }1744 for (ent = sb->ent; ent; ent = ent->next) {1745 /*1746 * ... then pick each and see if they have the the1747 * correct refcnt.1748 */1749 int found;1750 struct blame_entry *e;1751 struct origin *suspect = ent->suspect;17521753 if (0 < suspect->refcnt)1754 continue;1755 suspect->refcnt = -suspect->refcnt; /* Unmark */1756 for (found = 0, e = sb->ent; e; e = e->next) {1757 if (e->suspect != suspect)1758 continue;1759 found++;1760 }1761 if (suspect->refcnt != found) {1762 fprintf(stderr, "%s in %s has refcnt %d, not %d\n",1763 ent->suspect->path,1764 sha1_to_hex(ent->suspect->commit->object.sha1),1765 ent->suspect->refcnt, found);1766 baa = 2;1767 }1768 }1769 if (baa) {1770 int opt = 0160;1771 find_alignment(sb, &opt);1772 output(sb, opt);1773 die("Baa %d!", baa);1774 }1775}17761777/*1778 * Used for the command line parsing; check if the path exists1779 * in the working tree.1780 */1781static int has_path_in_work_tree(const char *path)1782{1783 struct stat st;1784 return !lstat(path, &st);1785}17861787static unsigned parse_score(const char *arg)1788{1789 char *end;1790 unsigned long score = strtoul(arg, &end, 10);1791 if (*end)1792 return 0;1793 return score;1794}17951796static const char *add_prefix(const char *prefix, const char *path)1797{1798 if (!prefix || !prefix[0])1799 return path;1800 return prefix_path(prefix, strlen(prefix), path);1801}18021803/*1804 * Parsing of (comma separated) one item in the -L option1805 */1806static const char *parse_loc(const char *spec,1807 struct scoreboard *sb, long lno,1808 long begin, long *ret)1809{1810 char *term;1811 const char *line;1812 long num;1813 int reg_error;1814 regex_t regexp;1815 regmatch_t match[1];18161817 /* Allow "-L <something>,+20" to mean starting at <something>1818 * for 20 lines, or "-L <something>,-5" for 5 lines ending at1819 * <something>.1820 */1821 if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {1822 num = strtol(spec + 1, &term, 10);1823 if (term != spec + 1) {1824 if (spec[0] == '-')1825 num = 0 - num;1826 if (0 < num)1827 *ret = begin + num - 2;1828 else if (!num)1829 *ret = begin;1830 else1831 *ret = begin + num;1832 return term;1833 }1834 return spec;1835 }1836 num = strtol(spec, &term, 10);1837 if (term != spec) {1838 *ret = num;1839 return term;1840 }1841 if (spec[0] != '/')1842 return spec;18431844 /* it could be a regexp of form /.../ */1845 for (term = (char*) spec + 1; *term && *term != '/'; term++) {1846 if (*term == '\\')1847 term++;1848 }1849 if (*term != '/')1850 return spec;18511852 /* try [spec+1 .. term-1] as regexp */1853 *term = 0;1854 begin--; /* input is in human terms */1855 line = nth_line(sb, begin);18561857 if (!(reg_error = regcomp(®exp, spec + 1, REG_NEWLINE)) &&1858 !(reg_error = regexec(®exp, line, 1, match, 0))) {1859 const char *cp = line + match[0].rm_so;1860 const char *nline;18611862 while (begin++ < lno) {1863 nline = nth_line(sb, begin);1864 if (line <= cp && cp < nline)1865 break;1866 line = nline;1867 }1868 *ret = begin;1869 regfree(®exp);1870 *term++ = '/';1871 return term;1872 }1873 else {1874 char errbuf[1024];1875 regerror(reg_error, ®exp, errbuf, 1024);1876 die("-L parameter '%s': %s", spec + 1, errbuf);1877 }1878}18791880/*1881 * Parsing of -L option1882 */1883static void prepare_blame_range(struct scoreboard *sb,1884 const char *bottomtop,1885 long lno,1886 long *bottom, long *top)1887{1888 const char *term;18891890 term = parse_loc(bottomtop, sb, lno, 1, bottom);1891 if (*term == ',') {1892 term = parse_loc(term + 1, sb, lno, *bottom + 1, top);1893 if (*term)1894 usage(blame_usage);1895 }1896 if (*term)1897 usage(blame_usage);1898}18991900static int git_blame_config(const char *var, const char *value)1901{1902 if (!strcmp(var, "blame.showroot")) {1903 show_root = git_config_bool(var, value);1904 return 0;1905 }1906 if (!strcmp(var, "blame.blankboundary")) {1907 blank_boundary = git_config_bool(var, value);1908 return 0;1909 }1910 return git_default_config(var, value);1911}19121913int cmd_blame(int argc, const char **argv, const char *prefix)1914{1915 struct rev_info revs;1916 const char *path;1917 struct scoreboard sb;1918 struct origin *o;1919 struct blame_entry *ent;1920 int i, seen_dashdash, unk, opt;1921 long bottom, top, lno;1922 int output_option = 0;1923 const char *revs_file = NULL;1924 const char *final_commit_name = NULL;1925 char type[10];1926 const char *bottomtop = NULL;19271928 git_config(git_blame_config);1929 save_commit_buffer = 0;19301931 opt = 0;1932 seen_dashdash = 0;1933 for (unk = i = 1; i < argc; i++) {1934 const char *arg = argv[i];1935 if (*arg != '-')1936 break;1937 else if (!strcmp("-b", arg))1938 blank_boundary = 1;1939 else if (!strcmp("--root", arg))1940 show_root = 1;1941 else if (!strcmp("-c", arg))1942 output_option |= OUTPUT_ANNOTATE_COMPAT;1943 else if (!strcmp("-t", arg))1944 output_option |= OUTPUT_RAW_TIMESTAMP;1945 else if (!strcmp("-l", arg))1946 output_option |= OUTPUT_LONG_OBJECT_NAME;1947 else if (!strcmp("-S", arg) && ++i < argc)1948 revs_file = argv[i];1949 else if (!strncmp("-M", arg, 2)) {1950 opt |= PICKAXE_BLAME_MOVE;1951 blame_move_score = parse_score(arg+2);1952 }1953 else if (!strncmp("-C", arg, 2)) {1954 if (opt & PICKAXE_BLAME_COPY)1955 opt |= PICKAXE_BLAME_COPY_HARDER;1956 opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;1957 blame_copy_score = parse_score(arg+2);1958 }1959 else if (!strncmp("-L", arg, 2)) {1960 if (!arg[2]) {1961 if (++i >= argc)1962 usage(blame_usage);1963 arg = argv[i];1964 }1965 else1966 arg += 2;1967 if (bottomtop)1968 die("More than one '-L n,m' option given");1969 bottomtop = arg;1970 }1971 else if (!strcmp("--incremental", arg))1972 incremental = 1;1973 else if (!strcmp("--score-debug", arg))1974 output_option |= OUTPUT_SHOW_SCORE;1975 else if (!strcmp("-f", arg) ||1976 !strcmp("--show-name", arg))1977 output_option |= OUTPUT_SHOW_NAME;1978 else if (!strcmp("-n", arg) ||1979 !strcmp("--show-number", arg))1980 output_option |= OUTPUT_SHOW_NUMBER;1981 else if (!strcmp("-p", arg) ||1982 !strcmp("--porcelain", arg))1983 output_option |= OUTPUT_PORCELAIN;1984 else if (!strcmp("--", arg)) {1985 seen_dashdash = 1;1986 i++;1987 break;1988 }1989 else1990 argv[unk++] = arg;1991 }19921993 if (!incremental)1994 setup_pager();19951996 if (!blame_move_score)1997 blame_move_score = BLAME_DEFAULT_MOVE_SCORE;1998 if (!blame_copy_score)1999 blame_copy_score = BLAME_DEFAULT_COPY_SCORE;20002001 /*2002 * We have collected options unknown to us in argv[1..unk]2003 * which are to be passed to revision machinery if we are2004 * going to do the "bottom" processing.2005 *2006 * The remaining are:2007 *2008 * (1) if seen_dashdash, its either2009 * "-options -- <path>" or2010 * "-options -- <path> <rev>".2011 * but the latter is allowed only if there is no2012 * options that we passed to revision machinery.2013 *2014 * (2) otherwise, we may have "--" somewhere later and2015 * might be looking at the first one of multiple 'rev'2016 * parameters (e.g. " master ^next ^maint -- path").2017 * See if there is a dashdash first, and give the2018 * arguments before that to revision machinery.2019 * After that there must be one 'path'.2020 *2021 * (3) otherwise, its one of the three:2022 * "-options <path> <rev>"2023 * "-options <rev> <path>"2024 * "-options <path>"2025 * but again the first one is allowed only if2026 * there is no options that we passed to revision2027 * machinery.2028 */20292030 if (seen_dashdash) {2031 /* (1) */2032 if (argc <= i)2033 usage(blame_usage);2034 path = add_prefix(prefix, argv[i]);2035 if (i + 1 == argc - 1) {2036 if (unk != 1)2037 usage(blame_usage);2038 argv[unk++] = argv[i + 1];2039 }2040 else if (i + 1 != argc)2041 /* garbage at end */2042 usage(blame_usage);2043 }2044 else {2045 int j;2046 for (j = i; !seen_dashdash && j < argc; j++)2047 if (!strcmp(argv[j], "--"))2048 seen_dashdash = j;2049 if (seen_dashdash) {2050 if (seen_dashdash + 1 != argc - 1)2051 usage(blame_usage);2052 path = add_prefix(prefix, argv[seen_dashdash + 1]);2053 for (j = i; j < seen_dashdash; j++)2054 argv[unk++] = argv[j];2055 }2056 else {2057 /* (3) */2058 path = add_prefix(prefix, argv[i]);2059 if (i + 1 == argc - 1) {2060 final_commit_name = argv[i + 1];20612062 /* if (unk == 1) we could be getting2063 * old-style2064 */2065 if (unk == 1 && !has_path_in_work_tree(path)) {2066 path = add_prefix(prefix, argv[i + 1]);2067 final_commit_name = argv[i];2068 }2069 }2070 else if (i != argc - 1)2071 usage(blame_usage); /* garbage at end */20722073 if (!has_path_in_work_tree(path))2074 die("cannot stat path %s: %s",2075 path, strerror(errno));2076 }2077 }20782079 if (final_commit_name)2080 argv[unk++] = final_commit_name;20812082 /*2083 * Now we got rev and path. We do not want the path pruning2084 * but we may want "bottom" processing.2085 */2086 argv[unk++] = "--"; /* terminate the rev name */2087 argv[unk] = NULL;20882089 init_revisions(&revs, NULL);2090 setup_revisions(unk, argv, &revs, "HEAD");2091 memset(&sb, 0, sizeof(sb));20922093 /*2094 * There must be one and only one positive commit in the2095 * revs->pending array.2096 */2097 for (i = 0; i < revs.pending.nr; i++) {2098 struct object *obj = revs.pending.objects[i].item;2099 if (obj->flags & UNINTERESTING)2100 continue;2101 while (obj->type == OBJ_TAG)2102 obj = deref_tag(obj, NULL, 0);2103 if (obj->type != OBJ_COMMIT)2104 die("Non commit %s?",2105 revs.pending.objects[i].name);2106 if (sb.final)2107 die("More than one commit to dig from %s and %s?",2108 revs.pending.objects[i].name,2109 final_commit_name);2110 sb.final = (struct commit *) obj;2111 final_commit_name = revs.pending.objects[i].name;2112 }21132114 if (!sb.final) {2115 /*2116 * "--not A B -- path" without anything positive;2117 * default to HEAD.2118 */2119 unsigned char head_sha1[20];21202121 final_commit_name = "HEAD";2122 if (get_sha1(final_commit_name, head_sha1))2123 die("No such ref: HEAD");2124 sb.final = lookup_commit_reference(head_sha1);2125 add_pending_object(&revs, &(sb.final->object), "HEAD");2126 }21272128 /*2129 * If we have bottom, this will mark the ancestors of the2130 * bottom commits we would reach while traversing as2131 * uninteresting.2132 */2133 prepare_revision_walk(&revs);21342135 o = get_origin(&sb, sb.final, path);2136 if (fill_blob_sha1(o))2137 die("no such path %s in %s", path, final_commit_name);21382139 sb.final_buf = read_sha1_file(o->blob_sha1, type, &sb.final_buf_size);2140 num_read_blob++;2141 lno = prepare_lines(&sb);21422143 bottom = top = 0;2144 if (bottomtop)2145 prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);2146 if (bottom && top && top < bottom) {2147 long tmp;2148 tmp = top; top = bottom; bottom = tmp;2149 }2150 if (bottom < 1)2151 bottom = 1;2152 if (top < 1)2153 top = lno;2154 bottom--;2155 if (lno < top)2156 die("file %s has only %lu lines", path, lno);21572158 ent = xcalloc(1, sizeof(*ent));2159 ent->lno = bottom;2160 ent->num_lines = top - bottom;2161 ent->suspect = o;2162 ent->s_lno = bottom;21632164 sb.ent = ent;2165 sb.path = path;21662167 if (revs_file && read_ancestry(revs_file))2168 die("reading graft file %s failed: %s",2169 revs_file, strerror(errno));21702171 assign_blame(&sb, &revs, opt);21722173 if (incremental)2174 return 0;21752176 coalesce(&sb);21772178 if (!(output_option & OUTPUT_PORCELAIN))2179 find_alignment(&sb, &output_option);21802181 output(&sb, output_option);2182 free((void *)sb.final_buf);2183 for (ent = sb.ent; ent; ) {2184 struct blame_entry *e = ent->next;2185 free(ent);2186 ent = e;2187 }21882189 if (DEBUG) {2190 printf("num read blob: %d\n", num_read_blob);2191 printf("num get patch: %d\n", num_get_patch);2192 printf("num commits: %d\n", num_commits);2193 }2194 return 0;2195}