e5a50b92786a56b09b39034eac0f821ead4ae0fb
1/*
2 * Pickaxe
3 *
4 * Copyright (c) 2006, Junio C Hamano
5 */
6
7#include "cache.h"
8#include "builtin.h"
9#include "blob.h"
10#include "commit.h"
11#include "tag.h"
12#include "tree-walk.h"
13#include "diff.h"
14#include "diffcore.h"
15#include "revision.h"
16#include "xdiff-interface.h"
17
18#include <time.h>
19#include <sys/time.h>
20
21static char pickaxe_usage[] =
22"git-pickaxe [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [commit] [--] file\n"
23" -c, --compatibility Use the same output mode as git-annotate (Default: off)\n"
24" -l, --long Show long commit SHA1 (Default: off)\n"
25" -t, --time Show raw timestamp (Default: off)\n"
26" -f, --show-name Show original filename (Default: auto)\n"
27" -n, --show-number Show original linenumber (Default: off)\n"
28" -p, --porcelain Show in a format designed for machine consumption\n"
29" -L n,m Process only line range n,m, counting from 1\n"
30" -M, -C Find line movements within and across files\n"
31" -S revs-file Use revisions from revs-file instead of calling git-rev-list\n";
32
33static int longest_file;
34static int longest_author;
35static int max_orig_digits;
36static int max_digits;
37static int max_score_digits;
38
39#define PICKAXE_BLAME_MOVE 01
40#define PICKAXE_BLAME_COPY 02
41#define PICKAXE_BLAME_COPY_HARDER 04
42
43/*
44 * blame for a blame_entry with score lower than these thresholds
45 * is not passed to the parent using move/copy logic.
46 */
47static unsigned blame_move_score;
48static unsigned blame_copy_score;
49#define BLAME_DEFAULT_MOVE_SCORE 20
50#define BLAME_DEFAULT_COPY_SCORE 40
51
52/* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
53#define METAINFO_SHOWN (1u<<12)
54#define MORE_THAN_ONE_PATH (1u<<13)
55
56/*
57 * One blob in a commit
58 */
59struct origin {
60 struct commit *commit;
61 unsigned char blob_sha1[20];
62 char path[FLEX_ARRAY];
63};
64
65struct blame_entry {
66 struct blame_entry *prev;
67 struct blame_entry *next;
68
69 /* the first line of this group in the final image;
70 * internally all line numbers are 0 based.
71 */
72 int lno;
73
74 /* how many lines this group has */
75 int num_lines;
76
77 /* the commit that introduced this group into the final image */
78 struct origin *suspect;
79
80 /* true if the suspect is truly guilty; false while we have not
81 * checked if the group came from one of its parents.
82 */
83 char guilty;
84
85 /* the line number of the first line of this group in the
86 * suspect's file; internally all line numbers are 0 based.
87 */
88 int s_lno;
89
90 /* how significant this entry is -- cached to avoid
91 * scanning the lines over and over
92 */
93 unsigned score;
94};
95
96struct scoreboard {
97 /* the final commit (i.e. where we started digging from) */
98 struct commit *final;
99
100 const char *path;
101
102 /* the contents in the final; pointed into by buf pointers of
103 * blame_entries
104 */
105 const char *final_buf;
106 unsigned long final_buf_size;
107
108 /* linked list of blames */
109 struct blame_entry *ent;
110
111 int num_lines;
112 int *lineno;
113};
114
115static void coalesce(struct scoreboard *sb)
116{
117 struct blame_entry *ent, *next;
118
119 for (ent = sb->ent; ent && (next = ent->next); ent = next) {
120 if (ent->suspect == next->suspect &&
121 ent->guilty == next->guilty &&
122 ent->s_lno + ent->num_lines == next->s_lno) {
123 ent->num_lines += next->num_lines;
124 ent->next = next->next;
125 if (ent->next)
126 ent->next->prev = ent;
127 free(next);
128 next = ent; /* again */
129 }
130 }
131}
132
133static void free_origin(struct origin *o)
134{
135 free(o);
136}
137
138static struct origin *find_origin(struct scoreboard *sb,
139 struct commit *commit,
140 const char *path)
141{
142 struct blame_entry *ent;
143 struct origin *o;
144 unsigned mode;
145 char type[10];
146
147 for (ent = sb->ent; ent; ent = ent->next) {
148 if (ent->suspect->commit == commit &&
149 !strcmp(ent->suspect->path, path))
150 return ent->suspect;
151 }
152
153 o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
154 o->commit = commit;
155 strcpy(o->path, path);
156 if (get_tree_entry(commit->object.sha1, path, o->blob_sha1, &mode))
157 goto err_out;
158 if (sha1_object_info(o->blob_sha1, type, NULL) ||
159 strcmp(type, blob_type))
160 goto err_out;
161 return o;
162 err_out:
163 free_origin(o);
164 return NULL;
165}
166
167static struct origin *find_rename(struct scoreboard *sb,
168 struct commit *parent,
169 struct origin *origin)
170{
171 struct origin *porigin = NULL;
172 struct diff_options diff_opts;
173 int i;
174 const char *paths[1];
175
176 diff_setup(&diff_opts);
177 diff_opts.recursive = 1;
178 diff_opts.detect_rename = DIFF_DETECT_RENAME;
179 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
180 paths[0] = NULL;
181 diff_tree_setup_paths(paths, &diff_opts);
182 if (diff_setup_done(&diff_opts) < 0)
183 die("diff-setup");
184 diff_tree_sha1(origin->commit->tree->object.sha1,
185 parent->tree->object.sha1,
186 "", &diff_opts);
187 diffcore_std(&diff_opts);
188
189 for (i = 0; i < diff_queued_diff.nr; i++) {
190 struct diff_filepair *p = diff_queued_diff.queue[i];
191 if (p->status == 'R' && !strcmp(p->one->path, origin->path)) {
192 porigin = find_origin(sb, parent, p->two->path);
193 break;
194 }
195 }
196 diff_flush(&diff_opts);
197 return porigin;
198}
199
200struct chunk {
201 /* line number in postimage; up to but not including this
202 * line is the same as preimage
203 */
204 int same;
205
206 /* preimage line number after this chunk */
207 int p_next;
208
209 /* postimage line number after this chunk */
210 int t_next;
211};
212
213struct patch {
214 struct chunk *chunks;
215 int num;
216};
217
218struct blame_diff_state {
219 struct xdiff_emit_state xm;
220 struct patch *ret;
221 unsigned hunk_post_context;
222 unsigned hunk_in_pre_context : 1;
223};
224
225static void process_u_diff(void *state_, char *line, unsigned long len)
226{
227 struct blame_diff_state *state = state_;
228 struct chunk *chunk;
229 int off1, off2, len1, len2, num;
230
231 num = state->ret->num;
232 if (len < 4 || line[0] != '@' || line[1] != '@') {
233 if (state->hunk_in_pre_context && line[0] == ' ')
234 state->ret->chunks[num - 1].same++;
235 else {
236 state->hunk_in_pre_context = 0;
237 if (line[0] == ' ')
238 state->hunk_post_context++;
239 else
240 state->hunk_post_context = 0;
241 }
242 return;
243 }
244
245 if (num && state->hunk_post_context) {
246 chunk = &state->ret->chunks[num - 1];
247 chunk->p_next -= state->hunk_post_context;
248 chunk->t_next -= state->hunk_post_context;
249 }
250 state->ret->num = ++num;
251 state->ret->chunks = xrealloc(state->ret->chunks,
252 sizeof(struct chunk) * num);
253 chunk = &state->ret->chunks[num - 1];
254 if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
255 state->ret->num--;
256 return;
257 }
258
259 /* Line numbers in patch output are one based. */
260 off1--;
261 off2--;
262
263 chunk->same = len2 ? off2 : (off2 + 1);
264
265 chunk->p_next = off1 + (len1 ? len1 : 1);
266 chunk->t_next = chunk->same + len2;
267 state->hunk_in_pre_context = 1;
268 state->hunk_post_context = 0;
269}
270
271static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
272 int context)
273{
274 struct blame_diff_state state;
275 xpparam_t xpp;
276 xdemitconf_t xecfg;
277 xdemitcb_t ecb;
278
279 xpp.flags = XDF_NEED_MINIMAL;
280 xecfg.ctxlen = context;
281 xecfg.flags = 0;
282 ecb.outf = xdiff_outf;
283 ecb.priv = &state;
284 memset(&state, 0, sizeof(state));
285 state.xm.consume = process_u_diff;
286 state.ret = xmalloc(sizeof(struct patch));
287 state.ret->chunks = NULL;
288 state.ret->num = 0;
289
290 xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);
291
292 if (state.ret->num) {
293 struct chunk *chunk;
294 chunk = &state.ret->chunks[state.ret->num - 1];
295 chunk->p_next -= state.hunk_post_context;
296 chunk->t_next -= state.hunk_post_context;
297 }
298 return state.ret;
299}
300
301static struct patch *get_patch(struct origin *parent, struct origin *origin)
302{
303 mmfile_t file_p, file_o;
304 char type[10];
305 char *blob_p, *blob_o;
306 struct patch *patch;
307
308 blob_p = read_sha1_file(parent->blob_sha1, type,
309 (unsigned long *) &file_p.size);
310 blob_o = read_sha1_file(origin->blob_sha1, type,
311 (unsigned long *) &file_o.size);
312 file_p.ptr = blob_p;
313 file_o.ptr = blob_o;
314 if (!file_p.ptr || !file_o.ptr) {
315 free(blob_p);
316 free(blob_o);
317 return NULL;
318 }
319
320 patch = compare_buffer(&file_p, &file_o, 0);
321 free(blob_p);
322 free(blob_o);
323 return patch;
324}
325
326static void free_patch(struct patch *p)
327{
328 free(p->chunks);
329 free(p);
330}
331
332static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
333{
334 struct blame_entry *ent, *prev = NULL;
335
336 for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
337 prev = ent;
338
339 /* prev, if not NULL, is the last one that is below e */
340 e->prev = prev;
341 if (prev) {
342 e->next = prev->next;
343 prev->next = e;
344 }
345 else {
346 e->next = sb->ent;
347 sb->ent = e;
348 }
349 if (e->next)
350 e->next->prev = e;
351}
352
353static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
354{
355 struct blame_entry *p, *n;
356 p = dst->prev;
357 n = dst->next;
358 memcpy(dst, src, sizeof(*src));
359 dst->prev = p;
360 dst->next = n;
361 dst->score = 0;
362}
363
364static const char *nth_line(struct scoreboard *sb, int lno)
365{
366 return sb->final_buf + sb->lineno[lno];
367}
368
369static void split_overlap(struct blame_entry split[3],
370 struct blame_entry *e,
371 int tlno, int plno, int same,
372 struct origin *parent)
373{
374 /* it is known that lines between tlno to same came from
375 * parent, and e has an overlap with that range. it also is
376 * known that parent's line plno corresponds to e's line tlno.
377 *
378 * <---- e ----->
379 * <------>
380 * <------------>
381 * <------------>
382 * <------------------>
383 *
384 * Potentially we need to split e into three parts; before
385 * this chunk, the chunk to be blamed for parent, and after
386 * that portion.
387 */
388 int chunk_end_lno;
389 memset(split, 0, sizeof(struct blame_entry [3]));
390
391 if (e->s_lno < tlno) {
392 /* there is a pre-chunk part not blamed on parent */
393 split[0].suspect = e->suspect;
394 split[0].lno = e->lno;
395 split[0].s_lno = e->s_lno;
396 split[0].num_lines = tlno - e->s_lno;
397 split[1].lno = e->lno + tlno - e->s_lno;
398 split[1].s_lno = plno;
399 }
400 else {
401 split[1].lno = e->lno;
402 split[1].s_lno = plno + (e->s_lno - tlno);
403 }
404
405 if (same < e->s_lno + e->num_lines) {
406 /* there is a post-chunk part not blamed on parent */
407 split[2].suspect = e->suspect;
408 split[2].lno = e->lno + (same - e->s_lno);
409 split[2].s_lno = e->s_lno + (same - e->s_lno);
410 split[2].num_lines = e->s_lno + e->num_lines - same;
411 chunk_end_lno = split[2].lno;
412 }
413 else
414 chunk_end_lno = e->lno + e->num_lines;
415 split[1].num_lines = chunk_end_lno - split[1].lno;
416
417 if (split[1].num_lines < 1)
418 return;
419 split[1].suspect = parent;
420}
421
422static void split_blame(struct scoreboard *sb,
423 struct blame_entry split[3],
424 struct blame_entry *e)
425{
426 struct blame_entry *new_entry;
427
428 if (split[0].suspect && split[2].suspect) {
429 /* we need to split e into two and add another for parent */
430 dup_entry(e, &split[0]);
431
432 new_entry = xmalloc(sizeof(*new_entry));
433 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
434 add_blame_entry(sb, new_entry);
435
436 new_entry = xmalloc(sizeof(*new_entry));
437 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
438 add_blame_entry(sb, new_entry);
439 }
440 else if (!split[0].suspect && !split[2].suspect)
441 /* parent covers the entire area */
442 dup_entry(e, &split[1]);
443 else if (split[0].suspect) {
444 dup_entry(e, &split[0]);
445
446 new_entry = xmalloc(sizeof(*new_entry));
447 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
448 add_blame_entry(sb, new_entry);
449 }
450 else {
451 dup_entry(e, &split[1]);
452
453 new_entry = xmalloc(sizeof(*new_entry));
454 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
455 add_blame_entry(sb, new_entry);
456 }
457
458 if (1) { /* sanity */
459 struct blame_entry *ent;
460 int lno = 0, corrupt = 0;
461
462 for (ent = sb->ent; ent; ent = ent->next) {
463 if (lno != ent->lno)
464 corrupt = 1;
465 if (ent->s_lno < 0)
466 corrupt = 1;
467 lno += ent->num_lines;
468 }
469 if (corrupt) {
470 lno = 0;
471 for (ent = sb->ent; ent; ent = ent->next) {
472 printf("L %8d l %8d n %8d\n",
473 lno, ent->lno, ent->num_lines);
474 lno = ent->lno + ent->num_lines;
475 }
476 die("oops");
477 }
478 }
479}
480
481static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
482 int tlno, int plno, int same,
483 struct origin *parent)
484{
485 struct blame_entry split[3];
486
487 split_overlap(split, e, tlno, plno, same, parent);
488 if (!split[1].suspect)
489 return;
490 split_blame(sb, split, e);
491}
492
493static int find_last_in_target(struct scoreboard *sb, struct origin *target)
494{
495 struct blame_entry *e;
496 int last_in_target = -1;
497
498 for (e = sb->ent; e; e = e->next) {
499 if (e->guilty || e->suspect != target)
500 continue;
501 if (last_in_target < e->s_lno + e->num_lines)
502 last_in_target = e->s_lno + e->num_lines;
503 }
504 return last_in_target;
505}
506
507static void blame_chunk(struct scoreboard *sb,
508 int tlno, int plno, int same,
509 struct origin *target, struct origin *parent)
510{
511 struct blame_entry *e, *n;
512
513 for (e = sb->ent; e; e = n) {
514 n = e->next;
515 if (e->guilty || e->suspect != target)
516 continue;
517 if (same <= e->s_lno)
518 continue;
519 if (tlno < e->s_lno + e->num_lines)
520 blame_overlap(sb, e, tlno, plno, same, parent);
521 }
522}
523
524static int pass_blame_to_parent(struct scoreboard *sb,
525 struct origin *target,
526 struct origin *parent)
527{
528 int i, last_in_target, plno, tlno;
529 struct patch *patch;
530
531 last_in_target = find_last_in_target(sb, target);
532 if (last_in_target < 0)
533 return 1; /* nothing remains for this target */
534
535 patch = get_patch(parent, target);
536 plno = tlno = 0;
537 for (i = 0; i < patch->num; i++) {
538 struct chunk *chunk = &patch->chunks[i];
539
540 blame_chunk(sb, tlno, plno, chunk->same, target, parent);
541 plno = chunk->p_next;
542 tlno = chunk->t_next;
543 }
544 /* rest (i.e. anything above tlno) are the same as parent */
545 blame_chunk(sb, tlno, plno, last_in_target, target, parent);
546
547 free_patch(patch);
548 return 0;
549}
550
551static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
552{
553 unsigned score;
554 const char *cp, *ep;
555
556 if (e->score)
557 return e->score;
558
559 score = 0;
560 cp = nth_line(sb, e->lno);
561 ep = nth_line(sb, e->lno + e->num_lines);
562 while (cp < ep) {
563 unsigned ch = *((unsigned char *)cp);
564 if (isalnum(ch))
565 score++;
566 cp++;
567 }
568 e->score = score;
569 return score;
570}
571
572static void copy_split_if_better(struct scoreboard *sb,
573 struct blame_entry best_so_far[3],
574 struct blame_entry this[3])
575{
576 if (!this[1].suspect)
577 return;
578 if (best_so_far[1].suspect) {
579 if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
580 return;
581 }
582 memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
583}
584
585static void find_copy_in_blob(struct scoreboard *sb,
586 struct blame_entry *ent,
587 struct origin *parent,
588 struct blame_entry split[3],
589 mmfile_t *file_p)
590{
591 const char *cp;
592 int cnt;
593 mmfile_t file_o;
594 struct patch *patch;
595 int i, plno, tlno;
596
597 cp = nth_line(sb, ent->lno);
598 file_o.ptr = (char*) cp;
599 cnt = ent->num_lines;
600
601 while (cnt && cp < sb->final_buf + sb->final_buf_size) {
602 if (*cp++ == '\n')
603 cnt--;
604 }
605 file_o.size = cp - file_o.ptr;
606
607 patch = compare_buffer(file_p, &file_o, 1);
608
609 memset(split, 0, sizeof(struct blame_entry [3]));
610 plno = tlno = 0;
611 for (i = 0; i < patch->num; i++) {
612 struct chunk *chunk = &patch->chunks[i];
613
614 /* tlno to chunk->same are the same as ent */
615 if (ent->num_lines <= tlno)
616 break;
617 if (tlno < chunk->same) {
618 struct blame_entry this[3];
619 split_overlap(this, ent,
620 tlno + ent->s_lno, plno,
621 chunk->same + ent->s_lno,
622 parent);
623 copy_split_if_better(sb, split, this);
624 }
625 plno = chunk->p_next;
626 tlno = chunk->t_next;
627 }
628 free_patch(patch);
629}
630
631static int find_move_in_parent(struct scoreboard *sb,
632 struct origin *target,
633 struct origin *parent)
634{
635 int last_in_target;
636 struct blame_entry *ent, split[3];
637 mmfile_t file_p;
638 char type[10];
639 char *blob_p;
640
641 last_in_target = find_last_in_target(sb, target);
642 if (last_in_target < 0)
643 return 1; /* nothing remains for this target */
644
645 blob_p = read_sha1_file(parent->blob_sha1, type,
646 (unsigned long *) &file_p.size);
647 file_p.ptr = blob_p;
648 if (!file_p.ptr) {
649 free(blob_p);
650 return 0;
651 }
652
653 for (ent = sb->ent; ent; ent = ent->next) {
654 if (ent->suspect != target || ent->guilty)
655 continue;
656 find_copy_in_blob(sb, ent, parent, split, &file_p);
657 if (split[1].suspect &&
658 blame_move_score < ent_score(sb, &split[1]))
659 split_blame(sb, split, ent);
660 }
661 free(blob_p);
662 return 0;
663}
664
665static int find_copy_in_parent(struct scoreboard *sb,
666 struct origin *target,
667 struct commit *parent,
668 struct origin *porigin,
669 int opt)
670{
671 struct diff_options diff_opts;
672 const char *paths[1];
673 struct blame_entry *ent;
674 int i;
675
676 if (find_last_in_target(sb, target) < 0)
677 return 1; /* nothing remains for this target */
678
679 diff_setup(&diff_opts);
680 diff_opts.recursive = 1;
681 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
682
683 /* Try "find copies harder" on new path */
684 if ((opt & PICKAXE_BLAME_COPY_HARDER) &&
685 (!porigin || strcmp(target->path, porigin->path))) {
686 diff_opts.detect_rename = DIFF_DETECT_COPY;
687 diff_opts.find_copies_harder = 1;
688 }
689 paths[0] = NULL;
690 diff_tree_setup_paths(paths, &diff_opts);
691 if (diff_setup_done(&diff_opts) < 0)
692 die("diff-setup");
693 diff_tree_sha1(parent->tree->object.sha1,
694 target->commit->tree->object.sha1,
695 "", &diff_opts);
696 diffcore_std(&diff_opts);
697
698 for (ent = sb->ent; ent; ent = ent->next) {
699 struct blame_entry split[3];
700 if (ent->suspect != target || ent->guilty)
701 continue;
702
703 memset(split, 0, sizeof(split));
704 for (i = 0; i < diff_queued_diff.nr; i++) {
705 struct diff_filepair *p = diff_queued_diff.queue[i];
706 struct origin *norigin;
707 mmfile_t file_p;
708 char type[10];
709 char *blob;
710 struct blame_entry this[3];
711
712 if (!DIFF_FILE_VALID(p->one))
713 continue; /* does not exist in parent */
714 if (porigin && !strcmp(p->one->path, porigin->path))
715 /* find_move already dealt with this path */
716 continue;
717 norigin = find_origin(sb, parent, p->one->path);
718
719 blob = read_sha1_file(norigin->blob_sha1, type,
720 (unsigned long *) &file_p.size);
721 file_p.ptr = blob;
722 if (!file_p.ptr) {
723 free(blob);
724 continue;
725 }
726 find_copy_in_blob(sb, ent, norigin, this, &file_p);
727 copy_split_if_better(sb, split, this);
728 }
729 if (split[1].suspect &&
730 blame_copy_score < ent_score(sb, &split[1]))
731 split_blame(sb, split, ent);
732 }
733 diff_flush(&diff_opts);
734
735 return 0;
736}
737
738#define MAXPARENT 16
739
740static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
741{
742 int i;
743 struct commit *commit = origin->commit;
744 struct commit_list *parent;
745 struct origin *parent_origin[MAXPARENT], *porigin;
746
747 memset(parent_origin, 0, sizeof(parent_origin));
748 for (i = 0, parent = commit->parents;
749 i < MAXPARENT && parent;
750 parent = parent->next, i++) {
751 struct commit *p = parent->item;
752
753 if (parse_commit(p))
754 continue;
755 porigin = find_origin(sb, parent->item, origin->path);
756 if (!porigin)
757 porigin = find_rename(sb, parent->item, origin);
758 if (!porigin)
759 continue;
760 if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
761 struct blame_entry *e;
762 for (e = sb->ent; e; e = e->next)
763 if (e->suspect == origin)
764 e->suspect = porigin;
765 /* now everything blamed for origin is blamed for
766 * porigin, we do not need to keep it anymore.
767 * Do not free porigin (or the ones we got from
768 * earlier round); they may still be used elsewhere.
769 */
770 free_origin(origin);
771 return;
772 }
773 parent_origin[i] = porigin;
774 }
775
776 for (i = 0, parent = commit->parents;
777 i < MAXPARENT && parent;
778 parent = parent->next, i++) {
779 struct origin *porigin = parent_origin[i];
780 if (!porigin)
781 continue;
782 if (pass_blame_to_parent(sb, origin, porigin))
783 return;
784 }
785
786 /*
787 * Optionally run "miff" to find moves in parents' files here.
788 */
789 if (opt & PICKAXE_BLAME_MOVE)
790 for (i = 0, parent = commit->parents;
791 i < MAXPARENT && parent;
792 parent = parent->next, i++) {
793 struct origin *porigin = parent_origin[i];
794 if (!porigin)
795 continue;
796 if (find_move_in_parent(sb, origin, porigin))
797 return;
798 }
799
800 /*
801 * Optionally run "ciff" to find copies from parents' files here.
802 */
803 if (opt & PICKAXE_BLAME_COPY)
804 for (i = 0, parent = commit->parents;
805 i < MAXPARENT && parent;
806 parent = parent->next, i++) {
807 struct origin *porigin = parent_origin[i];
808 if (find_copy_in_parent(sb, origin, parent->item,
809 porigin, opt))
810 return;
811 }
812}
813
814static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
815{
816 while (1) {
817 struct blame_entry *ent;
818 struct commit *commit;
819 struct origin *suspect = NULL;
820
821 /* find one suspect to break down */
822 for (ent = sb->ent; !suspect && ent; ent = ent->next)
823 if (!ent->guilty)
824 suspect = ent->suspect;
825 if (!suspect)
826 return; /* all done */
827
828 commit = suspect->commit;
829 parse_commit(commit);
830 if (!(commit->object.flags & UNINTERESTING) &&
831 !(revs->max_age != -1 && commit->date < revs->max_age))
832 pass_blame(sb, suspect, opt);
833
834 /* Take responsibility for the remaining entries */
835 for (ent = sb->ent; ent; ent = ent->next)
836 if (ent->suspect == suspect)
837 ent->guilty = 1;
838 }
839}
840
841static const char *format_time(unsigned long time, const char *tz_str,
842 int show_raw_time)
843{
844 static char time_buf[128];
845 time_t t = time;
846 int minutes, tz;
847 struct tm *tm;
848
849 if (show_raw_time) {
850 sprintf(time_buf, "%lu %s", time, tz_str);
851 return time_buf;
852 }
853
854 tz = atoi(tz_str);
855 minutes = tz < 0 ? -tz : tz;
856 minutes = (minutes / 100)*60 + (minutes % 100);
857 minutes = tz < 0 ? -minutes : minutes;
858 t = time + minutes * 60;
859 tm = gmtime(&t);
860
861 strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
862 strcat(time_buf, tz_str);
863 return time_buf;
864}
865
866struct commit_info
867{
868 char *author;
869 char *author_mail;
870 unsigned long author_time;
871 char *author_tz;
872
873 /* filled only when asked for details */
874 char *committer;
875 char *committer_mail;
876 unsigned long committer_time;
877 char *committer_tz;
878
879 char *summary;
880};
881
882static void get_ac_line(const char *inbuf, const char *what,
883 int bufsz, char *person, char **mail,
884 unsigned long *time, char **tz)
885{
886 int len;
887 char *tmp, *endp;
888
889 tmp = strstr(inbuf, what);
890 if (!tmp)
891 goto error_out;
892 tmp += strlen(what);
893 endp = strchr(tmp, '\n');
894 if (!endp)
895 len = strlen(tmp);
896 else
897 len = endp - tmp;
898 if (bufsz <= len) {
899 error_out:
900 /* Ugh */
901 person = *mail = *tz = "(unknown)";
902 *time = 0;
903 return;
904 }
905 memcpy(person, tmp, len);
906
907 tmp = person;
908 tmp += len;
909 *tmp = 0;
910 while (*tmp != ' ')
911 tmp--;
912 *tz = tmp+1;
913
914 *tmp = 0;
915 while (*tmp != ' ')
916 tmp--;
917 *time = strtoul(tmp, NULL, 10);
918
919 *tmp = 0;
920 while (*tmp != ' ')
921 tmp--;
922 *mail = tmp + 1;
923 *tmp = 0;
924}
925
926static void get_commit_info(struct commit *commit,
927 struct commit_info *ret,
928 int detailed)
929{
930 int len;
931 char *tmp, *endp;
932 static char author_buf[1024];
933 static char committer_buf[1024];
934 static char summary_buf[1024];
935
936 ret->author = author_buf;
937 get_ac_line(commit->buffer, "\nauthor ",
938 sizeof(author_buf), author_buf, &ret->author_mail,
939 &ret->author_time, &ret->author_tz);
940
941 if (!detailed)
942 return;
943
944 ret->committer = committer_buf;
945 get_ac_line(commit->buffer, "\ncommitter ",
946 sizeof(committer_buf), committer_buf, &ret->committer_mail,
947 &ret->committer_time, &ret->committer_tz);
948
949 ret->summary = summary_buf;
950 tmp = strstr(commit->buffer, "\n\n");
951 if (!tmp) {
952 error_out:
953 sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
954 return;
955 }
956 tmp += 2;
957 endp = strchr(tmp, '\n');
958 if (!endp)
959 goto error_out;
960 len = endp - tmp;
961 if (len >= sizeof(summary_buf))
962 goto error_out;
963 memcpy(summary_buf, tmp, len);
964 summary_buf[len] = 0;
965}
966
967#define OUTPUT_ANNOTATE_COMPAT 001
968#define OUTPUT_LONG_OBJECT_NAME 002
969#define OUTPUT_RAW_TIMESTAMP 004
970#define OUTPUT_PORCELAIN 010
971#define OUTPUT_SHOW_NAME 020
972#define OUTPUT_SHOW_NUMBER 040
973#define OUTPUT_SHOW_SCORE 0100
974
975static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
976{
977 int cnt;
978 const char *cp;
979 struct origin *suspect = ent->suspect;
980 char hex[41];
981
982 strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
983 printf("%s%c%d %d %d\n",
984 hex,
985 ent->guilty ? ' ' : '*', // purely for debugging
986 ent->s_lno + 1,
987 ent->lno + 1,
988 ent->num_lines);
989 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
990 struct commit_info ci;
991 suspect->commit->object.flags |= METAINFO_SHOWN;
992 get_commit_info(suspect->commit, &ci, 1);
993 printf("author %s\n", ci.author);
994 printf("author-mail %s\n", ci.author_mail);
995 printf("author-time %lu\n", ci.author_time);
996 printf("author-tz %s\n", ci.author_tz);
997 printf("committer %s\n", ci.committer);
998 printf("committer-mail %s\n", ci.committer_mail);
999 printf("committer-time %lu\n", ci.committer_time);
1000 printf("committer-tz %s\n", ci.committer_tz);
1001 printf("filename %s\n", suspect->path);
1002 printf("summary %s\n", ci.summary);
1003 }
1004 else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1005 printf("filename %s\n", suspect->path);
1006
1007 cp = nth_line(sb, ent->lno);
1008 for (cnt = 0; cnt < ent->num_lines; cnt++) {
1009 char ch;
1010 if (cnt)
1011 printf("%s %d %d\n", hex,
1012 ent->s_lno + 1 + cnt,
1013 ent->lno + 1 + cnt);
1014 putchar('\t');
1015 do {
1016 ch = *cp++;
1017 putchar(ch);
1018 } while (ch != '\n' &&
1019 cp < sb->final_buf + sb->final_buf_size);
1020 }
1021}
1022
1023static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1024{
1025 int cnt;
1026 const char *cp;
1027 struct origin *suspect = ent->suspect;
1028 struct commit_info ci;
1029 char hex[41];
1030 int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1031
1032 get_commit_info(suspect->commit, &ci, 1);
1033 strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1034
1035 cp = nth_line(sb, ent->lno);
1036 for (cnt = 0; cnt < ent->num_lines; cnt++) {
1037 char ch;
1038
1039 printf("%.*s", (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8, hex);
1040 if (opt & OUTPUT_ANNOTATE_COMPAT)
1041 printf("\t(%10s\t%10s\t%d)", ci.author,
1042 format_time(ci.author_time, ci.author_tz,
1043 show_raw_time),
1044 ent->lno + 1 + cnt);
1045 else {
1046 if (opt & OUTPUT_SHOW_SCORE)
1047 printf(" %*d", max_score_digits, ent->score);
1048 if (opt & OUTPUT_SHOW_NAME)
1049 printf(" %-*.*s", longest_file, longest_file,
1050 suspect->path);
1051 if (opt & OUTPUT_SHOW_NUMBER)
1052 printf(" %*d", max_orig_digits,
1053 ent->s_lno + 1 + cnt);
1054 printf(" (%-*.*s %10s %*d) ",
1055 longest_author, longest_author, ci.author,
1056 format_time(ci.author_time, ci.author_tz,
1057 show_raw_time),
1058 max_digits, ent->lno + 1 + cnt);
1059 }
1060 do {
1061 ch = *cp++;
1062 putchar(ch);
1063 } while (ch != '\n' &&
1064 cp < sb->final_buf + sb->final_buf_size);
1065 }
1066}
1067
1068static void output(struct scoreboard *sb, int option)
1069{
1070 struct blame_entry *ent;
1071
1072 if (option & OUTPUT_PORCELAIN) {
1073 for (ent = sb->ent; ent; ent = ent->next) {
1074 struct blame_entry *oth;
1075 struct origin *suspect = ent->suspect;
1076 struct commit *commit = suspect->commit;
1077 if (commit->object.flags & MORE_THAN_ONE_PATH)
1078 continue;
1079 for (oth = ent->next; oth; oth = oth->next) {
1080 if ((oth->suspect->commit != commit) ||
1081 !strcmp(oth->suspect->path, suspect->path))
1082 continue;
1083 commit->object.flags |= MORE_THAN_ONE_PATH;
1084 break;
1085 }
1086 }
1087 }
1088
1089 for (ent = sb->ent; ent; ent = ent->next) {
1090 if (option & OUTPUT_PORCELAIN)
1091 emit_porcelain(sb, ent);
1092 else {
1093 emit_other(sb, ent, option);
1094 }
1095 }
1096}
1097
1098static int prepare_lines(struct scoreboard *sb)
1099{
1100 const char *buf = sb->final_buf;
1101 unsigned long len = sb->final_buf_size;
1102 int num = 0, incomplete = 0, bol = 1;
1103
1104 if (len && buf[len-1] != '\n')
1105 incomplete++; /* incomplete line at the end */
1106 while (len--) {
1107 if (bol) {
1108 sb->lineno = xrealloc(sb->lineno,
1109 sizeof(int* ) * (num + 1));
1110 sb->lineno[num] = buf - sb->final_buf;
1111 bol = 0;
1112 }
1113 if (*buf++ == '\n') {
1114 num++;
1115 bol = 1;
1116 }
1117 }
1118 sb->lineno = xrealloc(sb->lineno,
1119 sizeof(int* ) * (num + incomplete + 1));
1120 sb->lineno[num + incomplete] = buf - sb->final_buf;
1121 sb->num_lines = num + incomplete;
1122 return sb->num_lines;
1123}
1124
1125static int read_ancestry(const char *graft_file)
1126{
1127 FILE *fp = fopen(graft_file, "r");
1128 char buf[1024];
1129 if (!fp)
1130 return -1;
1131 while (fgets(buf, sizeof(buf), fp)) {
1132 /* The format is just "Commit Parent1 Parent2 ...\n" */
1133 int len = strlen(buf);
1134 struct commit_graft *graft = read_graft_line(buf, len);
1135 register_commit_graft(graft, 0);
1136 }
1137 fclose(fp);
1138 return 0;
1139}
1140
1141static int lineno_width(int lines)
1142{
1143 int i, width;
1144
1145 for (width = 1, i = 10; i <= lines + 1; width++)
1146 i *= 10;
1147 return width;
1148}
1149
1150static void find_alignment(struct scoreboard *sb, int *option)
1151{
1152 int longest_src_lines = 0;
1153 int longest_dst_lines = 0;
1154 unsigned largest_score = 0;
1155 struct blame_entry *e;
1156
1157 for (e = sb->ent; e; e = e->next) {
1158 struct origin *suspect = e->suspect;
1159 struct commit_info ci;
1160 int num;
1161
1162 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1163 suspect->commit->object.flags |= METAINFO_SHOWN;
1164 get_commit_info(suspect->commit, &ci, 1);
1165 if (strcmp(suspect->path, sb->path))
1166 *option |= OUTPUT_SHOW_NAME;
1167 num = strlen(suspect->path);
1168 if (longest_file < num)
1169 longest_file = num;
1170 num = strlen(ci.author);
1171 if (longest_author < num)
1172 longest_author = num;
1173 }
1174 num = e->s_lno + e->num_lines;
1175 if (longest_src_lines < num)
1176 longest_src_lines = num;
1177 num = e->lno + e->num_lines;
1178 if (longest_dst_lines < num)
1179 longest_dst_lines = num;
1180 if (largest_score < ent_score(sb, e))
1181 largest_score = ent_score(sb, e);
1182 }
1183 max_orig_digits = lineno_width(longest_src_lines);
1184 max_digits = lineno_width(longest_dst_lines);
1185 max_score_digits = lineno_width(largest_score);
1186}
1187
1188static int has_path_in_work_tree(const char *path)
1189{
1190 struct stat st;
1191 return !lstat(path, &st);
1192}
1193
1194static unsigned parse_score(const char *arg)
1195{
1196 char *end;
1197 unsigned long score = strtoul(arg, &end, 10);
1198 if (*end)
1199 return 0;
1200 return score;
1201}
1202
1203int cmd_pickaxe(int argc, const char **argv, const char *prefix)
1204{
1205 struct rev_info revs;
1206 const char *path;
1207 struct scoreboard sb;
1208 struct origin *o;
1209 struct blame_entry *ent;
1210 int i, seen_dashdash, unk, opt;
1211 long bottom, top, lno;
1212 int output_option = 0;
1213 const char *revs_file = NULL;
1214 const char *final_commit_name = NULL;
1215 char type[10];
1216
1217 opt = 0;
1218 bottom = top = 0;
1219 seen_dashdash = 0;
1220 for (unk = i = 1; i < argc; i++) {
1221 const char *arg = argv[i];
1222 if (*arg != '-')
1223 break;
1224 else if (!strcmp("-c", arg))
1225 output_option |= OUTPUT_ANNOTATE_COMPAT;
1226 else if (!strcmp("-t", arg))
1227 output_option |= OUTPUT_RAW_TIMESTAMP;
1228 else if (!strcmp("-l", arg))
1229 output_option |= OUTPUT_LONG_OBJECT_NAME;
1230 else if (!strcmp("-S", arg) && ++i < argc)
1231 revs_file = argv[i];
1232 else if (!strncmp("-M", arg, 2)) {
1233 opt |= PICKAXE_BLAME_MOVE;
1234 blame_move_score = parse_score(arg+2);
1235 }
1236 else if (!strncmp("-C", arg, 2)) {
1237 if (opt & PICKAXE_BLAME_COPY)
1238 opt |= PICKAXE_BLAME_COPY_HARDER;
1239 opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
1240 blame_copy_score = parse_score(arg+2);
1241 }
1242 else if (!strcmp("-L", arg) && ++i < argc) {
1243 char *term;
1244 arg = argv[i];
1245 if (bottom || top)
1246 die("More than one '-L n,m' option given");
1247 bottom = strtol(arg, &term, 10);
1248 if (*term == ',') {
1249 top = strtol(term + 1, &term, 10);
1250 if (*term)
1251 usage(pickaxe_usage);
1252 }
1253 if (bottom && top && top < bottom) {
1254 unsigned long tmp;
1255 tmp = top; top = bottom; bottom = tmp;
1256 }
1257 }
1258 else if (!strcmp("--score-debug", arg))
1259 output_option |= OUTPUT_SHOW_SCORE;
1260 else if (!strcmp("-f", arg) ||
1261 !strcmp("--show-name", arg))
1262 output_option |= OUTPUT_SHOW_NAME;
1263 else if (!strcmp("-n", arg) ||
1264 !strcmp("--show-number", arg))
1265 output_option |= OUTPUT_SHOW_NUMBER;
1266 else if (!strcmp("-p", arg) ||
1267 !strcmp("--porcelain", arg))
1268 output_option |= OUTPUT_PORCELAIN;
1269 else if (!strcmp("--", arg)) {
1270 seen_dashdash = 1;
1271 i++;
1272 break;
1273 }
1274 else
1275 argv[unk++] = arg;
1276 }
1277
1278 if (!blame_move_score)
1279 blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
1280 if (!blame_copy_score)
1281 blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
1282
1283 /* We have collected options unknown to us in argv[1..unk]
1284 * which are to be passed to revision machinery if we are
1285 * going to do the "bottom" procesing.
1286 *
1287 * The remaining are:
1288 *
1289 * (1) if seen_dashdash, its either
1290 * "-options -- <path>" or
1291 * "-options -- <path> <rev>".
1292 * but the latter is allowed only if there is no
1293 * options that we passed to revision machinery.
1294 *
1295 * (2) otherwise, we may have "--" somewhere later and
1296 * might be looking at the first one of multiple 'rev'
1297 * parameters (e.g. " master ^next ^maint -- path").
1298 * See if there is a dashdash first, and give the
1299 * arguments before that to revision machinery.
1300 * After that there must be one 'path'.
1301 *
1302 * (3) otherwise, its one of the three:
1303 * "-options <path> <rev>"
1304 * "-options <rev> <path>"
1305 * "-options <path>"
1306 * but again the first one is allowed only if
1307 * there is no options that we passed to revision
1308 * machinery.
1309 */
1310
1311 if (seen_dashdash) {
1312 /* (1) */
1313 if (argc <= i)
1314 usage(pickaxe_usage);
1315 path = argv[i];
1316 if (i + 1 == argc - 1) {
1317 if (unk != 1)
1318 usage(pickaxe_usage);
1319 argv[unk++] = argv[i + 1];
1320 }
1321 else if (i + 1 != argc)
1322 /* garbage at end */
1323 usage(pickaxe_usage);
1324 }
1325 else {
1326 int j;
1327 for (j = i; !seen_dashdash && j < argc; j++)
1328 if (!strcmp(argv[j], "--"))
1329 seen_dashdash = j;
1330 if (seen_dashdash) {
1331 if (seen_dashdash + 1 != argc - 1)
1332 usage(pickaxe_usage);
1333 path = argv[seen_dashdash + 1];
1334 for (j = i; j < seen_dashdash; j++)
1335 argv[unk++] = argv[j];
1336 }
1337 else {
1338 /* (3) */
1339 path = argv[i];
1340 if (i + 1 == argc - 1) {
1341 final_commit_name = argv[i + 1];
1342
1343 /* if (unk == 1) we could be getting
1344 * old-style
1345 */
1346 if (unk == 1 && !has_path_in_work_tree(path)) {
1347 path = argv[i + 1];
1348 final_commit_name = argv[i];
1349 }
1350 }
1351 else if (i != argc - 1)
1352 usage(pickaxe_usage); /* garbage at end */
1353
1354 if (!has_path_in_work_tree(path))
1355 die("cannot stat path %s: %s",
1356 path, strerror(errno));
1357 }
1358 }
1359
1360 if (final_commit_name)
1361 argv[unk++] = final_commit_name;
1362
1363 /* Now we got rev and path. We do not want the path pruning
1364 * but we may want "bottom" processing.
1365 */
1366 argv[unk] = NULL;
1367
1368 init_revisions(&revs, NULL);
1369 setup_revisions(unk, argv, &revs, "HEAD");
1370 memset(&sb, 0, sizeof(sb));
1371
1372 /* There must be one and only one positive commit in the
1373 * revs->pending array.
1374 */
1375 for (i = 0; i < revs.pending.nr; i++) {
1376 struct object *obj = revs.pending.objects[i].item;
1377 if (obj->flags & UNINTERESTING)
1378 continue;
1379 while (obj->type == OBJ_TAG)
1380 obj = deref_tag(obj, NULL, 0);
1381 if (obj->type != OBJ_COMMIT)
1382 die("Non commit %s?",
1383 revs.pending.objects[i].name);
1384 if (sb.final)
1385 die("More than one commit to dig from %s and %s?",
1386 revs.pending.objects[i].name,
1387 final_commit_name);
1388 sb.final = (struct commit *) obj;
1389 final_commit_name = revs.pending.objects[i].name;
1390 }
1391
1392 if (!sb.final) {
1393 /* "--not A B -- path" without anything positive */
1394 unsigned char head_sha1[20];
1395
1396 final_commit_name = "HEAD";
1397 if (get_sha1(final_commit_name, head_sha1))
1398 die("No such ref: HEAD");
1399 sb.final = lookup_commit_reference(head_sha1);
1400 add_pending_object(&revs, &(sb.final->object), "HEAD");
1401 }
1402
1403 /* If we have bottom, this will mark the ancestors of the
1404 * bottom commits we would reach while traversing as
1405 * uninteresting.
1406 */
1407 prepare_revision_walk(&revs);
1408
1409 o = find_origin(&sb, sb.final, path);
1410 if (!o)
1411 die("no such path %s in %s", path, final_commit_name);
1412
1413 sb.final_buf = read_sha1_file(o->blob_sha1, type, &sb.final_buf_size);
1414 lno = prepare_lines(&sb);
1415
1416 if (bottom < 1)
1417 bottom = 1;
1418 if (top < 1)
1419 top = lno;
1420 bottom--;
1421 if (lno < top)
1422 die("file %s has only %lu lines", path, lno);
1423
1424 ent = xcalloc(1, sizeof(*ent));
1425 ent->lno = bottom;
1426 ent->num_lines = top - bottom;
1427 ent->suspect = o;
1428 ent->s_lno = bottom;
1429
1430 sb.ent = ent;
1431 sb.path = path;
1432
1433 if (revs_file && read_ancestry(revs_file))
1434 die("reading graft file %s failed: %s",
1435 revs_file, strerror(errno));
1436
1437 assign_blame(&sb, &revs, opt);
1438
1439 coalesce(&sb);
1440
1441 if (!(output_option & OUTPUT_PORCELAIN))
1442 find_alignment(&sb, &output_option);
1443
1444 output(&sb, output_option);
1445 free((void *)sb.final_buf);
1446 for (ent = sb.ent; ent; ) {
1447 struct blame_entry *e = ent->next;
1448 free(ent);
1449 ent = e;
1450 }
1451 return 0;
1452}