e628b5a807600c7555e10e6f40f45cf11f1a6055
1/*
2 * Pickaxe
3 *
4 * Copyright (c) 2006, Junio C Hamano
5 */
6
7#include "cache.h"
8#include "builtin.h"
9#include "blob.h"
10#include "commit.h"
11#include "tag.h"
12#include "tree-walk.h"
13#include "diff.h"
14#include "diffcore.h"
15#include "revision.h"
16#include "xdiff-interface.h"
17
18#include <time.h>
19#include <sys/time.h>
20
21static char pickaxe_usage[] =
22"git-pickaxe [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [commit] [--] file\n"
23" -c, --compatibility Use the same output mode as git-annotate (Default: off)\n"
24" -l, --long Show long commit SHA1 (Default: off)\n"
25" -t, --time Show raw timestamp (Default: off)\n"
26" -f, --show-name Show original filename (Default: auto)\n"
27" -n, --show-number Show original linenumber (Default: off)\n"
28" -p, --porcelain Show in a format designed for machine consumption\n"
29" -L n,m Process only line range n,m, counting from 1\n"
30" -M, -C Find line movements within and across files\n"
31" -S revs-file Use revisions from revs-file instead of calling git-rev-list\n";
32
33static int longest_file;
34static int longest_author;
35static int max_orig_digits;
36static int max_digits;
37static int max_score_digits;
38
39#define PICKAXE_BLAME_MOVE 01
40#define PICKAXE_BLAME_COPY 02
41#define PICKAXE_BLAME_COPY_HARDER 04
42
43/*
44 * blame for a blame_entry with score lower than these thresholds
45 * is not passed to the parent using move/copy logic.
46 */
47static unsigned blame_move_score;
48static unsigned blame_copy_score;
49#define BLAME_DEFAULT_MOVE_SCORE 20
50#define BLAME_DEFAULT_COPY_SCORE 40
51
52/* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
53#define METAINFO_SHOWN (1u<<12)
54#define MORE_THAN_ONE_PATH (1u<<13)
55
56/*
57 * One blob in a commit
58 */
59struct origin {
60 struct commit *commit;
61 unsigned char blob_sha1[20];
62 char path[FLEX_ARRAY];
63};
64
65struct blame_entry {
66 struct blame_entry *prev;
67 struct blame_entry *next;
68
69 /* the first line of this group in the final image;
70 * internally all line numbers are 0 based.
71 */
72 int lno;
73
74 /* how many lines this group has */
75 int num_lines;
76
77 /* the commit that introduced this group into the final image */
78 struct origin *suspect;
79
80 /* true if the suspect is truly guilty; false while we have not
81 * checked if the group came from one of its parents.
82 */
83 char guilty;
84
85 /* the line number of the first line of this group in the
86 * suspect's file; internally all line numbers are 0 based.
87 */
88 int s_lno;
89
90 /* how significant this entry is -- cached to avoid
91 * scanning the lines over and over
92 */
93 unsigned score;
94};
95
96struct scoreboard {
97 /* the final commit (i.e. where we started digging from) */
98 struct commit *final;
99
100 const char *path;
101
102 /* the contents in the final; pointed into by buf pointers of
103 * blame_entries
104 */
105 const char *final_buf;
106 unsigned long final_buf_size;
107
108 /* linked list of blames */
109 struct blame_entry *ent;
110
111 /* look-up a line in the final buffer */
112 int num_lines;
113 int *lineno;
114};
115
116static void coalesce(struct scoreboard *sb)
117{
118 struct blame_entry *ent, *next;
119
120 for (ent = sb->ent; ent && (next = ent->next); ent = next) {
121 if (ent->suspect == next->suspect &&
122 ent->guilty == next->guilty &&
123 ent->s_lno + ent->num_lines == next->s_lno) {
124 ent->num_lines += next->num_lines;
125 ent->next = next->next;
126 if (ent->next)
127 ent->next->prev = ent;
128 free(next);
129 next = ent; /* again */
130 }
131 }
132}
133
134static void free_origin(struct origin *o)
135{
136 free(o);
137}
138
139static struct origin *find_origin(struct scoreboard *sb,
140 struct commit *commit,
141 const char *path)
142{
143 struct blame_entry *ent;
144 struct origin *o;
145 unsigned mode;
146 char type[10];
147
148 for (ent = sb->ent; ent; ent = ent->next) {
149 if (ent->suspect->commit == commit &&
150 !strcmp(ent->suspect->path, path))
151 return ent->suspect;
152 }
153
154 o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
155 o->commit = commit;
156 strcpy(o->path, path);
157 if (get_tree_entry(commit->object.sha1, path, o->blob_sha1, &mode))
158 goto err_out;
159 if (sha1_object_info(o->blob_sha1, type, NULL) ||
160 strcmp(type, blob_type))
161 goto err_out;
162 return o;
163 err_out:
164 free_origin(o);
165 return NULL;
166}
167
168static struct origin *find_rename(struct scoreboard *sb,
169 struct commit *parent,
170 struct origin *origin)
171{
172 struct origin *porigin = NULL;
173 struct diff_options diff_opts;
174 int i;
175 const char *paths[1];
176
177 diff_setup(&diff_opts);
178 diff_opts.recursive = 1;
179 diff_opts.detect_rename = DIFF_DETECT_RENAME;
180 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
181 paths[0] = NULL;
182 diff_tree_setup_paths(paths, &diff_opts);
183 if (diff_setup_done(&diff_opts) < 0)
184 die("diff-setup");
185 diff_tree_sha1(origin->commit->tree->object.sha1,
186 parent->tree->object.sha1,
187 "", &diff_opts);
188 diffcore_std(&diff_opts);
189
190 for (i = 0; i < diff_queued_diff.nr; i++) {
191 struct diff_filepair *p = diff_queued_diff.queue[i];
192 if ((p->status == 'R' || p->status == 'C') &&
193 !strcmp(p->one->path, origin->path)) {
194 porigin = find_origin(sb, parent, p->two->path);
195 break;
196 }
197 }
198 diff_flush(&diff_opts);
199 return porigin;
200}
201
202struct chunk {
203 /* line number in postimage; up to but not including this
204 * line is the same as preimage
205 */
206 int same;
207
208 /* preimage line number after this chunk */
209 int p_next;
210
211 /* postimage line number after this chunk */
212 int t_next;
213};
214
215struct patch {
216 struct chunk *chunks;
217 int num;
218};
219
220struct blame_diff_state {
221 struct xdiff_emit_state xm;
222 struct patch *ret;
223 unsigned hunk_post_context;
224 unsigned hunk_in_pre_context : 1;
225};
226
227static void process_u_diff(void *state_, char *line, unsigned long len)
228{
229 struct blame_diff_state *state = state_;
230 struct chunk *chunk;
231 int off1, off2, len1, len2, num;
232
233 num = state->ret->num;
234 if (len < 4 || line[0] != '@' || line[1] != '@') {
235 if (state->hunk_in_pre_context && line[0] == ' ')
236 state->ret->chunks[num - 1].same++;
237 else {
238 state->hunk_in_pre_context = 0;
239 if (line[0] == ' ')
240 state->hunk_post_context++;
241 else
242 state->hunk_post_context = 0;
243 }
244 return;
245 }
246
247 if (num && state->hunk_post_context) {
248 chunk = &state->ret->chunks[num - 1];
249 chunk->p_next -= state->hunk_post_context;
250 chunk->t_next -= state->hunk_post_context;
251 }
252 state->ret->num = ++num;
253 state->ret->chunks = xrealloc(state->ret->chunks,
254 sizeof(struct chunk) * num);
255 chunk = &state->ret->chunks[num - 1];
256 if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
257 state->ret->num--;
258 return;
259 }
260
261 /* Line numbers in patch output are one based. */
262 off1--;
263 off2--;
264
265 chunk->same = len2 ? off2 : (off2 + 1);
266
267 chunk->p_next = off1 + (len1 ? len1 : 1);
268 chunk->t_next = chunk->same + len2;
269 state->hunk_in_pre_context = 1;
270 state->hunk_post_context = 0;
271}
272
273static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
274 int context)
275{
276 struct blame_diff_state state;
277 xpparam_t xpp;
278 xdemitconf_t xecfg;
279 xdemitcb_t ecb;
280
281 xpp.flags = XDF_NEED_MINIMAL;
282 xecfg.ctxlen = context;
283 xecfg.flags = 0;
284 ecb.outf = xdiff_outf;
285 ecb.priv = &state;
286 memset(&state, 0, sizeof(state));
287 state.xm.consume = process_u_diff;
288 state.ret = xmalloc(sizeof(struct patch));
289 state.ret->chunks = NULL;
290 state.ret->num = 0;
291
292 xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);
293
294 if (state.ret->num) {
295 struct chunk *chunk;
296 chunk = &state.ret->chunks[state.ret->num - 1];
297 chunk->p_next -= state.hunk_post_context;
298 chunk->t_next -= state.hunk_post_context;
299 }
300 return state.ret;
301}
302
303static struct patch *get_patch(struct origin *parent, struct origin *origin)
304{
305 mmfile_t file_p, file_o;
306 char type[10];
307 char *blob_p, *blob_o;
308 struct patch *patch;
309
310 blob_p = read_sha1_file(parent->blob_sha1, type,
311 (unsigned long *) &file_p.size);
312 blob_o = read_sha1_file(origin->blob_sha1, type,
313 (unsigned long *) &file_o.size);
314 file_p.ptr = blob_p;
315 file_o.ptr = blob_o;
316 if (!file_p.ptr || !file_o.ptr) {
317 free(blob_p);
318 free(blob_o);
319 return NULL;
320 }
321
322 patch = compare_buffer(&file_p, &file_o, 0);
323 free(blob_p);
324 free(blob_o);
325 return patch;
326}
327
328static void free_patch(struct patch *p)
329{
330 free(p->chunks);
331 free(p);
332}
333
334static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
335{
336 struct blame_entry *ent, *prev = NULL;
337
338 for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
339 prev = ent;
340
341 /* prev, if not NULL, is the last one that is below e */
342 e->prev = prev;
343 if (prev) {
344 e->next = prev->next;
345 prev->next = e;
346 }
347 else {
348 e->next = sb->ent;
349 sb->ent = e;
350 }
351 if (e->next)
352 e->next->prev = e;
353}
354
355static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
356{
357 struct blame_entry *p, *n;
358 p = dst->prev;
359 n = dst->next;
360 memcpy(dst, src, sizeof(*src));
361 dst->prev = p;
362 dst->next = n;
363 dst->score = 0;
364}
365
366static const char *nth_line(struct scoreboard *sb, int lno)
367{
368 return sb->final_buf + sb->lineno[lno];
369}
370
371static void split_overlap(struct blame_entry split[3],
372 struct blame_entry *e,
373 int tlno, int plno, int same,
374 struct origin *parent)
375{
376 /* it is known that lines between tlno to same came from
377 * parent, and e has an overlap with that range. it also is
378 * known that parent's line plno corresponds to e's line tlno.
379 *
380 * <---- e ----->
381 * <------>
382 * <------------>
383 * <------------>
384 * <------------------>
385 *
386 * Potentially we need to split e into three parts; before
387 * this chunk, the chunk to be blamed for parent, and after
388 * that portion.
389 */
390 int chunk_end_lno;
391 memset(split, 0, sizeof(struct blame_entry [3]));
392
393 if (e->s_lno < tlno) {
394 /* there is a pre-chunk part not blamed on parent */
395 split[0].suspect = e->suspect;
396 split[0].lno = e->lno;
397 split[0].s_lno = e->s_lno;
398 split[0].num_lines = tlno - e->s_lno;
399 split[1].lno = e->lno + tlno - e->s_lno;
400 split[1].s_lno = plno;
401 }
402 else {
403 split[1].lno = e->lno;
404 split[1].s_lno = plno + (e->s_lno - tlno);
405 }
406
407 if (same < e->s_lno + e->num_lines) {
408 /* there is a post-chunk part not blamed on parent */
409 split[2].suspect = e->suspect;
410 split[2].lno = e->lno + (same - e->s_lno);
411 split[2].s_lno = e->s_lno + (same - e->s_lno);
412 split[2].num_lines = e->s_lno + e->num_lines - same;
413 chunk_end_lno = split[2].lno;
414 }
415 else
416 chunk_end_lno = e->lno + e->num_lines;
417 split[1].num_lines = chunk_end_lno - split[1].lno;
418
419 if (split[1].num_lines < 1)
420 return;
421 split[1].suspect = parent;
422}
423
424static void split_blame(struct scoreboard *sb,
425 struct blame_entry split[3],
426 struct blame_entry *e)
427{
428 struct blame_entry *new_entry;
429
430 if (split[0].suspect && split[2].suspect) {
431 /* we need to split e into two and add another for parent */
432 dup_entry(e, &split[0]);
433
434 new_entry = xmalloc(sizeof(*new_entry));
435 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
436 add_blame_entry(sb, new_entry);
437
438 new_entry = xmalloc(sizeof(*new_entry));
439 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
440 add_blame_entry(sb, new_entry);
441 }
442 else if (!split[0].suspect && !split[2].suspect)
443 /* parent covers the entire area */
444 dup_entry(e, &split[1]);
445 else if (split[0].suspect) {
446 dup_entry(e, &split[0]);
447
448 new_entry = xmalloc(sizeof(*new_entry));
449 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
450 add_blame_entry(sb, new_entry);
451 }
452 else {
453 dup_entry(e, &split[1]);
454
455 new_entry = xmalloc(sizeof(*new_entry));
456 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
457 add_blame_entry(sb, new_entry);
458 }
459
460 if (1) { /* sanity */
461 struct blame_entry *ent;
462 int lno = sb->ent->lno, corrupt = 0;
463
464 for (ent = sb->ent; ent; ent = ent->next) {
465 if (lno != ent->lno)
466 corrupt = 1;
467 if (ent->s_lno < 0)
468 corrupt = 1;
469 lno += ent->num_lines;
470 }
471 if (corrupt) {
472 lno = sb->ent->lno;
473 for (ent = sb->ent; ent; ent = ent->next) {
474 printf("L %8d l %8d n %8d\n",
475 lno, ent->lno, ent->num_lines);
476 lno = ent->lno + ent->num_lines;
477 }
478 die("oops");
479 }
480 }
481}
482
483static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
484 int tlno, int plno, int same,
485 struct origin *parent)
486{
487 struct blame_entry split[3];
488
489 split_overlap(split, e, tlno, plno, same, parent);
490 if (!split[1].suspect)
491 return;
492 split_blame(sb, split, e);
493}
494
495static int find_last_in_target(struct scoreboard *sb, struct origin *target)
496{
497 struct blame_entry *e;
498 int last_in_target = -1;
499
500 for (e = sb->ent; e; e = e->next) {
501 if (e->guilty || e->suspect != target)
502 continue;
503 if (last_in_target < e->s_lno + e->num_lines)
504 last_in_target = e->s_lno + e->num_lines;
505 }
506 return last_in_target;
507}
508
509static void blame_chunk(struct scoreboard *sb,
510 int tlno, int plno, int same,
511 struct origin *target, struct origin *parent)
512{
513 struct blame_entry *e;
514
515 for (e = sb->ent; e; e = e->next) {
516 if (e->guilty || e->suspect != target)
517 continue;
518 if (same <= e->s_lno)
519 continue;
520 if (tlno < e->s_lno + e->num_lines)
521 blame_overlap(sb, e, tlno, plno, same, parent);
522 }
523}
524
525static int pass_blame_to_parent(struct scoreboard *sb,
526 struct origin *target,
527 struct origin *parent)
528{
529 int i, last_in_target, plno, tlno;
530 struct patch *patch;
531
532 last_in_target = find_last_in_target(sb, target);
533 if (last_in_target < 0)
534 return 1; /* nothing remains for this target */
535
536 patch = get_patch(parent, target);
537 plno = tlno = 0;
538 for (i = 0; i < patch->num; i++) {
539 struct chunk *chunk = &patch->chunks[i];
540
541 blame_chunk(sb, tlno, plno, chunk->same, target, parent);
542 plno = chunk->p_next;
543 tlno = chunk->t_next;
544 }
545 /* rest (i.e. anything above tlno) are the same as parent */
546 blame_chunk(sb, tlno, plno, last_in_target, target, parent);
547
548 free_patch(patch);
549 return 0;
550}
551
552static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
553{
554 unsigned score;
555 const char *cp, *ep;
556
557 if (e->score)
558 return e->score;
559
560 score = 1;
561 cp = nth_line(sb, e->lno);
562 ep = nth_line(sb, e->lno + e->num_lines);
563 while (cp < ep) {
564 unsigned ch = *((unsigned char *)cp);
565 if (isalnum(ch))
566 score++;
567 cp++;
568 }
569 e->score = score;
570 return score;
571}
572
573static void copy_split_if_better(struct scoreboard *sb,
574 struct blame_entry best_so_far[3],
575 struct blame_entry this[3])
576{
577 if (!this[1].suspect)
578 return;
579 if (best_so_far[1].suspect) {
580 if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
581 return;
582 }
583 memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
584}
585
586static void find_copy_in_blob(struct scoreboard *sb,
587 struct blame_entry *ent,
588 struct origin *parent,
589 struct blame_entry split[3],
590 mmfile_t *file_p)
591{
592 const char *cp;
593 int cnt;
594 mmfile_t file_o;
595 struct patch *patch;
596 int i, plno, tlno;
597
598 cp = nth_line(sb, ent->lno);
599 file_o.ptr = (char*) cp;
600 cnt = ent->num_lines;
601
602 while (cnt && cp < sb->final_buf + sb->final_buf_size) {
603 if (*cp++ == '\n')
604 cnt--;
605 }
606 file_o.size = cp - file_o.ptr;
607
608 patch = compare_buffer(file_p, &file_o, 1);
609
610 memset(split, 0, sizeof(struct blame_entry [3]));
611 plno = tlno = 0;
612 for (i = 0; i < patch->num; i++) {
613 struct chunk *chunk = &patch->chunks[i];
614
615 /* tlno to chunk->same are the same as ent */
616 if (ent->num_lines <= tlno)
617 break;
618 if (tlno < chunk->same) {
619 struct blame_entry this[3];
620 split_overlap(this, ent,
621 tlno + ent->s_lno, plno,
622 chunk->same + ent->s_lno,
623 parent);
624 copy_split_if_better(sb, split, this);
625 }
626 plno = chunk->p_next;
627 tlno = chunk->t_next;
628 }
629 free_patch(patch);
630}
631
632static int find_move_in_parent(struct scoreboard *sb,
633 struct origin *target,
634 struct origin *parent)
635{
636 int last_in_target;
637 struct blame_entry *ent, split[3];
638 mmfile_t file_p;
639 char type[10];
640 char *blob_p;
641
642 last_in_target = find_last_in_target(sb, target);
643 if (last_in_target < 0)
644 return 1; /* nothing remains for this target */
645
646 blob_p = read_sha1_file(parent->blob_sha1, type,
647 (unsigned long *) &file_p.size);
648 file_p.ptr = blob_p;
649 if (!file_p.ptr) {
650 free(blob_p);
651 return 0;
652 }
653
654 for (ent = sb->ent; ent; ent = ent->next) {
655 if (ent->suspect != target || ent->guilty)
656 continue;
657 find_copy_in_blob(sb, ent, parent, split, &file_p);
658 if (split[1].suspect &&
659 blame_move_score < ent_score(sb, &split[1]))
660 split_blame(sb, split, ent);
661 }
662 free(blob_p);
663 return 0;
664}
665
666static int find_copy_in_parent(struct scoreboard *sb,
667 struct origin *target,
668 struct commit *parent,
669 struct origin *porigin,
670 int opt)
671{
672 struct diff_options diff_opts;
673 const char *paths[1];
674 struct blame_entry *ent;
675 int i;
676
677 if (find_last_in_target(sb, target) < 0)
678 return 1; /* nothing remains for this target */
679
680 diff_setup(&diff_opts);
681 diff_opts.recursive = 1;
682 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
683
684 /* Try "find copies harder" on new path */
685 if ((opt & PICKAXE_BLAME_COPY_HARDER) &&
686 (!porigin || strcmp(target->path, porigin->path))) {
687 diff_opts.detect_rename = DIFF_DETECT_COPY;
688 diff_opts.find_copies_harder = 1;
689 }
690 paths[0] = NULL;
691 diff_tree_setup_paths(paths, &diff_opts);
692 if (diff_setup_done(&diff_opts) < 0)
693 die("diff-setup");
694 diff_tree_sha1(parent->tree->object.sha1,
695 target->commit->tree->object.sha1,
696 "", &diff_opts);
697 diffcore_std(&diff_opts);
698
699 for (ent = sb->ent; ent; ent = ent->next) {
700 struct blame_entry split[3];
701 if (ent->suspect != target || ent->guilty)
702 continue;
703
704 memset(split, 0, sizeof(split));
705 for (i = 0; i < diff_queued_diff.nr; i++) {
706 struct diff_filepair *p = diff_queued_diff.queue[i];
707 struct origin *norigin;
708 mmfile_t file_p;
709 char type[10];
710 char *blob;
711 struct blame_entry this[3];
712
713 if (!DIFF_FILE_VALID(p->one))
714 continue; /* does not exist in parent */
715 if (porigin && !strcmp(p->one->path, porigin->path))
716 /* find_move already dealt with this path */
717 continue;
718 norigin = find_origin(sb, parent, p->one->path);
719
720 blob = read_sha1_file(norigin->blob_sha1, type,
721 (unsigned long *) &file_p.size);
722 file_p.ptr = blob;
723 if (!file_p.ptr) {
724 free(blob);
725 continue;
726 }
727 find_copy_in_blob(sb, ent, norigin, this, &file_p);
728 copy_split_if_better(sb, split, this);
729 }
730 if (split[1].suspect &&
731 blame_copy_score < ent_score(sb, &split[1]))
732 split_blame(sb, split, ent);
733 }
734 diff_flush(&diff_opts);
735
736 return 0;
737}
738
739#define MAXPARENT 16
740
741static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
742{
743 int i;
744 struct commit *commit = origin->commit;
745 struct commit_list *parent;
746 struct origin *parent_origin[MAXPARENT], *porigin;
747
748 memset(parent_origin, 0, sizeof(parent_origin));
749 for (i = 0, parent = commit->parents;
750 i < MAXPARENT && parent;
751 parent = parent->next, i++) {
752 struct commit *p = parent->item;
753
754 if (parse_commit(p))
755 continue;
756 porigin = find_origin(sb, parent->item, origin->path);
757 if (!porigin)
758 porigin = find_rename(sb, parent->item, origin);
759 if (!porigin)
760 continue;
761 if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
762 struct blame_entry *e;
763 for (e = sb->ent; e; e = e->next)
764 if (e->suspect == origin)
765 e->suspect = porigin;
766 /* now everything blamed for origin is blamed for
767 * porigin, we do not need to keep it anymore.
768 * Do not free porigin (or the ones we got from
769 * earlier round); they may still be used elsewhere.
770 */
771 free_origin(origin);
772 return;
773 }
774 parent_origin[i] = porigin;
775 }
776
777 for (i = 0, parent = commit->parents;
778 i < MAXPARENT && parent;
779 parent = parent->next, i++) {
780 struct origin *porigin = parent_origin[i];
781 if (!porigin)
782 continue;
783 if (pass_blame_to_parent(sb, origin, porigin))
784 return;
785 }
786
787 /*
788 * Optionally run "miff" to find moves in parents' files here.
789 */
790 if (opt & PICKAXE_BLAME_MOVE)
791 for (i = 0, parent = commit->parents;
792 i < MAXPARENT && parent;
793 parent = parent->next, i++) {
794 struct origin *porigin = parent_origin[i];
795 if (!porigin)
796 continue;
797 if (find_move_in_parent(sb, origin, porigin))
798 return;
799 }
800
801 /*
802 * Optionally run "ciff" to find copies from parents' files here.
803 */
804 if (opt & PICKAXE_BLAME_COPY)
805 for (i = 0, parent = commit->parents;
806 i < MAXPARENT && parent;
807 parent = parent->next, i++) {
808 struct origin *porigin = parent_origin[i];
809 if (find_copy_in_parent(sb, origin, parent->item,
810 porigin, opt))
811 return;
812 }
813}
814
815static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
816{
817 while (1) {
818 struct blame_entry *ent;
819 struct commit *commit;
820 struct origin *suspect = NULL;
821
822 /* find one suspect to break down */
823 for (ent = sb->ent; !suspect && ent; ent = ent->next)
824 if (!ent->guilty)
825 suspect = ent->suspect;
826 if (!suspect)
827 return; /* all done */
828
829 commit = suspect->commit;
830 parse_commit(commit);
831 if (!(commit->object.flags & UNINTERESTING) &&
832 !(revs->max_age != -1 && commit->date < revs->max_age))
833 pass_blame(sb, suspect, opt);
834
835 /* Take responsibility for the remaining entries */
836 for (ent = sb->ent; ent; ent = ent->next)
837 if (ent->suspect == suspect)
838 ent->guilty = 1;
839 }
840}
841
842static const char *format_time(unsigned long time, const char *tz_str,
843 int show_raw_time)
844{
845 static char time_buf[128];
846 time_t t = time;
847 int minutes, tz;
848 struct tm *tm;
849
850 if (show_raw_time) {
851 sprintf(time_buf, "%lu %s", time, tz_str);
852 return time_buf;
853 }
854
855 tz = atoi(tz_str);
856 minutes = tz < 0 ? -tz : tz;
857 minutes = (minutes / 100)*60 + (minutes % 100);
858 minutes = tz < 0 ? -minutes : minutes;
859 t = time + minutes * 60;
860 tm = gmtime(&t);
861
862 strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
863 strcat(time_buf, tz_str);
864 return time_buf;
865}
866
867struct commit_info
868{
869 char *author;
870 char *author_mail;
871 unsigned long author_time;
872 char *author_tz;
873
874 /* filled only when asked for details */
875 char *committer;
876 char *committer_mail;
877 unsigned long committer_time;
878 char *committer_tz;
879
880 char *summary;
881};
882
883static void get_ac_line(const char *inbuf, const char *what,
884 int bufsz, char *person, char **mail,
885 unsigned long *time, char **tz)
886{
887 int len;
888 char *tmp, *endp;
889
890 tmp = strstr(inbuf, what);
891 if (!tmp)
892 goto error_out;
893 tmp += strlen(what);
894 endp = strchr(tmp, '\n');
895 if (!endp)
896 len = strlen(tmp);
897 else
898 len = endp - tmp;
899 if (bufsz <= len) {
900 error_out:
901 /* Ugh */
902 person = *mail = *tz = "(unknown)";
903 *time = 0;
904 return;
905 }
906 memcpy(person, tmp, len);
907
908 tmp = person;
909 tmp += len;
910 *tmp = 0;
911 while (*tmp != ' ')
912 tmp--;
913 *tz = tmp+1;
914
915 *tmp = 0;
916 while (*tmp != ' ')
917 tmp--;
918 *time = strtoul(tmp, NULL, 10);
919
920 *tmp = 0;
921 while (*tmp != ' ')
922 tmp--;
923 *mail = tmp + 1;
924 *tmp = 0;
925}
926
927static void get_commit_info(struct commit *commit,
928 struct commit_info *ret,
929 int detailed)
930{
931 int len;
932 char *tmp, *endp;
933 static char author_buf[1024];
934 static char committer_buf[1024];
935 static char summary_buf[1024];
936
937 /* We've operated without save_commit_buffer, so
938 * we now need to populate them for output.
939 */
940 if (!commit->buffer) {
941 char type[20];
942 unsigned long size;
943 commit->buffer =
944 read_sha1_file(commit->object.sha1, type, &size);
945 }
946 ret->author = author_buf;
947 get_ac_line(commit->buffer, "\nauthor ",
948 sizeof(author_buf), author_buf, &ret->author_mail,
949 &ret->author_time, &ret->author_tz);
950
951 if (!detailed)
952 return;
953
954 ret->committer = committer_buf;
955 get_ac_line(commit->buffer, "\ncommitter ",
956 sizeof(committer_buf), committer_buf, &ret->committer_mail,
957 &ret->committer_time, &ret->committer_tz);
958
959 ret->summary = summary_buf;
960 tmp = strstr(commit->buffer, "\n\n");
961 if (!tmp) {
962 error_out:
963 sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
964 return;
965 }
966 tmp += 2;
967 endp = strchr(tmp, '\n');
968 if (!endp)
969 goto error_out;
970 len = endp - tmp;
971 if (len >= sizeof(summary_buf))
972 goto error_out;
973 memcpy(summary_buf, tmp, len);
974 summary_buf[len] = 0;
975}
976
977#define OUTPUT_ANNOTATE_COMPAT 001
978#define OUTPUT_LONG_OBJECT_NAME 002
979#define OUTPUT_RAW_TIMESTAMP 004
980#define OUTPUT_PORCELAIN 010
981#define OUTPUT_SHOW_NAME 020
982#define OUTPUT_SHOW_NUMBER 040
983#define OUTPUT_SHOW_SCORE 0100
984
985static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
986{
987 int cnt;
988 const char *cp;
989 struct origin *suspect = ent->suspect;
990 char hex[41];
991
992 strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
993 printf("%s%c%d %d %d\n",
994 hex,
995 ent->guilty ? ' ' : '*', // purely for debugging
996 ent->s_lno + 1,
997 ent->lno + 1,
998 ent->num_lines);
999 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1000 struct commit_info ci;
1001 suspect->commit->object.flags |= METAINFO_SHOWN;
1002 get_commit_info(suspect->commit, &ci, 1);
1003 printf("author %s\n", ci.author);
1004 printf("author-mail %s\n", ci.author_mail);
1005 printf("author-time %lu\n", ci.author_time);
1006 printf("author-tz %s\n", ci.author_tz);
1007 printf("committer %s\n", ci.committer);
1008 printf("committer-mail %s\n", ci.committer_mail);
1009 printf("committer-time %lu\n", ci.committer_time);
1010 printf("committer-tz %s\n", ci.committer_tz);
1011 printf("filename %s\n", suspect->path);
1012 printf("summary %s\n", ci.summary);
1013 }
1014 else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1015 printf("filename %s\n", suspect->path);
1016
1017 cp = nth_line(sb, ent->lno);
1018 for (cnt = 0; cnt < ent->num_lines; cnt++) {
1019 char ch;
1020 if (cnt)
1021 printf("%s %d %d\n", hex,
1022 ent->s_lno + 1 + cnt,
1023 ent->lno + 1 + cnt);
1024 putchar('\t');
1025 do {
1026 ch = *cp++;
1027 putchar(ch);
1028 } while (ch != '\n' &&
1029 cp < sb->final_buf + sb->final_buf_size);
1030 }
1031}
1032
1033static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1034{
1035 int cnt;
1036 const char *cp;
1037 struct origin *suspect = ent->suspect;
1038 struct commit_info ci;
1039 char hex[41];
1040 int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1041
1042 get_commit_info(suspect->commit, &ci, 1);
1043 strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1044
1045 cp = nth_line(sb, ent->lno);
1046 for (cnt = 0; cnt < ent->num_lines; cnt++) {
1047 char ch;
1048
1049 printf("%.*s", (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8, hex);
1050 if (opt & OUTPUT_ANNOTATE_COMPAT)
1051 printf("\t(%10s\t%10s\t%d)", ci.author,
1052 format_time(ci.author_time, ci.author_tz,
1053 show_raw_time),
1054 ent->lno + 1 + cnt);
1055 else {
1056 if (opt & OUTPUT_SHOW_SCORE)
1057 printf(" %*d", max_score_digits, ent->score);
1058 if (opt & OUTPUT_SHOW_NAME)
1059 printf(" %-*.*s", longest_file, longest_file,
1060 suspect->path);
1061 if (opt & OUTPUT_SHOW_NUMBER)
1062 printf(" %*d", max_orig_digits,
1063 ent->s_lno + 1 + cnt);
1064 printf(" (%-*.*s %10s %*d) ",
1065 longest_author, longest_author, ci.author,
1066 format_time(ci.author_time, ci.author_tz,
1067 show_raw_time),
1068 max_digits, ent->lno + 1 + cnt);
1069 }
1070 do {
1071 ch = *cp++;
1072 putchar(ch);
1073 } while (ch != '\n' &&
1074 cp < sb->final_buf + sb->final_buf_size);
1075 }
1076}
1077
1078static void output(struct scoreboard *sb, int option)
1079{
1080 struct blame_entry *ent;
1081
1082 if (option & OUTPUT_PORCELAIN) {
1083 for (ent = sb->ent; ent; ent = ent->next) {
1084 struct blame_entry *oth;
1085 struct origin *suspect = ent->suspect;
1086 struct commit *commit = suspect->commit;
1087 if (commit->object.flags & MORE_THAN_ONE_PATH)
1088 continue;
1089 for (oth = ent->next; oth; oth = oth->next) {
1090 if ((oth->suspect->commit != commit) ||
1091 !strcmp(oth->suspect->path, suspect->path))
1092 continue;
1093 commit->object.flags |= MORE_THAN_ONE_PATH;
1094 break;
1095 }
1096 }
1097 }
1098
1099 for (ent = sb->ent; ent; ent = ent->next) {
1100 if (option & OUTPUT_PORCELAIN)
1101 emit_porcelain(sb, ent);
1102 else {
1103 emit_other(sb, ent, option);
1104 }
1105 }
1106}
1107
1108static int prepare_lines(struct scoreboard *sb)
1109{
1110 const char *buf = sb->final_buf;
1111 unsigned long len = sb->final_buf_size;
1112 int num = 0, incomplete = 0, bol = 1;
1113
1114 if (len && buf[len-1] != '\n')
1115 incomplete++; /* incomplete line at the end */
1116 while (len--) {
1117 if (bol) {
1118 sb->lineno = xrealloc(sb->lineno,
1119 sizeof(int* ) * (num + 1));
1120 sb->lineno[num] = buf - sb->final_buf;
1121 bol = 0;
1122 }
1123 if (*buf++ == '\n') {
1124 num++;
1125 bol = 1;
1126 }
1127 }
1128 sb->lineno = xrealloc(sb->lineno,
1129 sizeof(int* ) * (num + incomplete + 1));
1130 sb->lineno[num + incomplete] = buf - sb->final_buf;
1131 sb->num_lines = num + incomplete;
1132 return sb->num_lines;
1133}
1134
1135static int read_ancestry(const char *graft_file)
1136{
1137 FILE *fp = fopen(graft_file, "r");
1138 char buf[1024];
1139 if (!fp)
1140 return -1;
1141 while (fgets(buf, sizeof(buf), fp)) {
1142 /* The format is just "Commit Parent1 Parent2 ...\n" */
1143 int len = strlen(buf);
1144 struct commit_graft *graft = read_graft_line(buf, len);
1145 register_commit_graft(graft, 0);
1146 }
1147 fclose(fp);
1148 return 0;
1149}
1150
1151static int lineno_width(int lines)
1152{
1153 int i, width;
1154
1155 for (width = 1, i = 10; i <= lines + 1; width++)
1156 i *= 10;
1157 return width;
1158}
1159
1160static void find_alignment(struct scoreboard *sb, int *option)
1161{
1162 int longest_src_lines = 0;
1163 int longest_dst_lines = 0;
1164 unsigned largest_score = 0;
1165 struct blame_entry *e;
1166
1167 for (e = sb->ent; e; e = e->next) {
1168 struct origin *suspect = e->suspect;
1169 struct commit_info ci;
1170 int num;
1171
1172 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1173 suspect->commit->object.flags |= METAINFO_SHOWN;
1174 get_commit_info(suspect->commit, &ci, 1);
1175 if (strcmp(suspect->path, sb->path))
1176 *option |= OUTPUT_SHOW_NAME;
1177 num = strlen(suspect->path);
1178 if (longest_file < num)
1179 longest_file = num;
1180 num = strlen(ci.author);
1181 if (longest_author < num)
1182 longest_author = num;
1183 }
1184 num = e->s_lno + e->num_lines;
1185 if (longest_src_lines < num)
1186 longest_src_lines = num;
1187 num = e->lno + e->num_lines;
1188 if (longest_dst_lines < num)
1189 longest_dst_lines = num;
1190 if (largest_score < ent_score(sb, e))
1191 largest_score = ent_score(sb, e);
1192 }
1193 max_orig_digits = lineno_width(longest_src_lines);
1194 max_digits = lineno_width(longest_dst_lines);
1195 max_score_digits = lineno_width(largest_score);
1196}
1197
1198static int has_path_in_work_tree(const char *path)
1199{
1200 struct stat st;
1201 return !lstat(path, &st);
1202}
1203
1204static unsigned parse_score(const char *arg)
1205{
1206 char *end;
1207 unsigned long score = strtoul(arg, &end, 10);
1208 if (*end)
1209 return 0;
1210 return score;
1211}
1212
1213int cmd_pickaxe(int argc, const char **argv, const char *prefix)
1214{
1215 struct rev_info revs;
1216 const char *path;
1217 struct scoreboard sb;
1218 struct origin *o;
1219 struct blame_entry *ent;
1220 int i, seen_dashdash, unk, opt;
1221 long bottom, top, lno;
1222 int output_option = 0;
1223 const char *revs_file = NULL;
1224 const char *final_commit_name = NULL;
1225 char type[10];
1226
1227 save_commit_buffer = 0;
1228
1229 opt = 0;
1230 bottom = top = 0;
1231 seen_dashdash = 0;
1232 for (unk = i = 1; i < argc; i++) {
1233 const char *arg = argv[i];
1234 if (*arg != '-')
1235 break;
1236 else if (!strcmp("-c", arg))
1237 output_option |= OUTPUT_ANNOTATE_COMPAT;
1238 else if (!strcmp("-t", arg))
1239 output_option |= OUTPUT_RAW_TIMESTAMP;
1240 else if (!strcmp("-l", arg))
1241 output_option |= OUTPUT_LONG_OBJECT_NAME;
1242 else if (!strcmp("-S", arg) && ++i < argc)
1243 revs_file = argv[i];
1244 else if (!strncmp("-M", arg, 2)) {
1245 opt |= PICKAXE_BLAME_MOVE;
1246 blame_move_score = parse_score(arg+2);
1247 }
1248 else if (!strncmp("-C", arg, 2)) {
1249 if (opt & PICKAXE_BLAME_COPY)
1250 opt |= PICKAXE_BLAME_COPY_HARDER;
1251 opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
1252 blame_copy_score = parse_score(arg+2);
1253 }
1254 else if (!strcmp("-L", arg) && ++i < argc) {
1255 char *term;
1256 arg = argv[i];
1257 if (bottom || top)
1258 die("More than one '-L n,m' option given");
1259 bottom = strtol(arg, &term, 10);
1260 if (*term == ',') {
1261 top = strtol(term + 1, &term, 10);
1262 if (*term)
1263 usage(pickaxe_usage);
1264 }
1265 if (bottom && top && top < bottom) {
1266 unsigned long tmp;
1267 tmp = top; top = bottom; bottom = tmp;
1268 }
1269 }
1270 else if (!strcmp("--score-debug", arg))
1271 output_option |= OUTPUT_SHOW_SCORE;
1272 else if (!strcmp("-f", arg) ||
1273 !strcmp("--show-name", arg))
1274 output_option |= OUTPUT_SHOW_NAME;
1275 else if (!strcmp("-n", arg) ||
1276 !strcmp("--show-number", arg))
1277 output_option |= OUTPUT_SHOW_NUMBER;
1278 else if (!strcmp("-p", arg) ||
1279 !strcmp("--porcelain", arg))
1280 output_option |= OUTPUT_PORCELAIN;
1281 else if (!strcmp("--", arg)) {
1282 seen_dashdash = 1;
1283 i++;
1284 break;
1285 }
1286 else
1287 argv[unk++] = arg;
1288 }
1289
1290 if (!blame_move_score)
1291 blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
1292 if (!blame_copy_score)
1293 blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
1294
1295 /* We have collected options unknown to us in argv[1..unk]
1296 * which are to be passed to revision machinery if we are
1297 * going to do the "bottom" procesing.
1298 *
1299 * The remaining are:
1300 *
1301 * (1) if seen_dashdash, its either
1302 * "-options -- <path>" or
1303 * "-options -- <path> <rev>".
1304 * but the latter is allowed only if there is no
1305 * options that we passed to revision machinery.
1306 *
1307 * (2) otherwise, we may have "--" somewhere later and
1308 * might be looking at the first one of multiple 'rev'
1309 * parameters (e.g. " master ^next ^maint -- path").
1310 * See if there is a dashdash first, and give the
1311 * arguments before that to revision machinery.
1312 * After that there must be one 'path'.
1313 *
1314 * (3) otherwise, its one of the three:
1315 * "-options <path> <rev>"
1316 * "-options <rev> <path>"
1317 * "-options <path>"
1318 * but again the first one is allowed only if
1319 * there is no options that we passed to revision
1320 * machinery.
1321 */
1322
1323 if (seen_dashdash) {
1324 /* (1) */
1325 if (argc <= i)
1326 usage(pickaxe_usage);
1327 path = argv[i];
1328 if (i + 1 == argc - 1) {
1329 if (unk != 1)
1330 usage(pickaxe_usage);
1331 argv[unk++] = argv[i + 1];
1332 }
1333 else if (i + 1 != argc)
1334 /* garbage at end */
1335 usage(pickaxe_usage);
1336 }
1337 else {
1338 int j;
1339 for (j = i; !seen_dashdash && j < argc; j++)
1340 if (!strcmp(argv[j], "--"))
1341 seen_dashdash = j;
1342 if (seen_dashdash) {
1343 if (seen_dashdash + 1 != argc - 1)
1344 usage(pickaxe_usage);
1345 path = argv[seen_dashdash + 1];
1346 for (j = i; j < seen_dashdash; j++)
1347 argv[unk++] = argv[j];
1348 }
1349 else {
1350 /* (3) */
1351 path = argv[i];
1352 if (i + 1 == argc - 1) {
1353 final_commit_name = argv[i + 1];
1354
1355 /* if (unk == 1) we could be getting
1356 * old-style
1357 */
1358 if (unk == 1 && !has_path_in_work_tree(path)) {
1359 path = argv[i + 1];
1360 final_commit_name = argv[i];
1361 }
1362 }
1363 else if (i != argc - 1)
1364 usage(pickaxe_usage); /* garbage at end */
1365
1366 if (!has_path_in_work_tree(path))
1367 die("cannot stat path %s: %s",
1368 path, strerror(errno));
1369 }
1370 }
1371
1372 if (final_commit_name)
1373 argv[unk++] = final_commit_name;
1374
1375 /* Now we got rev and path. We do not want the path pruning
1376 * but we may want "bottom" processing.
1377 */
1378 argv[unk] = NULL;
1379
1380 init_revisions(&revs, NULL);
1381 setup_revisions(unk, argv, &revs, "HEAD");
1382 memset(&sb, 0, sizeof(sb));
1383
1384 /* There must be one and only one positive commit in the
1385 * revs->pending array.
1386 */
1387 for (i = 0; i < revs.pending.nr; i++) {
1388 struct object *obj = revs.pending.objects[i].item;
1389 if (obj->flags & UNINTERESTING)
1390 continue;
1391 while (obj->type == OBJ_TAG)
1392 obj = deref_tag(obj, NULL, 0);
1393 if (obj->type != OBJ_COMMIT)
1394 die("Non commit %s?",
1395 revs.pending.objects[i].name);
1396 if (sb.final)
1397 die("More than one commit to dig from %s and %s?",
1398 revs.pending.objects[i].name,
1399 final_commit_name);
1400 sb.final = (struct commit *) obj;
1401 final_commit_name = revs.pending.objects[i].name;
1402 }
1403
1404 if (!sb.final) {
1405 /* "--not A B -- path" without anything positive */
1406 unsigned char head_sha1[20];
1407
1408 final_commit_name = "HEAD";
1409 if (get_sha1(final_commit_name, head_sha1))
1410 die("No such ref: HEAD");
1411 sb.final = lookup_commit_reference(head_sha1);
1412 add_pending_object(&revs, &(sb.final->object), "HEAD");
1413 }
1414
1415 /* If we have bottom, this will mark the ancestors of the
1416 * bottom commits we would reach while traversing as
1417 * uninteresting.
1418 */
1419 prepare_revision_walk(&revs);
1420
1421 o = find_origin(&sb, sb.final, path);
1422 if (!o)
1423 die("no such path %s in %s", path, final_commit_name);
1424
1425 sb.final_buf = read_sha1_file(o->blob_sha1, type, &sb.final_buf_size);
1426 lno = prepare_lines(&sb);
1427
1428 if (bottom < 1)
1429 bottom = 1;
1430 if (top < 1)
1431 top = lno;
1432 bottom--;
1433 if (lno < top)
1434 die("file %s has only %lu lines", path, lno);
1435
1436 ent = xcalloc(1, sizeof(*ent));
1437 ent->lno = bottom;
1438 ent->num_lines = top - bottom;
1439 ent->suspect = o;
1440 ent->s_lno = bottom;
1441
1442 sb.ent = ent;
1443 sb.path = path;
1444
1445 if (revs_file && read_ancestry(revs_file))
1446 die("reading graft file %s failed: %s",
1447 revs_file, strerror(errno));
1448
1449 assign_blame(&sb, &revs, opt);
1450
1451 coalesce(&sb);
1452
1453 if (!(output_option & OUTPUT_PORCELAIN))
1454 find_alignment(&sb, &output_option);
1455
1456 output(&sb, output_option);
1457 free((void *)sb.final_buf);
1458 for (ent = sb.ent; ent; ) {
1459 struct blame_entry *e = ent->next;
1460 free(ent);
1461 ent = e;
1462 }
1463 return 0;
1464}