git-pickaxe -C: blame cut-and-pasted lines.
authorJunio C Hamano <junkio@cox.net>
Fri, 20 Oct 2006 01:50:17 +0000 (18:50 -0700)
committerJunio C Hamano <junkio@cox.net>
Fri, 20 Oct 2006 07:30:44 +0000 (00:30 -0700)
This completes the initial round of git-pickaxe. In addition to
the detection of line movements we already have, this finds new
lines that were created by moving or cutting-and-pasting lines
from different files in the parent.

With this,

git pickaxe -f -n -C v1.4.0 -- revision.c

finds that a major part of that file actually came from
rev-list.c when Linus split the latter at commit ae563642 and
blames them to earlier commits that touch rev-list.c.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Documentation/git-pickaxe.txt
builtin-pickaxe.c
index ebae20ff33307d218c0790073d4ed9ef8b99ad58..6d22fd9e993edebf229c9e35b01fd2ecf6f59de5 100644 (file)
@@ -9,7 +9,7 @@ SYNOPSIS
 --------
 [verse]
 'git-pickaxe' [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>]
 --------
 [verse]
 'git-pickaxe' [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>]
-              [-M] [--since=<date>] [<rev>] [--] <file>
+              [-M] [-C] [-C] [--since=<date>] [<rev>] [--] <file>
 
 DESCRIPTION
 -----------
 
 DESCRIPTION
 -----------
@@ -73,6 +73,14 @@ OPTIONS
        to the child commit.  With this option, both groups of
        lines are blamed on the parent.
 
        to the child commit.  With this option, both groups of
        lines are blamed on the parent.
 
+-C::
+       In addition to `-M`, detect lines copied from other
+       files that were modified in the same commit.  This is
+       useful when you reorganize your program and move code
+       around across files.  When this option is given twice,
+       the command looks for copies from all other files in the
+       parent for the commit that creates the file in addition.
+
 -h, --help::
        Show help message.
 
 -h, --help::
        Show help message.
 
index e6ce6551d08c770653a9dc32f0ae7b96f4d1cb55..74c7c9a33b1a3fe6c2a49dd2ba426431451c9fed 100644 (file)
@@ -19,7 +19,7 @@
 #include <sys/time.h>
 
 static char pickaxe_usage[] =
 #include <sys/time.h>
 
 static char pickaxe_usage[] =
-"git-pickaxe [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [commit] [--] file\n"
+"git-pickaxe [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [commit] [--] file\n"
 "  -c, --compatibility Use the same output mode as git-annotate (Default: off)\n"
 "  -l, --long          Show long commit SHA1 (Default: off)\n"
 "  -t, --time          Show raw timestamp (Default: off)\n"
 "  -c, --compatibility Use the same output mode as git-annotate (Default: off)\n"
 "  -l, --long          Show long commit SHA1 (Default: off)\n"
 "  -t, --time          Show raw timestamp (Default: off)\n"
@@ -27,7 +27,7 @@ static char pickaxe_usage[] =
 "  -n, --show-number   Show original linenumber (Default: off)\n"
 "  -p, --porcelain     Show in a format designed for machine consumption\n"
 "  -L n,m              Process only line range n,m, counting from 1\n"
 "  -n, --show-number   Show original linenumber (Default: off)\n"
 "  -p, --porcelain     Show in a format designed for machine consumption\n"
 "  -L n,m              Process only line range n,m, counting from 1\n"
-"  -M                  Find line movements within the file\n"
+"  -M, -C              Find line movements within and across files\n"
 "  -S revs-file        Use revisions from revs-file instead of calling git-rev-list\n";
 
 static int longest_file;
 "  -S revs-file        Use revisions from revs-file instead of calling git-rev-list\n";
 
 static int longest_file;
@@ -38,6 +38,8 @@ static int max_digits;
 #define DEBUG 0
 
 #define PICKAXE_BLAME_MOVE             01
 #define DEBUG 0
 
 #define PICKAXE_BLAME_MOVE             01
+#define PICKAXE_BLAME_COPY             02
+#define PICKAXE_BLAME_COPY_HARDER      04
 
 /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
 #define METAINFO_SHOWN         (1u<<12)
 
 /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
 #define METAINFO_SHOWN         (1u<<12)
@@ -635,6 +637,78 @@ static int find_move_in_parent(struct scoreboard *sb,
        return 0;
 }
 
        return 0;
 }
 
+static int find_copy_in_parent(struct scoreboard *sb,
+                              struct origin *target,
+                              struct commit *parent,
+                              struct origin *porigin,
+                              int opt)
+{
+       struct diff_options diff_opts;
+       const char *paths[1];
+       struct blame_entry *ent;
+       int i;
+
+       if (find_last_in_target(sb, target) < 0)
+               return 1; /* nothing remains for this target */
+
+       diff_setup(&diff_opts);
+       diff_opts.recursive = 1;
+       diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
+
+       /* Try "find copies harder" on new path */
+       if ((opt & PICKAXE_BLAME_COPY_HARDER) &&
+           (!porigin || strcmp(target->path, porigin->path))) {
+               diff_opts.detect_rename = DIFF_DETECT_COPY;
+               diff_opts.find_copies_harder = 1;
+       }
+       paths[0] = NULL;
+       diff_tree_setup_paths(paths, &diff_opts);
+       if (diff_setup_done(&diff_opts) < 0)
+               die("diff-setup");
+       diff_tree_sha1(parent->tree->object.sha1,
+                      target->commit->tree->object.sha1,
+                      "", &diff_opts);
+       diffcore_std(&diff_opts);
+
+       for (ent = sb->ent; ent; ent = ent->next) {
+               struct blame_entry split[3];
+               if (ent->suspect != target || ent->guilty)
+                       continue;
+
+               memset(split, 0, sizeof(split));
+               for (i = 0; i < diff_queued_diff.nr; i++) {
+                       struct diff_filepair *p = diff_queued_diff.queue[i];
+                       struct origin *norigin;
+                       mmfile_t file_p;
+                       char type[10];
+                       char *blob;
+                       struct blame_entry this[3];
+
+                       if (!DIFF_FILE_VALID(p->one))
+                               continue; /* does not exist in parent */
+                       if (porigin && !strcmp(p->one->path, porigin->path))
+                               /* find_move already dealt with this path */
+                               continue;
+                       norigin = find_origin(sb, parent, p->one->path);
+
+                       blob = read_sha1_file(norigin->blob_sha1, type,
+                                             (unsigned long *) &file_p.size);
+                       file_p.ptr = blob;
+                       if (!file_p.ptr) {
+                               free(blob);
+                               continue;
+                       }
+                       find_copy_in_blob(sb, ent, norigin, this, &file_p);
+                       copy_split_if_better(split, this);
+               }
+               if (split[1].suspect)
+                       split_blame(sb, split, ent);
+       }
+       diff_flush(&diff_opts);
+
+       return 0;
+}
+
 #define MAXPARENT 16
 
 static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
 #define MAXPARENT 16
 
 static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
@@ -697,6 +771,18 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
                                return;
                }
 
                                return;
                }
 
+       /*
+        * Optionally run "ciff" to find copies from parents' files here.
+        */
+       if (opt & PICKAXE_BLAME_COPY)
+               for (i = 0, parent = commit->parents;
+                    i < MAXPARENT && parent;
+                    parent = parent->next, i++) {
+                       struct origin *porigin = parent_origin[i];
+                       if (find_copy_in_parent(sb, origin, parent->item,
+                                               porigin, opt))
+                               return;
+               }
 }
 
 static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
 }
 
 static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
@@ -1099,6 +1185,11 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix)
                        revs_file = argv[i];
                else if (!strcmp("-M", arg))
                        opt |= PICKAXE_BLAME_MOVE;
                        revs_file = argv[i];
                else if (!strcmp("-M", arg))
                        opt |= PICKAXE_BLAME_MOVE;
+               else if (!strcmp("-C", arg)) {
+                       if (opt & PICKAXE_BLAME_COPY)
+                               opt |= PICKAXE_BLAME_COPY_HARDER;
+                       opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
+               }
                else if (!strcmp("-L", arg) && ++i < argc) {
                        char *term;
                        arg = argv[i];
                else if (!strcmp("-L", arg) && ++i < argc) {
                        char *term;
                        arg = argv[i];