simple heuristic for further free packing improvements
[gitweb.git] / blame.c
diff --git a/blame.c b/blame.c
index 90338af31cf1f532891d8e34ede6c5cf9f72681f..99ceea81df7b7c69bc263cf170cbb23d88420613 100644 (file)
--- a/blame.c
+++ b/blame.c
@@ -14,7 +14,9 @@
 #include "tree.h"
 #include "blob.h"
 #include "diff.h"
+#include "diffcore.h"
 #include "revision.h"
+#include "xdiff-interface.h"
 
 #define DEBUG 0
 
@@ -34,7 +36,9 @@ struct util_info {
        char *buf;
        unsigned long size;
        int num_lines;
-//    const char* path;
+       const char* pathname;
+
+       void* topo_data;
 };
 
 struct chunk {
@@ -54,116 +58,89 @@ static int num_get_patch = 0;
 static int num_commits = 0;
 static int patch_time = 0;
 
-#define TEMPFILE_PATH_LEN 60
-static struct patch *get_patch(struct commit *commit, struct commit *other)
-{
+struct blame_diff_state {
+       struct xdiff_emit_state xm;
        struct patch *ret;
-       struct util_info *info_c = (struct util_info *)commit->object.util;
-       struct util_info *info_o = (struct util_info *)other->object.util;
-       char tmp_path1[TEMPFILE_PATH_LEN], tmp_path2[TEMPFILE_PATH_LEN];
-       char diff_cmd[TEMPFILE_PATH_LEN*2 + 20];
-       struct timeval tv_start, tv_end;
-       int fd;
-       FILE *fin;
-       char buf[1024];
-
-       ret = xmalloc(sizeof(struct patch));
-       ret->chunks = NULL;
-       ret->num = 0;
-
-       get_blob(commit);
-       get_blob(other);
+};
 
-       gettimeofday(&tv_start, NULL);
+static void process_u0_diff(void *state_, char *line, unsigned long len)
+{
+       struct blame_diff_state *state = state_;
+       struct chunk *chunk;
 
-       fd = git_mkstemp(tmp_path1, TEMPFILE_PATH_LEN, "git-blame-XXXXXX");
-       if (fd < 0)
-               die("unable to create temp-file: %s", strerror(errno));
+       if (len < 4 || line[0] != '@' || line[1] != '@')
+               return;
 
-       if (xwrite(fd, info_c->buf, info_c->size) != info_c->size)
-               die("write failed: %s", strerror(errno));
-       close(fd);
+       if (DEBUG)
+               printf("chunk line: %.*s", (int)len, line);
+       state->ret->num++;
+       state->ret->chunks = xrealloc(state->ret->chunks,
+                                     sizeof(struct chunk) * state->ret->num);
+       chunk = &state->ret->chunks[state->ret->num - 1];
+
+       assert(!strncmp(line, "@@ -", 4));
+
+       if (parse_hunk_header(line, len,
+                             &chunk->off1, &chunk->len1,
+                             &chunk->off2, &chunk->len2)) {
+               state->ret->num--;
+               return;
+       }
 
-       fd = git_mkstemp(tmp_path2, TEMPFILE_PATH_LEN, "git-blame-XXXXXX");
-       if (fd < 0)
-               die("unable to create temp-file: %s", strerror(errno));
+       if (chunk->len1 == 0)
+               chunk->off1++;
+       if (chunk->len2 == 0)
+               chunk->off2++;
 
-       if (xwrite(fd, info_o->buf, info_o->size) != info_o->size)
-               die("write failed: %s", strerror(errno));
-       close(fd);
+       if (chunk->off1 > 0)
+               chunk->off1--;
+       if (chunk->off2 > 0)
+               chunk->off2--;
 
-       sprintf(diff_cmd, "diff -U 0 %s %s", tmp_path1, tmp_path2);
-       fin = popen(diff_cmd, "r");
-       if (!fin)
-               die("popen failed: %s", strerror(errno));
+       assert(chunk->off1 >= 0);
+       assert(chunk->off2 >= 0);
+}
 
-       while (fgets(buf, sizeof(buf), fin)) {
-               struct chunk *chunk;
-               char *start, *sp;
+static struct patch *get_patch(struct commit *commit, struct commit *other)
+{
+       struct blame_diff_state state;
+       xpparam_t xpp;
+       xdemitconf_t xecfg;
+       mmfile_t file_c, file_o;
+       xdemitcb_t ecb;
+       struct util_info *info_c = (struct util_info *)commit->object.util;
+       struct util_info *info_o = (struct util_info *)other->object.util;
+       struct timeval tv_start, tv_end;
 
-               if (buf[0] != '@' || buf[1] != '@')
-                       continue;
+       get_blob(commit);
+       file_c.ptr = info_c->buf;
+       file_c.size = info_c->size;
 
-               if (DEBUG)
-                       printf("chunk line: %s", buf);
-               ret->num++;
-               ret->chunks = xrealloc(ret->chunks,
-                                      sizeof(struct chunk) * ret->num);
-               chunk = &ret->chunks[ret->num - 1];
-
-               assert(!strncmp(buf, "@@ -", 4));
-
-               start = buf + 4;
-               sp = index(start, ' ');
-               *sp = '\0';
-               if (index(start, ',')) {
-                       int ret =
-                           sscanf(start, "%d,%d", &chunk->off1, &chunk->len1);
-                       assert(ret == 2);
-               } else {
-                       int ret = sscanf(start, "%d", &chunk->off1);
-                       assert(ret == 1);
-                       chunk->len1 = 1;
-               }
-               *sp = ' ';
-
-               start = sp + 1;
-               sp = index(start, ' ');
-               *sp = '\0';
-               if (index(start, ',')) {
-                       int ret =
-                           sscanf(start, "%d,%d", &chunk->off2, &chunk->len2);
-                       assert(ret == 2);
-               } else {
-                       int ret = sscanf(start, "%d", &chunk->off2);
-                       assert(ret == 1);
-                       chunk->len2 = 1;
-               }
-               *sp = ' ';
+       get_blob(other);
+       file_o.ptr = info_o->buf;
+       file_o.size = info_o->size;
 
-               if (chunk->len1 == 0)
-                       chunk->off1++;
-               if (chunk->len2 == 0)
-                       chunk->off2++;
+       gettimeofday(&tv_start, NULL);
 
-               if (chunk->off1 > 0)
-                       chunk->off1--;
-               if (chunk->off2 > 0)
-                       chunk->off2--;
+       xpp.flags = XDF_NEED_MINIMAL;
+       xecfg.ctxlen = 0;
+       xecfg.flags = 0;
+       ecb.outf = xdiff_outf;
+       ecb.priv = &state;
+       memset(&state, 0, sizeof(state));
+       state.xm.consume = process_u0_diff;
+       state.ret = xmalloc(sizeof(struct patch));
+       state.ret->chunks = NULL;
+       state.ret->num = 0;
 
-               assert(chunk->off1 >= 0);
-               assert(chunk->off2 >= 0);
-       }
-       pclose(fin);
-       unlink(tmp_path1);
-       unlink(tmp_path2);
+       xdl_diff(&file_c, &file_o, &xpp, &xecfg, &ecb);
 
        gettimeofday(&tv_end, NULL);
        patch_time += 1000000 * (tv_end.tv_sec - tv_start.tv_sec) +
                tv_end.tv_usec - tv_start.tv_usec;
 
        num_get_patch++;
-       return ret;
+       return state.ret;
 }
 
 static void free_patch(struct patch *p)
@@ -177,11 +154,13 @@ static int get_blob_sha1_internal(unsigned char *sha1, const char *base,
                                  unsigned mode, int stage);
 
 static unsigned char blob_sha1[20];
+static const char* blame_file;
 static int get_blob_sha1(struct tree *t, const char *pathname,
                         unsigned char *sha1)
 {
        int i;
        const char *pathspec[2];
+       blame_file = pathname;
        pathspec[0] = pathname;
        pathspec[1] = NULL;
        memset(blob_sha1, 0, sizeof(blob_sha1));
@@ -206,6 +185,10 @@ static int get_blob_sha1_internal(unsigned char *sha1, const char *base,
        if (S_ISDIR(mode))
                return READ_TREE_RECURSIVE;
 
+       if (strncmp(blame_file, base, baselen) ||
+           strcmp(blame_file + baselen, pathname))
+               return -1;
+
        memcpy(blob_sha1, sha1, 20);
        return -1;
 }
@@ -220,7 +203,7 @@ static void get_blob(struct commit *commit)
 
        info->buf = read_sha1_file(info->sha1, type, &info->size);
 
-       assert(!strcmp(type, "blob"));
+       assert(!strcmp(type, blob_type));
 }
 
 /* For debugging only */
@@ -342,25 +325,34 @@ static int map_line(struct commit *commit, int line)
        return info->line_map[line];
 }
 
-static int fill_util_info(struct commit *commit, const char *path)
+static struct util_info* get_util(struct commit *commit)
 {
-       struct util_info *util;
-       if (commit->object.util)
-               return 0;
+       struct util_info *util = commit->object.util;
+
+       if (util)
+               return util;
 
        util = xmalloc(sizeof(struct util_info));
+       util->buf = NULL;
+       util->size = 0;
+       util->line_map = NULL;
+       util->num_lines = -1;
+       util->pathname = NULL;
+       commit->object.util = util;
+       return util;
+}
+
+static int fill_util_info(struct commit *commit)
+{
+       struct util_info *util = commit->object.util;
 
-       if (get_blob_sha1(commit->tree, path, util->sha1)) {
-               free(util);
+       assert(util);
+       assert(util->pathname);
+
+       if (get_blob_sha1(commit->tree, util->pathname, util->sha1))
                return 1;
-       } else {
-               util->buf = NULL;
-               util->size = 0;
-               util->line_map = NULL;
-               util->num_lines = -1;
-               commit->object.util = util;
+       else
                return 0;
-       }
 }
 
 static void alloc_line_map(struct commit *commit)
@@ -389,10 +381,11 @@ static void alloc_line_map(struct commit *commit)
 
 static void init_first_commit(struct commit* commit, const char* filename)
 {
-       struct util_info* util;
+       struct util_info* util = commit->object.util;
        int i;
 
-       if (fill_util_info(commit, filename))
+       util->pathname = filename;
+       if (fill_util_info(commit))
                die("fill_util_info failed");
 
        alloc_line_map(commit);
@@ -453,7 +446,7 @@ static void process_commits(struct rev_info *rev, const char *path,
                if(num_parents == 0)
                        *initial = commit;
 
-               if(fill_util_info(commit, path))
+               if (fill_util_info(commit))
                        continue;
 
                alloc_line_map(commit);
@@ -471,7 +464,7 @@ static void process_commits(struct rev_info *rev, const char *path,
                                printf("parent: %s\n",
                                       sha1_to_hex(parent->object.sha1));
 
-                       if(fill_util_info(parent, path)) {
+                       if (fill_util_info(parent)) {
                                num_parents--;
                                continue;
                        }
@@ -511,6 +504,143 @@ static void process_commits(struct rev_info *rev, const char *path,
        } while ((commit = get_revision(rev)) != NULL);
 }
 
+
+static int compare_tree_path(struct rev_info* revs,
+                            struct commit* c1, struct commit* c2)
+{
+       int ret;
+       const char* paths[2];
+       struct util_info* util = c2->object.util;
+       paths[0] = util->pathname;
+       paths[1] = NULL;
+
+       diff_tree_setup_paths(get_pathspec(revs->prefix, paths),
+                             &revs->pruning);
+       ret = rev_compare_tree(revs, c1->tree, c2->tree);
+       diff_tree_release_paths(&revs->pruning);
+       return ret;
+}
+
+
+static int same_tree_as_empty_path(struct rev_info *revs, struct tree* t1,
+                                  const char* path)
+{
+       int ret;
+       const char* paths[2];
+       paths[0] = path;
+       paths[1] = NULL;
+
+       diff_tree_setup_paths(get_pathspec(revs->prefix, paths),
+                             &revs->pruning);
+       ret = rev_same_tree_as_empty(revs, t1);
+       diff_tree_release_paths(&revs->pruning);
+       return ret;
+}
+
+static const char* find_rename(struct commit* commit, struct commit* parent)
+{
+       struct util_info* cutil = commit->object.util;
+       struct diff_options diff_opts;
+       const char *paths[1];
+       int i;
+
+       if (DEBUG) {
+               printf("find_rename commit: %s ",
+                      sha1_to_hex(commit->object.sha1));
+               puts(sha1_to_hex(parent->object.sha1));
+       }
+
+       diff_setup(&diff_opts);
+       diff_opts.recursive = 1;
+       diff_opts.detect_rename = DIFF_DETECT_RENAME;
+       paths[0] = NULL;
+       diff_tree_setup_paths(paths, &diff_opts);
+       if (diff_setup_done(&diff_opts) < 0)
+               die("diff_setup_done failed");
+
+       diff_tree_sha1(commit->tree->object.sha1, parent->tree->object.sha1,
+                      "", &diff_opts);
+       diffcore_std(&diff_opts);
+
+       for (i = 0; i < diff_queued_diff.nr; i++) {
+               struct diff_filepair *p = diff_queued_diff.queue[i];
+
+               if (p->status == 'R' && !strcmp(p->one->path, cutil->pathname)) {
+                       if (DEBUG)
+                               printf("rename %s -> %s\n", p->one->path, p->two->path);
+                       return p->two->path;
+               }
+       }
+
+       return 0;
+}
+
+static void simplify_commit(struct rev_info *revs, struct commit *commit)
+{
+       struct commit_list **pp, *parent;
+
+       if (!commit->tree)
+               return;
+
+       if (!commit->parents) {
+               struct util_info* util = commit->object.util;
+               if (!same_tree_as_empty_path(revs, commit->tree,
+                                            util->pathname))
+                       commit->object.flags |= TREECHANGE;
+               return;
+       }
+
+       pp = &commit->parents;
+       while ((parent = *pp) != NULL) {
+               struct commit *p = parent->item;
+
+               if (p->object.flags & UNINTERESTING) {
+                       pp = &parent->next;
+                       continue;
+               }
+
+               parse_commit(p);
+               switch (compare_tree_path(revs, p, commit)) {
+               case REV_TREE_SAME:
+                       parent->next = NULL;
+                       commit->parents = parent;
+                       get_util(p)->pathname = get_util(commit)->pathname;
+                       return;
+
+               case REV_TREE_NEW:
+               {
+
+                       struct util_info* util = commit->object.util;
+                       if (revs->remove_empty_trees &&
+                           same_tree_as_empty_path(revs, p->tree,
+                                                   util->pathname)) {
+                               const char* new_name = find_rename(commit, p);
+                               if (new_name) {
+                                       struct util_info* putil = get_util(p);
+                                       if (!putil->pathname)
+                                               putil->pathname = strdup(new_name);
+                               } else {
+                                       *pp = parent->next;
+                                       continue;
+                               }
+                       }
+               }
+
+               /* fallthrough */
+               case REV_TREE_DIFFERENT:
+                       pp = &parent->next;
+                       if (!get_util(p)->pathname)
+                               get_util(p)->pathname =
+                                       get_util(commit)->pathname;
+                       continue;
+               }
+               die("bad tree compare for commit %s",
+                   sha1_to_hex(commit->object.sha1));
+       }
+       commit->object.flags |= TREECHANGE;
+}
+
+
 struct commit_info
 {
        char* author;
@@ -526,7 +656,7 @@ static void get_commit_info(struct commit* commit, struct commit_info* ret)
        static char author_buf[1024];
 
        tmp = strstr(commit->buffer, "\nauthor ") + 8;
-       len = index(tmp, '\n') - tmp;
+       len = strchr(tmp, '\n') - tmp;
        ret->author = author_buf;
        memcpy(ret->author, tmp, len);
 
@@ -569,27 +699,61 @@ static const char* format_time(unsigned long time, const char* tz_str)
        return time_buf;
 }
 
+static void topo_setter(struct commit* c, void* data)
+{
+       struct util_info* util = c->object.util;
+       util->topo_data = data;
+}
+
+static void* topo_getter(struct commit* c)
+{
+       struct util_info* util = c->object.util;
+       return util->topo_data;
+}
+
+static int read_ancestry(const char *graft_file,
+                        unsigned char **start_sha1)
+{
+       FILE *fp = fopen(graft_file, "r");
+       char buf[1024];
+       if (!fp)
+               return -1;
+       while (fgets(buf, sizeof(buf), fp)) {
+               /* The format is just "Commit Parent1 Parent2 ...\n" */
+               int len = strlen(buf);
+               struct commit_graft *graft = read_graft_line(buf, len);
+               register_commit_graft(graft, 0);
+               if (!*start_sha1)
+                       *start_sha1 = graft->sha1;
+       }
+       fclose(fp);
+       return 0;
+}
+
 int main(int argc, const char **argv)
 {
        int i;
        struct commit *initial = NULL;
-       unsigned char sha1[20];
+       unsigned char sha1[20], *sha1_p = NULL;
 
        const char *filename = NULL, *commit = NULL;
        char filename_buf[256];
        int sha1_len = 8;
        int compability = 0;
        int options = 1;
+       struct commit* start_commit;
 
-       int num_args;
        const char* args[10];
        struct rev_info rev;
 
        struct commit_info ci;
        const char *buf;
        int max_digits;
+       int longest_file, longest_author;
+       int found_rename;
 
        const char* prefix = setup_git_directory();
+       git_config(git_default_config);
 
        for(i = 1; i < argc; i++) {
                if(options) {
@@ -604,6 +768,14 @@ int main(int argc, const char **argv)
                                  !strcmp(argv[i], "--compability")) {
                                compability = 1;
                                continue;
+                       } else if(!strcmp(argv[i], "-S")) {
+                               if (i + 1 < argc &&
+                                   !read_ancestry(argv[i + 1], &sha1_p)) {
+                                       compability = 1;
+                                       i++;
+                                       continue;
+                               }
+                               usage(blame_usage);
                        } else if(!strcmp(argv[i], "--")) {
                                options = 0;
                                continue;
@@ -625,7 +797,9 @@ int main(int argc, const char **argv)
 
        if(!filename)
                usage(blame_usage);
-       if(!commit)
+       if (commit && sha1_p)
+               usage(blame_usage);
+       else if(!commit)
                commit = "HEAD";
 
        if(prefix)
@@ -634,28 +808,33 @@ int main(int argc, const char **argv)
                strcpy(filename_buf, filename);
        filename = filename_buf;
 
-       {
-               struct commit* c;
+       if (!sha1_p) {
                if (get_sha1(commit, sha1))
                        die("get_sha1 failed, commit '%s' not found", commit);
-               c = lookup_commit_reference(sha1);
-
-               if (fill_util_info(c, filename)) {
-                       printf("%s not found in %s\n", filename, commit);
-                       return 1;
-               }
+               sha1_p = sha1;
+       }
+       start_commit = lookup_commit_reference(sha1_p);
+       get_util(start_commit)->pathname = filename;
+       if (fill_util_info(start_commit)) {
+               printf("%s not found in %s\n", filename, commit);
+               return 1;
        }
 
-       num_args = 0;
-       args[num_args++] = NULL;
-       args[num_args++] = "--topo-order";
-       args[num_args++] = "--remove-empty";
-       args[num_args++] = commit;
-       args[num_args++] = "--";
-       args[num_args++] = filename;
-       args[num_args] = NULL;
 
-       setup_revisions(num_args, args, &rev, "HEAD");
+       init_revisions(&rev);
+       rev.remove_empty_trees = 1;
+       rev.topo_order = 1;
+       rev.prune_fn = simplify_commit;
+       rev.topo_setter = topo_setter;
+       rev.topo_getter = topo_getter;
+       rev.parents = 1;
+       rev.limited = 1;
+
+       commit_list_insert(start_commit, &rev.commits);
+
+       args[0] = filename;
+       args[1] = NULL;
+       diff_tree_setup_paths(args, &rev.pruning);
        prepare_revision_walk(&rev);
        process_commits(&rev, filename, &initial);
 
@@ -663,20 +842,47 @@ int main(int argc, const char **argv)
        for (max_digits = 1, i = 10; i <= num_blame_lines + 1; max_digits++)
                i *= 10;
 
+       longest_file = 0;
+       longest_author = 0;
+       found_rename = 0;
+       for (i = 0; i < num_blame_lines; i++) {
+               struct commit *c = blame_lines[i];
+               struct util_info* u;
+               if (!c)
+                       c = initial;
+               u = c->object.util;
+
+               if (!found_rename && strcmp(filename, u->pathname))
+                       found_rename = 1;
+               if (longest_file < strlen(u->pathname))
+                       longest_file = strlen(u->pathname);
+               get_commit_info(c, &ci);
+               if (longest_author < strlen(ci.author))
+                       longest_author = strlen(ci.author);
+       }
+
        for (i = 0; i < num_blame_lines; i++) {
                struct commit *c = blame_lines[i];
+               struct util_info* u;
+
                if (!c)
                        c = initial;
 
+               u = c->object.util;
                get_commit_info(c, &ci);
                fwrite(sha1_to_hex(c->object.sha1), sha1_len, 1, stdout);
-               if(compability)
+               if(compability) {
                        printf("\t(%10s\t%10s\t%d)", ci.author,
                               format_time(ci.author_time, ci.author_tz), i+1);
-               else
-                       printf(" (%-15.15s %10s %*d) ", ci.author,
+               } else {
+                       if (found_rename)
+                               printf(" %-*.*s", longest_file, longest_file,
+                                      u->pathname);
+                       printf(" (%-*.*s %10s %*d) ",
+                              longest_author, longest_author, ci.author,
                               format_time(ci.author_time, ci.author_tz),
                               max_digits, i+1);
+               }
 
                if(i == num_blame_lines - 1) {
                        fwrite(buf, blame_len - (buf - blame_contents),
@@ -684,7 +890,7 @@ int main(int argc, const char **argv)
                        if(blame_contents[blame_len-1] != '\n')
                                putc('\n', stdout);
                } else {
-                       char* next_buf = index(buf, '\n') + 1;
+                       char* next_buf = strchr(buf, '\n') + 1;
                        fwrite(buf, next_buf - buf, 1, stdout);
                        buf = next_buf;
                }