general improvements
[gitweb.git] / blame.c
diff --git a/blame.c b/blame.c
index ad8810cb0e049d294d54fe823c93ff5ad170e770..36a2e7ef119d7bea691babe15f861a0600028196 100644 (file)
--- a/blame.c
+++ b/blame.c
@@ -99,9 +99,9 @@ static void verify_working_tree_path(struct repository *r,
        for (parents = work_tree->parents; parents; parents = parents->next) {
                const struct object_id *commit_oid = &parents->item->object.oid;
                struct object_id blob_oid;
-               unsigned mode;
+               unsigned short mode;
 
-               if (!get_tree_entry(commit_oid, path, &blob_oid, &mode) &&
+               if (!get_tree_entry(r, commit_oid, path, &blob_oid, &mode) &&
                    oid_object_info(r, &blob_oid, NULL) == OBJ_BLOB)
                        return;
        }
@@ -188,7 +188,7 @@ static struct commit *fake_working_tree_commit(struct repository *r,
        unsigned mode;
        struct strbuf msg = STRBUF_INIT;
 
-       read_index(r->index);
+       repo_read_index(r);
        time(&now);
        commit = alloc_commit_node(r);
        commit->object.parsed = 1;
@@ -204,7 +204,8 @@ static struct commit *fake_working_tree_commit(struct repository *r,
 
        origin = make_origin(commit, path);
 
-       ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
+       ident = fmt_ident("Not Committed Yet", "not.committed.yet",
+                       WANT_BLANK_IDENT, NULL, 0);
        strbuf_addstr(&msg, "tree 0000000000000000000000000000000000000000\n");
        for (parent = commit->parents; parent; parent = parent->next)
                strbuf_addf(&msg, "parent %s\n",
@@ -270,7 +271,7 @@ static struct commit *fake_working_tree_commit(struct repository *r,
         * want to run "diff-index --cached".
         */
        discard_index(r->index);
-       read_index(r->index);
+       repo_read_index(r);
 
        len = strlen(path);
        if (!mode) {
@@ -981,7 +982,7 @@ static int *fuzzy_find_matching_lines(struct blame_origin *parent,
        return result;
 }
 
-static void fill_origin_fingerprints(struct blame_origin *o, mmfile_t *file)
+static void fill_origin_fingerprints(struct blame_origin *o)
 {
        int *line_starts;
 
@@ -989,12 +990,19 @@ static void fill_origin_fingerprints(struct blame_origin *o, mmfile_t *file)
                return;
        o->num_lines = find_line_starts(&line_starts, o->file.ptr,
                                        o->file.size);
-       /* TODO: Will fill in fingerprints in a future commit */
+       o->fingerprints = xcalloc(sizeof(struct fingerprint), o->num_lines);
+       get_line_fingerprints(o->fingerprints, o->file.ptr, line_starts,
+                             0, o->num_lines);
        free(line_starts);
 }
 
 static void drop_origin_fingerprints(struct blame_origin *o) /* discard the fingerprint array attached to o, if there is one */
 {
+       if (o->fingerprints) { /* nothing to release when no fingerprints are attached */
+               free_line_fingerprints(o->fingerprints, o->num_lines); /* helper defined outside this view; presumably releases per-line data - confirm */
+               o->num_lines = 0; /* reset the count together with the array it described */
+               FREE_AND_NULL(o->fingerprints); /* per the macro name, frees and NULLs the pointer so the guard above skips a repeat call - confirm */
+       }
 }
 
 /*
@@ -1028,7 +1036,7 @@ static void fill_origin_blob(struct diff_options *opt,
        else
                *file = o->file;
        if (fill_fingerprints)
-               fill_origin_fingerprints(o, file);
+               fill_origin_fingerprints(o);
 }
 
 static void drop_origin_blob(struct blame_origin *o)
@@ -1224,7 +1232,7 @@ static int fill_blob_sha1_and_mode(struct repository *r,
 {
        if (!is_null_oid(&origin->blob_oid))
                return 0;
-       if (get_tree_entry(&origin->commit->object.oid, origin->path, &origin->blob_oid, &origin->mode))
+       if (get_tree_entry(r, &origin->commit->object.oid, origin->path, &origin->blob_oid, &origin->mode))
                goto error_out;
        if (oid_object_info(r, &origin->blob_oid, NULL) != OBJ_BLOB)
                goto error_out;
@@ -1572,9 +1580,34 @@ static int are_lines_adjacent(struct blame_line_tracker *first,
               first->s_lno + 1 == second->s_lno;
 }
 
+static int scan_parent_range(struct fingerprint *p_fps, /* candidate fingerprints, indexed by p_idx below */
+                            struct fingerprint *t_fps, int t_idx, /* only t_fps[t_idx] is compared */
+                            int from, int nr_lines) /* candidates are p_fps[from] .. p_fps[from + nr_lines - 1] */
+{ /* Returns the index of the candidate most similar to t_fps[t_idx], or -1 if none reaches the threshold. */
+       int sim, p_idx;
+       #define FINGERPRINT_FILE_THRESHOLD      10 /* minimum similarity accepted; being a macro, it stays defined after this function */
+       int best_sim_val = FINGERPRINT_FILE_THRESHOLD; /* seeding with the threshold rejects weaker candidates outright */
+       int best_sim_idx = -1; /* -1 means no candidate accepted yet; returned unchanged when nothing qualifies */
+
+       for (p_idx = from; p_idx < from + nr_lines; p_idx++) {
+               sim = fingerprint_similarity(&t_fps[t_idx], &p_fps[p_idx]); /* scoring helper defined outside this view; larger values are preferred here */
+               if (sim < best_sim_val) /* worse than the current best (initially the threshold) */
+                       continue;
+               /* Break ties with the closest-to-target line number */
+               if (sim == best_sim_val && best_sim_idx != -1 &&
+                   abs(best_sim_idx - t_idx) < abs(p_idx - t_idx)) /* keep the earlier pick only if strictly closer; at equal distance the later index wins */
+                       continue;
+               best_sim_val = sim;
+               best_sim_idx = p_idx;
+       }
+       return best_sim_idx; /* an index into p_fps, or -1 */
+}
+
 /*
- * This cheap heuristic assigns lines in the chunk to their relative location in
- * the parent's chunk.  Any additional lines are left with the target.
+ * The first pass checks the blame entry (from the target) against the parent's
+ * diff chunk.  If that fails for a line, the second pass tries to match that
+ * line to any part of the parent file.  That catches cases where a change was
+ * broken into two chunks by 'context.'
  */
 static void guess_line_blames(struct blame_origin *parent,
                              struct blame_origin *target,
@@ -1583,11 +1616,22 @@ static void guess_line_blames(struct blame_origin *parent,
 {
        int i, best_idx, target_idx;
        int parent_slno = tlno + offset;
+       int *fuzzy_matches;
 
+       fuzzy_matches = fuzzy_find_matching_lines(parent, target,
+                                                 tlno, parent_slno, same,
+                                                 parent_len);
        for (i = 0; i < same - tlno; i++) {
                target_idx = tlno + i;
-               best_idx = target_idx + offset;
-               if (best_idx < parent_slno + parent_len) {
+               if (fuzzy_matches && fuzzy_matches[i] >= 0) {
+                       best_idx = fuzzy_matches[i];
+               } else {
+                       best_idx = scan_parent_range(parent->fingerprints,
+                                                    target->fingerprints,
+                                                    target_idx, 0,
+                                                    parent->num_lines);
+               }
+               if (best_idx >= 0) {
                        line_blames[i].is_parent = 1;
                        line_blames[i].s_lno = best_idx;
                } else {
@@ -1595,6 +1639,7 @@ static void guess_line_blames(struct blame_origin *parent,
                        line_blames[i].s_lno = target_idx;
                }
        }
+       free(fuzzy_matches);
 }
 
 /*
@@ -1609,7 +1654,6 @@ static void guess_line_blames(struct blame_origin *parent,
  */
 static void ignore_blame_entry(struct blame_entry *e,
                               struct blame_origin *parent,
-                              struct blame_origin *target,
                               struct blame_entry **diffp,
                               struct blame_entry **ignoredp,
                               struct blame_line_tracker *line_blames)
@@ -1758,7 +1802,7 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
                        samep = n;
                }
                if (ignore_diffs) {
-                       ignore_blame_entry(e, parent, target, &diffp, &ignoredp,
+                       ignore_blame_entry(e, parent, &diffp, &ignoredp,
                                           line_blames + e->s_lno - tlno);
                } else {
                        e->next = diffp;
@@ -2371,6 +2415,12 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
                        if (!porigin)
                                continue;
                        pass_blame_to_parent(sb, origin, porigin, 1);
+                       /*
+                        * Preemptively drop porigin so we can refresh the
+                        * fingerprints if we use the parent again, which can
+                        * occur if you ignore back-to-back commits.
+                        */
+                       drop_origin_blob(porigin);
                        if (!origin->suspects)
                                goto finish;
                }
@@ -2438,7 +2488,8 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
        }
        for (i = 0; i < num_sg; i++) {
                if (sg_origin[i]) {
-                       drop_origin_blob(sg_origin[i]);
+                       if (!sg_origin[i]->suspects)
+                               drop_origin_blob(sg_origin[i]);
                        blame_origin_decref(sg_origin[i]);
                }
        }