diffcore-rename: cache file deltas
authorJeff King <peff@peff.net>
Tue, 25 Sep 2007 19:29:42 +0000 (15:29 -0400)
committerJunio C Hamano <gitster@pobox.com>
Wed, 26 Sep 2007 07:40:32 +0000 (00:40 -0700)
We find rename candidates by computing a fingerprint hash of
each file, and then comparing those fingerprints. There are
inherently O(n^2) comparisons, so it pays in CPU time to
hoist the (rather expensive) computation of the fingerprint
out of that loop (or to cache it once we have computed it once).

Previously, we didn't keep the filespec information around
because then we had the potential to consume a great deal of
memory. However, instead of keeping all of the filespec
data, we can instead just keep the fingerprint.

This patch implements and uses diff_free_filespec_data_large
to accomplish that goal. We also have to change
estimate_similarity not to needlessly repopulate the
filespec data when we already have the hash.

Practical tests showed 4.5x speedup for a 10% memory usage
increase.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff.c
diffcore-rename.c
diffcore.h
diff --git a/diff.c b/diff.c
index 0ee9ea1c1b47b82710a3850e7e9f679e4486f7bd..35e3c619864809a46cd5b6a36d08e8103b5ce485 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -1675,7 +1675,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
        return 0;
 }
 
-void diff_free_filespec_data(struct diff_filespec *s)
+void diff_free_filespec_data_large(struct diff_filespec *s)
 {
        if (s->should_free)
                free(s->data);
@@ -1686,6 +1686,11 @@ void diff_free_filespec_data(struct diff_filespec *s)
                s->should_free = s->should_munmap = 0;
                s->data = NULL;
        }
+}
+
+void diff_free_filespec_data(struct diff_filespec *s)
+{
+       diff_free_filespec_data_large(s);
        free(s->cnt_data);
        s->cnt_data = NULL;
 }
index 41b35c3a9e6935f6cd8563de732321e74e115765..4fc200064ae655c6936828c6bf04234afde778ce 100644 (file)
@@ -184,7 +184,8 @@ static int estimate_similarity(struct diff_filespec *src,
        if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
                return 0;
 
-       if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
+       if ((!src->cnt_data && diff_populate_filespec(src, 0))
+               || (!dst->cnt_data && diff_populate_filespec(dst, 0)))
                return 0; /* error but caught downstream */
 
 
@@ -377,10 +378,10 @@ void diffcore_rename(struct diff_options *options)
                        m->score = estimate_similarity(one, two,
                                                       minimum_score);
                        m->name_score = basename_same(one, two);
-                       diff_free_filespec_data(one);
+                       diff_free_filespec_data_large(one);
                }
                /* We do not need the text anymore */
-               diff_free_filespec_data(two);
+               diff_free_filespec_data_large(two);
                dst_cnt++;
        }
        /* cost matrix sorted by most to least similar pair */
index eef17c4ca2e81c572fb110e9eb11e8ed9d51f9a0..4bf175bda99f51f8df1445349d87bc59b56095e7 100644 (file)
@@ -48,6 +48,7 @@ extern void fill_filespec(struct diff_filespec *, const unsigned char *,
 
 extern int diff_populate_filespec(struct diff_filespec *, int);
 extern void diff_free_filespec_data(struct diff_filespec *);
+extern void diff_free_filespec_data_large(struct diff_filespec *);
 extern int diff_filespec_is_binary(struct diff_filespec *);
 
 struct diff_filepair {