optimize diffcore-delta by sorting hash entries.
[gitweb.git] / diffcore-rename.c
index 79c984c9cf5489d22f359b7a2ad3464ffc271c35..142e5376dd741377c311075816f139a0949ee82f 100644 (file)
@@ -138,6 +138,7 @@ struct diff_score {
        int src; /* index in rename_src */
        int dst; /* index in rename_dst */
        int score;
+       int name_score;
 };
 
 static int estimate_similarity(struct diff_filespec *src,
@@ -183,14 +184,14 @@ static int estimate_similarity(struct diff_filespec *src,
        if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
                return 0;
 
-       if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
+       if ((!src->cnt_data && diff_populate_filespec(src, 0))
+               || (!dst->cnt_data && diff_populate_filespec(dst, 0)))
                return 0; /* error but caught downstream */
 
 
        delta_limit = (unsigned long)
                (base_size * (MAX_SCORE-minimum_score) / MAX_SCORE);
-       if (diffcore_count_changes(src->data, src->size,
-                                  dst->data, dst->size,
+       if (diffcore_count_changes(src, dst,
                                   &src->cnt_data, &dst->cnt_data,
                                   delta_limit,
                                   &src_copied, &literal_added))
@@ -201,11 +202,8 @@ static int estimate_similarity(struct diff_filespec *src,
         */
        if (!dst->size)
                score = 0; /* should not happen */
-       else {
+       else
                score = (int)(src_copied * MAX_SCORE / max_size);
-               if (basename_same(src, dst))
-                       score++;
-       }
        return score;
 }
 
@@ -242,6 +240,10 @@ static void record_rename_pair(int dst_index, int src_index, int score)
 static int score_compare(const void *a_, const void *b_)
 {
        const struct diff_score *a = a_, *b = b_;
+
+       if (a->score == b->score)
+               return b->name_score - a->name_score;
+
        return b->score - a->score;
 }
 
@@ -297,10 +299,25 @@ void diffcore_rename(struct diff_options *options)
                else if (detect_rename == DIFF_DETECT_COPY)
                        register_rename_src(p->one, 1, p->score);
        }
-       if (rename_dst_nr == 0 || rename_src_nr == 0 ||
-           (0 < rename_limit && rename_limit < rename_dst_nr))
+       if (rename_dst_nr == 0 || rename_src_nr == 0)
                goto cleanup; /* nothing to do */
 
+       /*
+        * This basically does a test for the rename matrix not
+        * growing larger than a "rename_limit" square matrix, ie:
+        *
+        *    rename_dst_nr * rename_src_nr > rename_limit * rename_limit
+        *
+        * but handles the potential overflow case specially (and we
+        * assume at least 32-bit integers)
+        */
+       if (rename_limit <= 0 || rename_limit > 32767)
+               rename_limit = 32767;
+       if (rename_dst_nr > rename_limit && rename_src_nr > rename_limit)
+               goto cleanup;
+       if (rename_dst_nr * rename_src_nr > rename_limit * rename_limit)
+               goto cleanup;
+
        /* We really want to cull the candidates list early
         * with cheap tests in order to avoid doing deltas.
         * The first round matches up the up-to-date entries,
@@ -360,10 +377,11 @@ void diffcore_rename(struct diff_options *options)
                        m->dst = i;
                        m->score = estimate_similarity(one, two,
                                                       minimum_score);
-                       diff_free_filespec_data(one);
+                       m->name_score = basename_same(one, two);
+                       diff_free_filespec_blob(one);
                }
                /* We do not need the text anymore */
-               diff_free_filespec_data(two);
+               diff_free_filespec_blob(two);
                dst_cnt++;
        }
        /* cost matrix sorted by most to least similar pair */