int src; /* index in rename_src */
int dst; /* index in rename_dst */
int score;
+ int name_score;
};
static int estimate_similarity(struct diff_filespec *src,
if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
return 0;
- if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
+ if ((!src->cnt_data && diff_populate_filespec(src, 0))
+ || (!dst->cnt_data && diff_populate_filespec(dst, 0)))
return 0; /* error but caught downstream */
delta_limit = (unsigned long)
(base_size * (MAX_SCORE-minimum_score) / MAX_SCORE);
- if (diffcore_count_changes(src->data, src->size,
- dst->data, dst->size,
+ if (diffcore_count_changes(src, dst,
&src->cnt_data, &dst->cnt_data,
delta_limit,
&src_copied, &literal_added))
*/
if (!dst->size)
score = 0; /* should not happen */
- else {
+ else
score = (int)(src_copied * MAX_SCORE / max_size);
- if (basename_same(src, dst))
- score++;
- }
return score;
}
static int score_compare(const void *a_, const void *b_)
{
const struct diff_score *a = a_, *b = b_;
+
+ if (a->score == b->score)
+ return b->name_score - a->name_score;
+
return b->score - a->score;
}
else if (detect_rename == DIFF_DETECT_COPY)
register_rename_src(p->one, 1, p->score);
}
- if (rename_dst_nr == 0 || rename_src_nr == 0 ||
- (0 < rename_limit && rename_limit < rename_dst_nr))
+ if (rename_dst_nr == 0 || rename_src_nr == 0)
goto cleanup; /* nothing to do */
+ /*
+ * This basically does a test for the rename matrix not
+ * growing larger than a "rename_limit" square matrix, i.e.:
+ *
+ * rename_dst_nr * rename_src_nr > rename_limit * rename_limit
+ *
+ * but bails out before multiplying when both counts exceed the
+ * clamped limit (and we assume at least 32-bit integers).
+ */
+ if (rename_limit <= 0 || rename_limit > 32767)
+ rename_limit = 32767;
+ if (rename_dst_nr > rename_limit && rename_src_nr > rename_limit)
+ goto cleanup;
+ if (rename_dst_nr * rename_src_nr > rename_limit * rename_limit)
+ goto cleanup;
+
/* We really want to cull the candidates list early
* with cheap tests in order to avoid doing deltas.
* The first round matches up the up-to-date entries,
m->dst = i;
m->score = estimate_similarity(one, two,
minimum_score);
- diff_free_filespec_data(one);
+ m->name_score = basename_same(one, two);
+ diff_free_filespec_blob(one);
}
/* We do not need the text anymore */
- diff_free_filespec_data(two);
+ diff_free_filespec_blob(two);
dst_cnt++;
}
/* cost matrix sorted by most to least similar pair */