/* Table of rename/copy src files */
static struct diff_rename_src {
struct diff_filespec *one;
+ unsigned short score; /* break score taken from the queued pair
+			  * (p->score) when the source is registered;
+			  * copied into dp->score when a recorded pair's
+			  * src and dst paths are identical */
unsigned src_path_left : 1;
} *rename_src;
static int rename_src_nr, rename_src_alloc;
static struct diff_rename_src *register_rename_src(struct diff_filespec *one,
- int src_path_left)
+ int src_path_left,
+ unsigned short score)
{
int first, last;
memmove(rename_src + first + 1, rename_src + first,
(rename_src_nr - first - 1) * sizeof(*rename_src));
rename_src[first].one = one;
+ rename_src[first].score = score;
rename_src[first].src_path_left = src_path_left;
return &(rename_src[first]);
}
-static int is_exact_match(struct diff_filespec *src, struct diff_filespec *dst)
+static int is_exact_match(struct diff_filespec *src,
+ struct diff_filespec *dst,
+ int contents_too)
{
if (src->sha1_valid && dst->sha1_valid &&
- !memcmp(src->sha1, dst->sha1, 20))
+ !hashcmp(src->sha1, dst->sha1))
return 1;
+ if (!contents_too)
+ return 0;
if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1))
return 0;
if (src->size != dst->size)
return 0;
+ if (src->sha1_valid && dst->sha1_valid)
+ return !hashcmp(src->sha1, dst->sha1);
if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
return 0;
if (src->size == dst->size &&
return 0;
}
+/*
+ * Tell whether src->path and dst->path end in the same final path
+ * component, i.e. whether the text following the last '/' (or the
+ * whole path when it contains no '/') is identical in both.
+ * Returns 1 when the components match and 0 otherwise.
+ */
+static int basename_same(struct diff_filespec *src, struct diff_filespec *dst)
+{
+	const char *a = src->path, *b = dst->path;
+	int i = strlen(a), j = strlen(b);
+
+	/* Walk both paths backwards in lockstep. */
+	for (; i && j; ) {
+		i--;
+		j--;
+		if (a[i] != b[j])
+			return 0;
+		if (a[i] == '/')
+			return 1; /* reached the directory separator together */
+	}
+
+	/*
+	 * At least one path is used up.  Whatever is left of the other
+	 * must be a directory prefix, i.e. end in '/'.
+	 */
+	if (i && a[i - 1] != '/')
+		return 0;
+	if (j && b[j - 1] != '/')
+		return 0;
+	return 1;
+}
+
struct diff_score {
int src; /* index in rename_src */
int dst; /* index in rename_dst */
int score;
+ int name_score; /* result of basename_same() for the pair; breaks
+		  * ties between equal scores in score_compare() */
};
static int estimate_similarity(struct diff_filespec *src,
* match than anything else; the destination does not even
* call into this function in that case.
*/
- unsigned long delta_size, base_size, src_copied, literal_added;
+ unsigned long max_size, delta_size, base_size, src_copied, literal_added;
unsigned long delta_limit;
int score;
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
return 0;
- delta_size = ((src->size < dst->size) ?
- (dst->size - src->size) : (src->size - dst->size));
+ max_size = ((src->size > dst->size) ? src->size : dst->size);
base_size = ((src->size < dst->size) ? src->size : dst->size);
+ delta_size = max_size - base_size;
/* We would not consider edits that change the file size so
* drastically. delta_size must be smaller than
return 0; /* error but caught downstream */
- delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE;
- if (diffcore_count_changes(src->data, src->size,
- dst->data, dst->size,
+ delta_limit = (unsigned long)
+ (base_size * (MAX_SCORE-minimum_score) / MAX_SCORE);
+ if (diffcore_count_changes(src, dst,
+ &src->cnt_data, &dst->cnt_data,
delta_limit,
&src_copied, &literal_added))
return 0;
- /* Extent of damage */
- if (src->size + literal_added < src_copied)
- delta_size = 0;
- else
- delta_size = (src->size - src_copied) + literal_added;
-
- /*
- * Now we will give some score to it. 100% edit gets 0 points
- * and 0% edit gets MAX_SCORE points.
+ /* How similar are they?
+ * The score is src_copied relative to the larger of the two
+ * sizes (max_size), scaled to MAX_SCORE.
*/
- score = MAX_SCORE - (MAX_SCORE * delta_size / base_size);
- if (score < 0) return 0;
- if (MAX_SCORE < score) return MAX_SCORE;
+ if (!dst->size)
+ score = 0; /* should not happen */
+ else
+ score = (int)(src_copied * MAX_SCORE / max_size);
return score;
}
fill_filespec(two, dst->sha1, dst->mode);
dp = diff_queue(NULL, one, two);
- dp->score = score;
+ dp->renamed_pair = 1;
+ if (!strcmp(src->path, dst->path))
+ dp->score = rename_src[src_index].score;
+ else
+ dp->score = score;
dp->source_stays = rename_src[src_index].src_path_left;
rename_dst[dst_index].pair = dp;
}
static int score_compare(const void *a_, const void *b_)
{
const struct diff_score *a = a_, *b = b_;
+ /*
+  * Compares two struct diff_score entries.  When both have the same
+  * similarity score, the entry with the larger name_score gets the
+  * negative (earlier) result.
+  */
+ if (a->score == b->score)
+ return b->name_score - a->name_score;
+ /* Negative when a has the higher score, so a sort puts it first. */
return b->score - a->score;
}
struct diff_queue_struct *q = &diff_queued_diff;
struct diff_queue_struct outq;
struct diff_score *mx;
- int i, j, rename_count;
+ int i, j, rename_count, contents_too;
int num_create, num_src, dst_cnt;
if (!minimum_score)
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
- if (!DIFF_FILE_VALID(p->one))
+ if (!DIFF_FILE_VALID(p->one)) {
if (!DIFF_FILE_VALID(p->two))
continue; /* unmerged */
+ else if (options->single_follow &&
+ strcmp(options->single_follow, p->two->path))
+ continue; /* not interested */
else
locate_rename_dst(p->two, 1);
+ }
else if (!DIFF_FILE_VALID(p->two)) {
/* If the source is a broken "delete", and
* they did not really want to get broken,
* that means the source actually stays.
*/
int stays = (p->broken_pair && !p->score);
- register_rename_src(p->one, stays);
+ register_rename_src(p->one, stays, p->score);
}
else if (detect_rename == DIFF_DETECT_COPY)
- register_rename_src(p->one, 1);
+ register_rename_src(p->one, 1, p->score);
}
if (rename_dst_nr == 0 || rename_src_nr == 0 ||
(0 < rename_limit && rename_limit < rename_dst_nr))
/* We really want to cull the candidates list early
* with cheap tests in order to avoid doing deltas.
+ * The first round matches up the up-to-date entries,
+ * and then during the second round we try to match
+ * cache-dirty entries as well.
*/
- for (i = 0; i < rename_dst_nr; i++) {
- struct diff_filespec *two = rename_dst[i].two;
- for (j = 0; j < rename_src_nr; j++) {
- struct diff_filespec *one = rename_src[j].one;
- if (!is_exact_match(one, two))
- continue;
- record_rename_pair(i, j, MAX_SCORE);
- rename_count++;
- break; /* we are done with this entry */
+ for (contents_too = 0; contents_too < 2; contents_too++) {
+ for (i = 0; i < rename_dst_nr; i++) {
+ struct diff_filespec *two = rename_dst[i].two;
+ if (rename_dst[i].pair)
+ continue; /* dealt with an earlier round */
+ for (j = 0; j < rename_src_nr; j++) {
+ int k;
+ struct diff_filespec *one = rename_src[j].one;
+ if (!is_exact_match(one, two, contents_too))
+ continue;
+
+ /* see if there is a basename match, too */
+ for (k = j; k < rename_src_nr; k++) {
+ one = rename_src[k].one;
+ if (basename_same(one, two) &&
+ is_exact_match(one, two,
+ contents_too)) {
+ j = k;
+ break;
+ }
+ }
+
+ record_rename_pair(i, j, (int)MAX_SCORE);
+ rename_count++;
+ break; /* we are done with this entry */
+ }
}
}
m->dst = i;
m->score = estimate_similarity(one, two,
minimum_score);
+ m->name_score = basename_same(one, two);
+ diff_free_filespec_data(one);
}
+ /* We do not need the text anymore */
+ diff_free_filespec_data(two);
dst_cnt++;
}
/* cost matrix sorted by most to least similar pair */