/* Table of rename/copy src files */
static struct diff_rename_src {
struct diff_filespec *one;
+ unsigned short score; /* to remember the break score */
unsigned src_path_left : 1;
} *rename_src;
static int rename_src_nr, rename_src_alloc;
static struct diff_rename_src *register_rename_src(struct diff_filespec *one,
- int src_path_left)
+ int src_path_left,
+ unsigned short score)
{
int first, last;
memmove(rename_src + first + 1, rename_src + first,
(rename_src_nr - first - 1) * sizeof(*rename_src));
rename_src[first].one = one;
+ rename_src[first].score = score;
rename_src[first].src_path_left = src_path_left;
return &(rename_src[first]);
}
* match than anything else; the destination does not even
* call into this function in that case.
*/
- unsigned long delta_size, base_size, src_copied, literal_added;
+ unsigned long max_size, delta_size, base_size, src_copied, literal_added;
unsigned long delta_limit;
int score;
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
return 0;
- delta_size = ((src->size < dst->size) ?
- (dst->size - src->size) : (src->size - dst->size));
+ max_size = ((src->size > dst->size) ? src->size : dst->size);
base_size = ((src->size < dst->size) ? src->size : dst->size);
+ delta_size = max_size - base_size;
/* We would not consider edits that change the file size so
* drastically. delta_size must be smaller than
delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE;
if (diffcore_count_changes(src->data, src->size,
dst->data, dst->size,
+ &src->cnt_data, &dst->cnt_data,
delta_limit,
&src_copied, &literal_added))
return 0;
- /* Extent of damage */
- if (src->size + literal_added < src_copied)
- delta_size = 0;
- else
- delta_size = (src->size - src_copied) + literal_added;
-
- /*
- * Now we will give some score to it. 100% edit gets 0 points
- * and 0% edit gets MAX_SCORE points.
+ /* How similar are they?
+ * what percentage of material in dst are from source?
*/
- score = MAX_SCORE - (MAX_SCORE * delta_size / base_size);
- if (score < 0) return 0;
- if (MAX_SCORE < score) return MAX_SCORE;
+ if (!dst->size)
+ score = 0; /* should not happen */
+ else
+ score = src_copied * MAX_SCORE / max_size;
return score;
}
fill_filespec(two, dst->sha1, dst->mode);
dp = diff_queue(NULL, one, two);
- dp->score = score;
+ if (!strcmp(src->path, dst->path))
+ dp->score = rename_src[src_index].score;
+ else
+ dp->score = score;
dp->source_stays = rename_src[src_index].src_path_left;
rename_dst[dst_index].pair = dp;
}
* that means the source actually stays.
*/
int stays = (p->broken_pair && !p->score);
- register_rename_src(p->one, stays);
+ register_rename_src(p->one, stays, p->score);
}
else if (detect_rename == DIFF_DETECT_COPY)
- register_rename_src(p->one, 1);
+ register_rename_src(p->one, 1, p->score);
}
if (rename_dst_nr == 0 || rename_src_nr == 0 ||
(0 < rename_limit && rename_limit < rename_dst_nr))
m->score = estimate_similarity(one, two,
minimum_score);
}
+ /* We do not need the text anymore */
+ diff_free_filespec_data(two);
dst_cnt++;
}
/* cost matrix sorted by most to least similar pair */