#include "cache.h"
#include "diff.h"
#include "diffcore.h"
-#include "delta.h"
-#include "count-delta.h"
/* Table of rename/copy destinations */
/* Table of rename/copy src files */
static struct diff_rename_src {
struct diff_filespec *one;
+ unsigned short score; /* to remember the break score */
unsigned src_path_left : 1;
} *rename_src;
static int rename_src_nr, rename_src_alloc;
static struct diff_rename_src *register_rename_src(struct diff_filespec *one,
- int src_path_left)
+ int src_path_left,
+ unsigned short score)
{
int first, last;
memmove(rename_src + first + 1, rename_src + first,
(rename_src_nr - first - 1) * sizeof(*rename_src));
rename_src[first].one = one;
+ rename_src[first].score = score;
rename_src[first].src_path_left = src_path_left;
return &(rename_src[first]);
}
-static int is_exact_match(struct diff_filespec *src, struct diff_filespec *dst)
+static int is_exact_match(struct diff_filespec *src,
+ struct diff_filespec *dst,
+ int contents_too)
{
if (src->sha1_valid && dst->sha1_valid &&
- !memcmp(src->sha1, dst->sha1, 20))
+ !hashcmp(src->sha1, dst->sha1))
return 1;
+ if (!contents_too)
+ return 0;
if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1))
return 0;
if (src->size != dst->size)
* match than anything else; the destination does not even
* call into this function in that case.
*/
- void *delta;
- unsigned long delta_size, base_size, src_copied, literal_added;
+ unsigned long max_size, delta_size, base_size, src_copied, literal_added;
unsigned long delta_limit;
int score;
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
return 0;
- delta_size = ((src->size < dst->size) ?
- (dst->size - src->size) : (src->size - dst->size));
+ max_size = ((src->size > dst->size) ? src->size : dst->size);
base_size = ((src->size < dst->size) ? src->size : dst->size);
+ delta_size = max_size - base_size;
/* We would not consider edits that change the file size so
* drastically. delta_size must be smaller than
if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
return 0; /* error but caught downstream */
+
delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE;
- delta = diff_delta(src->data, src->size,
- dst->data, dst->size,
- &delta_size, delta_limit, NULL);
- if (!delta)
- /* If delta_limit is exceeded, we have too much differences */
+ if (diffcore_count_changes(src->data, src->size,
+ dst->data, dst->size,
+ &src->cnt_data, &dst->cnt_data,
+ delta_limit,
+ &src_copied, &literal_added))
return 0;
- /* A delta that has a lot of literal additions would have
- * big delta_size no matter what else it does.
+ /* How similar are they?
+ * what percentage of material in dst are from source?
*/
- if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE) {
- free(delta);
- return 0;
- }
-
- /* Estimate the edit size by interpreting delta. */
- if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
- free(delta);
- return 0;
- }
- free(delta);
-
- /* Extent of damage */
- if (src->size + literal_added < src_copied)
- delta_size = 0;
+ if (!dst->size)
+ score = 0; /* should not happen */
else
- delta_size = (src->size - src_copied) + literal_added;
-
- /*
- * Now we will give some score to it. 100% edit gets 0 points
- * and 0% edit gets MAX_SCORE points.
- */
- score = MAX_SCORE - (MAX_SCORE * delta_size / base_size);
- if (score < 0) return 0;
- if (MAX_SCORE < score) return MAX_SCORE;
+ score = src_copied * MAX_SCORE / max_size;
return score;
}
fill_filespec(two, dst->sha1, dst->mode);
dp = diff_queue(NULL, one, two);
- dp->score = score;
+ dp->renamed_pair = 1;
+ if (!strcmp(src->path, dst->path))
+ dp->score = rename_src[src_index].score;
+ else
+ dp->score = score;
dp->source_stays = rename_src[src_index].src_path_left;
rename_dst[dst_index].pair = dp;
}
struct diff_queue_struct *q = &diff_queued_diff;
struct diff_queue_struct outq;
struct diff_score *mx;
- int i, j, rename_count;
+ int i, j, rename_count, contents_too;
int num_create, num_src, dst_cnt;
if (!minimum_score)
* that means the source actually stays.
*/
int stays = (p->broken_pair && !p->score);
- register_rename_src(p->one, stays);
+ register_rename_src(p->one, stays, p->score);
}
else if (detect_rename == DIFF_DETECT_COPY)
- register_rename_src(p->one, 1);
+ register_rename_src(p->one, 1, p->score);
}
if (rename_dst_nr == 0 || rename_src_nr == 0 ||
(0 < rename_limit && rename_limit < rename_dst_nr))
/* We really want to cull the candidates list early
* with cheap tests in order to avoid doing deltas.
+ * The first round matches up the up-to-date entries,
+ * and then during the second round we try to match
+ * cache-dirty entries as well.
*/
- for (i = 0; i < rename_dst_nr; i++) {
- struct diff_filespec *two = rename_dst[i].two;
- for (j = 0; j < rename_src_nr; j++) {
- struct diff_filespec *one = rename_src[j].one;
- if (!is_exact_match(one, two))
- continue;
- record_rename_pair(i, j, MAX_SCORE);
- rename_count++;
- break; /* we are done with this entry */
+ for (contents_too = 0; contents_too < 2; contents_too++) {
+ for (i = 0; i < rename_dst_nr; i++) {
+ struct diff_filespec *two = rename_dst[i].two;
+ if (rename_dst[i].pair)
+ continue; /* dealt with an earlier round */
+ for (j = 0; j < rename_src_nr; j++) {
+ struct diff_filespec *one = rename_src[j].one;
+ if (!is_exact_match(one, two, contents_too))
+ continue;
+ record_rename_pair(i, j, MAX_SCORE);
+ rename_count++;
+ break; /* we are done with this entry */
+ }
}
}
m->score = estimate_similarity(one, two,
minimum_score);
}
+ /* We do not need the text anymore */
+ diff_free_filespec_data(two);
dst_cnt++;
}
/* cost matrix sorted by most to least similar pair */