#include "cache.h"
#include "diff.h"
#include "diffcore.h"
-#include "delta.h"
-#include "count-delta.h"
/* Table of rename/copy destinations */
/* Table of rename/copy src files */
static struct diff_rename_src {
struct diff_filespec *one;
+ unsigned short score; /* to remember the break score */
unsigned src_path_left : 1;
} *rename_src;
static int rename_src_nr, rename_src_alloc;
static struct diff_rename_src *register_rename_src(struct diff_filespec *one,
- int src_path_left)
+ int src_path_left,
+ unsigned short score)
{
int first, last;
memmove(rename_src + first + 1, rename_src + first,
(rename_src_nr - first - 1) * sizeof(*rename_src));
rename_src[first].one = one;
+ rename_src[first].score = score;
rename_src[first].src_path_left = src_path_left;
return &(rename_src[first]);
}
* match than anything else; the destination does not even
* call into this function in that case.
*/
- void *delta;
- unsigned long delta_size, base_size, src_copied, literal_added;
+ unsigned long max_size, delta_size, base_size, src_copied, literal_added;
unsigned long delta_limit;
int score;
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
return 0;
- delta_size = ((src->size < dst->size) ?
- (dst->size - src->size) : (src->size - dst->size));
+ max_size = ((src->size > dst->size) ? src->size : dst->size);
base_size = ((src->size < dst->size) ? src->size : dst->size);
+ delta_size = max_size - base_size;
/* We would not consider edits that change the file size so
* drastically. delta_size must be smaller than
if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
return 0; /* error but caught downstream */
+
delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE;
- delta = diff_delta(src->data, src->size,
- dst->data, dst->size,
- &delta_size, delta_limit);
- if (!delta)
- /* If delta_limit is exceeded, we have too much differences */
+ if (diffcore_count_changes(src->data, src->size,
+ dst->data, dst->size,
+ &src->cnt_data, &dst->cnt_data,
+ delta_limit,
+ &src_copied, &literal_added))
return 0;
- /* A delta that has a lot of literal additions would have
- * big delta_size no matter what else it does.
+ /* How similar are they?
+ * what percentage of material in dst are from source?
*/
- if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
- return 0;
-
- /* Estimate the edit size by interpreting delta. */
- if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
- free(delta);
- return 0;
- }
- free(delta);
-
- /* Extent of damage */
- if (src->size + literal_added < src_copied)
- delta_size = 0;
+ if (!dst->size)
+ score = 0; /* should not happen */
else
- delta_size = (src->size - src_copied) + literal_added;
-
- /*
- * Now we will give some score to it. 100% edit gets 0 points
- * and 0% edit gets MAX_SCORE points.
- */
- score = MAX_SCORE - (MAX_SCORE * delta_size / base_size);
- if (score < 0) return 0;
- if (MAX_SCORE < score) return MAX_SCORE;
+ score = src_copied * MAX_SCORE / max_size;
return score;
}
fill_filespec(two, dst->sha1, dst->mode);
dp = diff_queue(NULL, one, two);
- dp->score = score;
+ if (!strcmp(src->path, dst->path))
+ dp->score = rename_src[src_index].score;
+ else
+ dp->score = score;
dp->source_stays = rename_src[src_index].src_path_left;
rename_dst[dst_index].pair = dp;
}
* that means the source actually stays.
*/
int stays = (p->broken_pair && !p->score);
- register_rename_src(p->one, stays);
+ register_rename_src(p->one, stays, p->score);
}
else if (detect_rename == DIFF_DETECT_COPY)
- register_rename_src(p->one, 1);
+ register_rename_src(p->one, 1, p->score);
}
- if (rename_dst_nr == 0 ||
+ if (rename_dst_nr == 0 || rename_src_nr == 0 ||
(0 < rename_limit && rename_limit < rename_dst_nr))
goto cleanup; /* nothing to do */
if (rename_count == rename_dst_nr)
goto cleanup;
+ if (minimum_score == MAX_SCORE)
+ goto cleanup;
+
num_create = (rename_dst_nr - rename_count);
num_src = rename_src_nr;
mx = xmalloc(sizeof(*mx) * num_create * num_src);
m->score = estimate_similarity(one, two,
minimum_score);
}
+ /* We do not need the text anymore */
+ diff_free_filespec_data(two);
dst_cnt++;
}
/* cost matrix sorted by most to least similar pair */