struct diff_filespec *s)
{
if (S_ISDIR(s->mode))
- return; /* rename/copy patch for tree does not make sense. */
+ return; /* no trees, please */
if (pool->alloc <= pool->nr) {
pool->alloc = alloc_nr(pool->alloc);
* else.
*/
void *delta;
- unsigned long delta_size;
+ unsigned long delta_size, base_size;
int score;
+ /* We deal only with regular files. Symlink renames are handled
+ * only when they are exact matches --- in other words, no edits
+ * after renaming.
+ */
+ if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
+ return 0;
+
delta_size = ((src->size < dst->size) ?
(dst->size - src->size) : (src->size - dst->size));
-
- /* We would not consider rename followed by more than
- * minimum_score/MAX_SCORE edits; that is, delta_size must be smaller
- * than (src->size + dst->size)/2 * minimum_score/MAX_SCORE,
- * which means...
+ base_size = ((src->size < dst->size) ? src->size : dst->size);
+
+ /* We would not consider edits that change the file size so
+ * drastically. delta_size must be smaller than
+ * (MAX_SCORE-minimum_score)/MAX_SCORE * min(src->size, dst->size).
+ * Note that base_size == 0 case is handled here already
+ * and the final score computation below would not have a
+ * divide-by-zero issue.
*/
-
- if ((src->size+dst->size)*minimum_score < delta_size*MAX_SCORE*2)
+ if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
return 0;
delta = diff_delta(src->data, src->size,
dst->data, dst->size,
&delta_size);
+ /*
+ * We currently punt here, but we may later end up parsing the
+ * delta to really assess the extent of damage. A big consecutive
+ * remove would produce small delta_size that affects quite a
+ * big portion of the file.
+ */
free(delta);
- /* This "delta" is really xdiff with adler32 and all the
- * overheads but it is a quick and dirty approximation.
- *
- * Now we will give some score to it. 100% edit gets
- * 0 points and 0% edit gets MAX_SCORE points. That is, every
- * 1/MAX_SCORE edit gets 1 point penalty. The amount of penalty is:
- *
- * (delta_size * 2 / (src->size + dst->size)) * MAX_SCORE
- *
+ /*
+ * Now we will give some score to it. 100% edit gets 0 points
+ * and 0% edit gets MAX_SCORE points.
*/
- score = MAX_SCORE-(MAX_SCORE*2*delta_size/(src->size+dst->size));
+ score = MAX_SCORE - (MAX_SCORE * delta_size / base_size);
if (score < 0) return 0;
if (MAX_SCORE < score) return MAX_SCORE;
return score;
fprintf(stderr, "queue[%d] %s (%s) %s %06o %s\n",
x, one,
s->path,
- s->file_valid ? "valid" : "invalid",
+ DIFF_FILE_VALID(s) ? "valid" : "invalid",
s->mode,
s->sha1_valid ? sha1_to_hex(s->sha1) : "");
fprintf(stderr, "queue[%d] %s size %lu flags %d\n",
*/
while (i < q->nr) {
struct diff_filepair *p = q->queue[i++];
- if (!p->two->file_valid)
+ if (!DIFF_FILE_VALID(p->two))
continue; /* removed is fine */
if (strcmp(p->one->path, it->path))
continue; /* not relevant */
return 0;
}
-void diff_detect_rename(struct diff_queue_struct *q,
- int detect_rename,
- int minimum_score)
+/*
+ * Parse the numeric part of a "-M<digits>" or "-C<digits>" option.
+ *
+ * The digits are read as a decimal fraction, i.e. "-M5" means 5/10
+ * and "-M75" means 75/100.
+ *
+ * Returns -1 when opt is not a -M or -C option, 0 when it is not
+ * followed by a string made solely of digits (the caller is expected
+ * to fall back to its default), and MAX_SCORE * num / scale otherwise.
+ */
+int diff_scoreopt_parse(const char *opt)
{
+	const char *cp;
+	int num, scale;
+
+	if (opt[0] != '-' || (opt[1] != 'M' && opt[1] != 'C'))
+		return -1; /* that is not a -M nor -C option */
+	cp = opt + 2;
+	if (*cp == '\0' || cp[strspn(cp, "0123456789")] != '\0')
+		return 0; /* use default */
+
+	/*
+	 * Accumulate the digits by hand instead of using sscanf("%d"),
+	 * whose behaviour is undefined when the value does not fit in
+	 * an int.  Only the five most significant digits are used;
+	 * the rest are below the precision we care about and would
+	 * only push num and scale towards signed overflow.
+	 * NOTE(review): assumes MAX_SCORE * 99999 fits in an int --
+	 * confirm against the definition of MAX_SCORE.
+	 */
+	num = 0;
+	scale = 1;
+	for (; *cp; cp++) {
+		if (scale < 100000) {
+			scale *= 10;
+			num = num * 10 + (*cp - '0');
+		}
+	}
+
+	/* user says num divided by scale and we say internally that
+	 * is MAX_SCORE * num / scale.
+	 */
+	return MAX_SCORE * num / scale;
+}
+
+void diffcore_rename(int detect_rename, int minimum_score)
+{
+ struct diff_queue_struct *q = &diff_queued_diff;
struct diff_queue_struct outq;
struct diff_rename_pool created, deleted, stay;
struct diff_rename_pool *(srcs[2]);
int h, i, j;
int num_create, num_src, dst_cnt, src_cnt;
+ if (!minimum_score)
+ minimum_score = DEFAULT_MINIMUM_SCORE;
outq.queue = NULL;
outq.nr = outq.alloc = 0;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
- if (!p->one->file_valid)
- if (!p->two->file_valid)
- continue; /* ignore nonsense */
+ if (!DIFF_FILE_VALID(p->one))
+ if (!DIFF_FILE_VALID(p->two))
+ continue; /* unmerged */
else
diff_rename_pool_add(&created, p->two);
- else if (!p->two->file_valid)
+ else if (!DIFF_FILE_VALID(p->two))
diff_rename_pool_add(&deleted, p->one);
else if (1 < detect_rename) /* find copy, too */
diff_rename_pool_add(&stay, p->one);
if (mx[i].dst->xfrm_flags & RENAME_DST_MATCHED)
continue; /* alreayd done, either exact or fuzzy. */
if (mx[i].score < minimum_score)
- continue;
+ break; /* there is not any more diffs applicable. */
record_rename_pair(&outq,
mx[i].src, mx[i].dst, mx[i].rank,
mx[i].score);
*/
for (i = 0; i < q->nr; i++) {
struct diff_filepair *dp, *p = q->queue[i];
- if (!p->one->file_valid) {
- if (p->two->file_valid) {
- /* creation */
- dp = diff_queue(&outq, p->one, p->two);
- dp->xfrm_work = 4;
- }
- /* otherwise it is a nonsense; just ignore it */
+ if (!DIFF_FILE_VALID(p->one)) {
+ /* creation or unmerged entries */
+ dp = diff_queue(&outq, p->one, p->two);
+ dp->xfrm_work = 4;
}
- else if (!p->two->file_valid) {
+ else if (!DIFF_FILE_VALID(p->two)) {
/* deletion */
dp = diff_queue(&outq, p->one, p->two);
dp->xfrm_work = 2;
/* Copy it out to q, removing duplicates. */
for (i = 0; i < outq.nr; i++) {
struct diff_filepair *p = outq.queue[i];
- if (!p->one->file_valid) {
- /* created */
+ if (!DIFF_FILE_VALID(p->one)) {
+ /* created or unmerged */
if (p->two->xfrm_flags & RENAME_DST_MATCHED)
; /* rename/copy created it already */
else
diff_queue(q, p->one, p->two);
}
- else if (!p->two->file_valid) {
+ else if (!DIFF_FILE_VALID(p->two)) {
/* deleted */
if (p->one->xfrm_flags & RENAME_SRC_GONE)
; /* rename/copy deleted it already */
else
/* otherwise it is a modified (or stayed) entry */
diff_queue(q, p->one, p->two);
- free(p);
+ diff_free_filepair(p);
}
free(outq.queue);