Document --unified/-U option
[gitweb.git] / diffcore-rename.c
index 55cf1c37f344628eb06c40393295f288fb186a50..6bde4396f212833cc1d411e723d5215c086e7c2d 100644 (file)
@@ -54,12 +54,14 @@ static struct diff_rename_dst *locate_rename_dst(struct diff_filespec *two,
 /* Table of rename/copy src files */
 static struct diff_rename_src {
        struct diff_filespec *one;
+       unsigned short score; /* to remember the break score */
        unsigned src_path_left : 1;
 } *rename_src;
 static int rename_src_nr, rename_src_alloc;
 
 static struct diff_rename_src *register_rename_src(struct diff_filespec *one,
-                                                  int src_path_left)
+                                                  int src_path_left,
+                                                  unsigned short score)
 {
        int first, last;
 
@@ -89,19 +91,26 @@ static struct diff_rename_src *register_rename_src(struct diff_filespec *one,
                memmove(rename_src + first + 1, rename_src + first,
                        (rename_src_nr - first - 1) * sizeof(*rename_src));
        rename_src[first].one = one;
+       rename_src[first].score = score;
        rename_src[first].src_path_left = src_path_left;
        return &(rename_src[first]);
 }
 
-static int is_exact_match(struct diff_filespec *src, struct diff_filespec *dst)
+static int is_exact_match(struct diff_filespec *src,
+                         struct diff_filespec *dst,
+                         int contents_too)
 {
        if (src->sha1_valid && dst->sha1_valid &&
-           !memcmp(src->sha1, dst->sha1, 20))
+           !hashcmp(src->sha1, dst->sha1))
                return 1;
+       if (!contents_too)
+               return 0;
        if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1))
                return 0;
        if (src->size != dst->size)
                return 0;
+       if (src->sha1_valid && dst->sha1_valid)
+           return !hashcmp(src->sha1, dst->sha1);
        if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
                return 0;
        if (src->size == dst->size &&
@@ -110,10 +119,26 @@ static int is_exact_match(struct diff_filespec *src, struct diff_filespec *dst)
        return 0;
 }
 
+static int basename_same(struct diff_filespec *src, struct diff_filespec *dst)
+{
+       int src_len = strlen(src->path), dst_len = strlen(dst->path);
+       while (src_len && dst_len) {
+               char c1 = src->path[--src_len];
+               char c2 = dst->path[--dst_len];
+               if (c1 != c2)
+                       return 0;
+               if (c1 == '/')
+                       return 1;
+       }
+       return (!src_len || src->path[src_len - 1] == '/') &&
+               (!dst_len || dst->path[dst_len - 1] == '/');
+}
+
 struct diff_score {
        int src; /* index in rename_src */
        int dst; /* index in rename_dst */
        int score;
+       int name_score;
 };
 
 static int estimate_similarity(struct diff_filespec *src,
@@ -133,7 +158,7 @@ static int estimate_similarity(struct diff_filespec *src,
         * match than anything else; the destination does not even
         * call into this function in that case.
         */
-       unsigned long delta_size, base_size, src_copied, literal_added;
+       unsigned long max_size, delta_size, base_size, src_copied, literal_added;
        unsigned long delta_limit;
        int score;
 
@@ -144,9 +169,9 @@ static int estimate_similarity(struct diff_filespec *src,
        if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
                return 0;
 
-       delta_size = ((src->size < dst->size) ?
-                     (dst->size - src->size) : (src->size - dst->size));
+       max_size = ((src->size > dst->size) ? src->size : dst->size);
        base_size = ((src->size < dst->size) ? src->size : dst->size);
+       delta_size = max_size - base_size;
 
        /* We would not consider edits that change the file size so
         * drastically.  delta_size must be smaller than
@@ -163,26 +188,21 @@ static int estimate_similarity(struct diff_filespec *src,
                return 0; /* error but caught downstream */
 
 
-       delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE;
-       if (diffcore_count_changes(src->data, src->size,
-                                  dst->data, dst->size,
+       delta_limit = (unsigned long)
+               (base_size * (MAX_SCORE-minimum_score) / MAX_SCORE);
+       if (diffcore_count_changes(src, dst,
+                                  &src->cnt_data, &dst->cnt_data,
                                   delta_limit,
                                   &src_copied, &literal_added))
                return 0;
 
-       /* Extent of damage */
-       if (src->size + literal_added < src_copied)
-               delta_size = 0;
-       else
-               delta_size = (src->size - src_copied) + literal_added;
-
-       /*
-        * Now we will give some score to it.  100% edit gets 0 points
-        * and 0% edit gets MAX_SCORE points.
+       /* How similar are they?
+        * What percentage of the material in dst comes from the source?
         */
-       score = MAX_SCORE - (MAX_SCORE * delta_size / base_size); 
-       if (score < 0) return 0;
-       if (MAX_SCORE < score) return MAX_SCORE;
+       if (!dst->size)
+               score = 0; /* should not happen */
+       else
+               score = (int)(src_copied * MAX_SCORE / max_size);
        return score;
 }
 
@@ -203,7 +223,11 @@ static void record_rename_pair(int dst_index, int src_index, int score)
        fill_filespec(two, dst->sha1, dst->mode);
 
        dp = diff_queue(NULL, one, two);
-       dp->score = score;
+       dp->renamed_pair = 1;
+       if (!strcmp(src->path, dst->path))
+               dp->score = rename_src[src_index].score;
+       else
+               dp->score = score;
        dp->source_stays = rename_src[src_index].src_path_left;
        rename_dst[dst_index].pair = dp;
 }
@@ -215,6 +239,10 @@ static void record_rename_pair(int dst_index, int src_index, int score)
 static int score_compare(const void *a_, const void *b_)
 {
        const struct diff_score *a = a_, *b = b_;
+
+       if (a->score == b->score)
+               return b->name_score - a->name_score;
+
        return b->score - a->score;
 }
 
@@ -241,7 +269,7 @@ void diffcore_rename(struct diff_options *options)
        struct diff_queue_struct *q = &diff_queued_diff;
        struct diff_queue_struct outq;
        struct diff_score *mx;
-       int i, j, rename_count;
+       int i, j, rename_count, contents_too;
        int num_create, num_src, dst_cnt;
 
        if (!minimum_score)
@@ -250,21 +278,25 @@ void diffcore_rename(struct diff_options *options)
 
        for (i = 0; i < q->nr; i++) {
                struct diff_filepair *p = q->queue[i];
-               if (!DIFF_FILE_VALID(p->one))
+               if (!DIFF_FILE_VALID(p->one)) {
                        if (!DIFF_FILE_VALID(p->two))
                                continue; /* unmerged */
+                       else if (options->single_follow &&
+                                strcmp(options->single_follow, p->two->path))
+                               continue; /* not interested */
                        else
                                locate_rename_dst(p->two, 1);
+               }
                else if (!DIFF_FILE_VALID(p->two)) {
                        /* If the source is a broken "delete", and
                         * they did not really want to get broken,
                         * that means the source actually stays.
                         */
                        int stays = (p->broken_pair && !p->score);
-                       register_rename_src(p->one, stays);
+                       register_rename_src(p->one, stays, p->score);
                }
                else if (detect_rename == DIFF_DETECT_COPY)
-                       register_rename_src(p->one, 1);
+                       register_rename_src(p->one, 1, p->score);
        }
        if (rename_dst_nr == 0 || rename_src_nr == 0 ||
            (0 < rename_limit && rename_limit < rename_dst_nr))
@@ -272,16 +304,36 @@ void diffcore_rename(struct diff_options *options)
 
        /* We really want to cull the candidates list early
         * with cheap tests in order to avoid doing deltas.
+        * The first round matches up the up-to-date entries,
+        * and then during the second round we try to match
+        * cache-dirty entries as well.
         */
-       for (i = 0; i < rename_dst_nr; i++) {
-               struct diff_filespec *two = rename_dst[i].two;
-               for (j = 0; j < rename_src_nr; j++) {
-                       struct diff_filespec *one = rename_src[j].one;
-                       if (!is_exact_match(one, two))
-                               continue;
-                       record_rename_pair(i, j, MAX_SCORE);
-                       rename_count++;
-                       break; /* we are done with this entry */
+       for (contents_too = 0; contents_too < 2; contents_too++) {
+               for (i = 0; i < rename_dst_nr; i++) {
+                       struct diff_filespec *two = rename_dst[i].two;
+                       if (rename_dst[i].pair)
+                               continue; /* dealt with in an earlier round */
+                       for (j = 0; j < rename_src_nr; j++) {
+                               int k;
+                               struct diff_filespec *one = rename_src[j].one;
+                               if (!is_exact_match(one, two, contents_too))
+                                       continue;
+
+                               /* see if there is a basename match, too */
+                               for (k = j; k < rename_src_nr; k++) {
+                                       one = rename_src[k].one;
+                                       if (basename_same(one, two) &&
+                                               is_exact_match(one, two,
+                                                       contents_too)) {
+                                               j = k;
+                                               break;
+                                       }
+                               }
+
+                               record_rename_pair(i, j, (int)MAX_SCORE);
+                               rename_count++;
+                               break; /* we are done with this entry */
+                       }
                }
        }
 
@@ -309,7 +361,11 @@ void diffcore_rename(struct diff_options *options)
                        m->dst = i;
                        m->score = estimate_similarity(one, two,
                                                       minimum_score);
+                       m->name_score = basename_same(one, two);
+                       diff_free_filespec_data(one);
                }
+               /* We do not need the text anymore */
+               diff_free_filespec_data(two);
                dst_cnt++;
        }
        /* cost matrix sorted by most to least similar pair */