Merge branch 'mz/maint-rename-unmerged'

author: Junio C Hamano <gitster@pobox.com>
Mon, 2 May 2011 22:58:27 +0000 (15:58 -0700)
committer: Junio C Hamano <gitster@pobox.com>
Mon, 2 May 2011 22:58:27 +0000 (15:58 -0700)

diff --combined diffcore-rename.c

index 3d65bb370dbfd6d6581b6b3b454b5551d3e3fe22,d5a99d66a1dc72fda5ea2872bd4e6403367405cf..f639601c762ebbd12374fa739d1d63efaf265e2a
--- 1/diffcore-rename.c
--- 2/diffcore-rename.c
+++ b/diffcore-rename.c
@@@ -5,7 -5,6 +5,7 @@@
   #include "diff.h"
   #include "diffcore.h"
   #include "hash.h"
+ +#include "progress.h"
   
   /* Table of rename/copy destinations */
   
@@@ -55,23 -54,22 +55,23 @@@ static struct diff_rename_dst *locate_r
   
   /* Table of rename/copy src files */
   static struct diff_rename_src {
- -      struct diff_filespec *one;
+ +      struct diff_filepair *p;
         unsigned short score; /* to remember the break score */
   } *rename_src;
   static int rename_src_nr, rename_src_alloc;
   
- -static struct diff_rename_src *register_rename_src(struct diff_filespec *one,
- -                                                 unsigned short score)
+ +static struct diff_rename_src *register_rename_src(struct diff_filepair *p)
   {
         int first, last;
+ +      struct diff_filespec *one = p->one;
+ +      unsigned short score = p->score;
   
         first = 0;
         last = rename_src_nr;
         while (last > first) {
                 int next = (last + first) >> 1;
                 struct diff_rename_src *src = &(rename_src[next]);
- -              int cmp = strcmp(one->path, src->one->path);
+ +              int cmp = strcmp(one->path, src->p->one->path);
                 if (!cmp)
                         return src;
                 if (cmp < 0) {
@@@ -91,7 -89,7 +91,7 @@@
         if (first < rename_src_nr)
                 memmove(rename_src + first + 1, rename_src + first,
                         (rename_src_nr - first - 1) * sizeof(*rename_src));
- -      rename_src[first].one = one;
+ +      rename_src[first].p = p;
         rename_src[first].score = score;
         return &(rename_src[first]);
   }
@@@ -172,7 -170,7 +172,7 @@@ static int estimate_similarity(struct d
          * and the final score computation below would not have a
          * divide-by-zero issue.
          */
- -      if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
+ +      if (max_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
                 return 0;
   
         if (!src->cnt_data && diff_populate_filespec(src, 0))
@@@ -206,7 -204,7 +206,7 @@@ static void record_rename_pair(int dst_
         if (rename_dst[dst_index].pair)
                 die("internal error: dst already matched.");
   
- -      src = rename_src[src_index].one;
+ +      src = rename_src[src_index].p->one;
         src->rename_used++;
         src->count++;
   
@@@ -249,8 -247,7 +249,8 @@@ struct file_similarity 
   };
   
   static int find_identical_files(struct file_similarity *src,
- -                              struct file_similarity *dst)
+ +                              struct file_similarity *dst,
+ +                              struct diff_options *options)
   {
         int renames = 0;
   
@@@ -280,8 -277,6 +280,8 @@@
                         }
                         /* Give higher scores to sources that haven't been used already */
                         score = !source->rename_used;
+ +                      if (source->rename_used && options->detect_rename != DIFF_DETECT_COPY)
+ +                              continue;
                         score += basename_same(source, target);
                         if (score > best_score) {
                                 best = p;
@@@ -311,12 -306,11 +311,12 @@@ static void free_similarity_list(struc
         }
   }
   
- -static int find_same_files(void *ptr)
+ +static int find_same_files(void *ptr, void *data)
   {
         int ret;
         struct file_similarity *p = ptr;
         struct file_similarity *src = NULL, *dst = NULL;
+ +      struct diff_options *options = data;
   
         /* Split the hash list up into sources and destinations */
         do {
@@@ -335,7 -329,7 +335,7 @@@
          * If we have both sources *and* destinations, see if
          * we can match them up
          */
- -      ret = (src && dst) ? find_identical_files(src, dst) : 0;
+ +      ret = (src && dst) ? find_identical_files(src, dst, options) : 0;
   
         /* Free the hashes and return the number of renames found */
         free_similarity_list(src);
@@@ -383,20 -377,20 +383,20 @@@ static void insert_file_table(struct ha
    * and then during the second round we try to match
    * cache-dirty entries as well.
    */
- -static int find_exact_renames(void)
+ +static int find_exact_renames(struct diff_options *options)
   {
         int i;
         struct hash_table file_table;
   
         init_hash(&file_table);
         for (i = 0; i < rename_src_nr; i++)
- -              insert_file_table(&file_table, -1, i, rename_src[i].one);
+ +              insert_file_table(&file_table, -1, i, rename_src[i].p->one);
   
         for (i = 0; i < rename_dst_nr; i++)
                 insert_file_table(&file_table, 1, i, rename_dst[i].two);
   
         /* Find the renames */
- -      i = for_each_hash(&file_table, find_same_files);
+ +      i = for_each_hash(&file_table, find_same_files, options);
   
         /* .. and free the hash data structure */
         free_hash(&file_table);
@@@ -420,86 -414,16 +420,86 @@@ static void record_if_better(struct dif
                 m[worst] = *o;
   }
   
+ +/*
+ + * Returns:
+ + * 0 if we are under the limit;
+ + * 1 if we need to disable inexact rename detection;
+ + * 2 if we would be under the limit if we were given -C instead of -C -C.
+ + */
+ +static int too_many_rename_candidates(int num_create,
+ +                                    struct diff_options *options)
+ +{
+ +      int rename_limit = options->rename_limit;
+ +      int num_src = rename_src_nr;
+ +      int i;
+ +
+ +      options->needed_rename_limit = 0;
+ +
+ +      /*
+ +       * This basically does a test for the rename matrix not
+ +       * growing larger than a "rename_limit" square matrix, ie:
+ +       *
+ +       *    num_create * num_src > rename_limit * rename_limit
+ +       *
+ +       * but handles the potential overflow case specially (and we
+ +       * assume at least 32-bit integers)
+ +       */
+ +      if (rename_limit <= 0 || rename_limit > 32767)
+ +              rename_limit = 32767;
+ +      if ((num_create <= rename_limit || num_src <= rename_limit) &&
+ +          (num_create * num_src <= rename_limit * rename_limit))
+ +              return 0;
+ +
+ +      options->needed_rename_limit =
+ +              num_src > num_create ? num_src : num_create;
+ +
+ +      /* Are we running under -C -C? */
+ +      if (!DIFF_OPT_TST(options, FIND_COPIES_HARDER))
+ +              return 1;
+ +
+ +      /* Would we bust the limit if we were running under -C? */
+ +      for (num_src = i = 0; i < rename_src_nr; i++) {
+ +              if (diff_unmodified_pair(rename_src[i].p))
+ +                      continue;
+ +              num_src++;
+ +      }
+ +      if ((num_create <= rename_limit || num_src <= rename_limit) &&
+ +          (num_create * num_src <= rename_limit * rename_limit))
+ +              return 2;
+ +      return 1;
+ +}
+ +
+ +static int find_renames(struct diff_score *mx, int dst_cnt, int minimum_score, int copies)
+ +{
+ +      int count = 0, i;
+ +
+ +      for (i = 0; i < dst_cnt * NUM_CANDIDATE_PER_DST; i++) {
+ +              struct diff_rename_dst *dst;
+ +
+ +              if ((mx[i].dst < 0) ||
+ +                  (mx[i].score < minimum_score))
+ +                      break; /* there is no more usable pair. */
+ +              dst = &rename_dst[mx[i].dst];
+ +              if (dst->pair)
+ +                      continue; /* already done, either exact or fuzzy. */
+ +              if (!copies && rename_src[mx[i].src].p->one->rename_used)
+ +                      continue;
+ +              record_rename_pair(mx[i].dst, mx[i].src, mx[i].score);
+ +              count++;
+ +      }
+ +      return count;
+ +}
+ +
   void diffcore_rename(struct diff_options *options)
   {
         int detect_rename = options->detect_rename;
         int minimum_score = options->rename_score;
- -      int rename_limit = options->rename_limit;
         struct diff_queue_struct *q = &diff_queued_diff;
         struct diff_queue_struct outq;
         struct diff_score *mx;
- -      int i, j, rename_count;
- -      int num_create, num_src, dst_cnt;
+ +      int i, j, rename_count, skip_unmodified = 0;
+ +      int num_create, dst_cnt;
+ +      struct progress *progress = NULL;
   
         if (!minimum_score)
                 minimum_score = DEFAULT_RENAME_SCORE;
@@@ -515,7 -439,7 +515,7 @@@
                         else
                                 locate_rename_dst(p->two, 1);
                 }
-               else if (!DIFF_FILE_VALID(p->two)) {
+               else if (!DIFF_PAIR_UNMERGED(p) && !DIFF_FILE_VALID(p->two)) {
                         /*
                          * If the source is a broken "delete", and
                          * they did not really want to get broken,
@@@ -525,7 -449,7 +525,7 @@@
                          */
                         if (p->broken_pair && !p->score)
                                 p->one->rename_used++;
- -                      register_rename_src(p->one, p->score);
+ +                      register_rename_src(p);
                 }
                 else if (detect_rename == DIFF_DETECT_COPY) {
                         /*
@@@ -533,7 -457,7 +533,7 @@@
                          * one, to indicate ourselves as a user.
                          */
                         p->one->rename_used++;
- -                      register_rename_src(p->one, p->score);
+ +                      register_rename_src(p);
                 }
         }
         if (rename_dst_nr == 0 || rename_src_nr == 0)
@@@ -543,7 -467,7 +543,7 @@@
          * We really want to cull the candidates list early
          * with cheap tests in order to avoid doing deltas.
          */
- -      rename_count = find_exact_renames();
+ +      rename_count = find_exact_renames(options);
   
         /* Did we only want exact renames? */
         if (minimum_score == MAX_SCORE)
@@@ -554,26 -478,28 +554,26 @@@
          * files still remain as options for rename/copies!)
          */
         num_create = (rename_dst_nr - rename_count);
- -      num_src = rename_src_nr;
   
         /* All done? */
         if (!num_create)
                 goto cleanup;
   
- -      /*
- -       * This basically does a test for the rename matrix not
- -       * growing larger than a "rename_limit" square matrix, ie:
- -       *
- -       *    num_create * num_src > rename_limit * rename_limit
- -       *
- -       * but handles the potential overflow case specially (and we
- -       * assume at least 32-bit integers)
- -       */
- -      if (rename_limit <= 0 || rename_limit > 32767)
- -              rename_limit = 32767;
- -      if ((num_create > rename_limit && num_src > rename_limit) ||
- -          (num_create * num_src > rename_limit * rename_limit)) {
- -              if (options->warn_on_too_large_rename)
- -                      warning("too many files (created: %d deleted: %d), skipping inexact rename detection", num_create, num_src);
+ +      switch (too_many_rename_candidates(num_create, options)) {
+ +      case 1:
                 goto cleanup;
+ +      case 2:
+ +              options->degraded_cc_to_c = 1;
+ +              skip_unmodified = 1;
+ +              break;
+ +      default:
+ +              break;
+ +      }
+ +
+ +      if (options->show_rename_progress) {
+ +              progress = start_progress_delay(
+ +                              "Performing inexact rename detection",
+ +                              rename_dst_nr * rename_src_nr, 50, 1);
         }
   
         mx = xcalloc(num_create * NUM_CANDIDATE_PER_DST, sizeof(*mx));
@@@ -589,13 -515,8 +589,13 @@@
                         m[j].dst = -1;
   
                 for (j = 0; j < rename_src_nr; j++) {
- -                      struct diff_filespec *one = rename_src[j].one;
+ +                      struct diff_filespec *one = rename_src[j].p->one;
                         struct diff_score this_src;
+ +
+ +                      if (skip_unmodified &&
+ +                          diff_unmodified_pair(rename_src[j].p))
+ +                              continue;
+ +
                         this_src.score = estimate_similarity(one, two,
                                                              minimum_score);
                         this_src.name_score = basename_same(one, two);
@@@ -610,16 -531,38 +610,16 @@@
                         diff_free_filespec_blob(two);
                 }
                 dst_cnt++;
+ +              display_progress(progress, (i+1)*rename_src_nr);
         }
+ +      stop_progress(&progress);
   
         /* cost matrix sorted by most to least similar pair */
         qsort(mx, dst_cnt * NUM_CANDIDATE_PER_DST, sizeof(*mx), score_compare);
   
- -      for (i = 0; i < dst_cnt * NUM_CANDIDATE_PER_DST; i++) {
- -              struct diff_rename_dst *dst;
- -
- -              if ((mx[i].dst < 0) ||
- -                  (mx[i].score < minimum_score))
- -                      break; /* there is no more usable pair. */
- -              dst = &rename_dst[mx[i].dst];
- -              if (dst->pair)
- -                      continue; /* already done, either exact or fuzzy. */
- -              if (rename_src[mx[i].src].one->rename_used)
- -                      continue;
- -              record_rename_pair(mx[i].dst, mx[i].src, mx[i].score);
- -              rename_count++;
- -      }
- -
- -      for (i = 0; i < dst_cnt * NUM_CANDIDATE_PER_DST; i++) {
- -              struct diff_rename_dst *dst;
- -
- -              if ((mx[i].dst < 0) ||
- -                  (mx[i].score < minimum_score))
- -                      break; /* there is no more usable pair. */
- -              dst = &rename_dst[mx[i].dst];
- -              if (dst->pair)
- -                      continue; /* already done, either exact or fuzzy. */
- -              record_rename_pair(mx[i].dst, mx[i].src, mx[i].score);
- -              rename_count++;
- -      }
+ +      rename_count += find_renames(mx, dst_cnt, minimum_score, 0);
+ +      if (detect_rename == DIFF_DETECT_COPY)
+ +              rename_count += find_renames(mx, dst_cnt, minimum_score, 1);
         free(mx);
   
    cleanup:
@@@ -631,7 -574,10 +631,10 @@@
                 struct diff_filepair *p = q->queue[i];
                 struct diff_filepair *pair_to_free = NULL;
   
-               if (!DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) {
+               if (DIFF_PAIR_UNMERGED(p)) {
+                       diff_q(&outq, p);
+               }
+               else if (!DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) {
                         /*
                          * Creation
                          *
diff --combined t/t7060-wtstatus.sh

index b4fcc86a10f15178da25da159e160b38d717f318,a5b7a568bc60e49da317c2a525bdaff13f3e19d6..3a5d927f83730f53a67cfec76a0625008dfb71f2
--- 1/t/t7060-wtstatus.sh
--- 2/t/t7060-wtstatus.sh
+++ b/t/t7060-wtstatus.sh
@@@ -38,7 -38,7 +38,7 @@@ cat >expect <<EO
   no changes added to commit (use "git add" and/or "git commit -a")
   EOF
   
- -test_expect_success 'M/D conflict does not segfault' '
+ +test_expect_success C_LOCALE_OUTPUT 'M/D conflict does not segfault' '
         mkdir mdconflict &&
         (
                 cd mdconflict &&
@@@ -56,4 -56,66 +56,66 @@@
         )
   '
   
+ test_expect_success 'rename & unmerged setup' '
+       git rm -f -r . &&
+       cat "$TEST_DIRECTORY/README" >ONE &&
+       git add ONE &&
+       test_tick &&
+       git commit -m "One commit with ONE" &&
+ 
+       echo Modified >TWO &&
+       cat ONE >>TWO &&
+       cat ONE >>THREE &&
+       git add TWO THREE &&
+       sha1=$(git rev-parse :ONE) &&
+       git rm --cached ONE &&
+       (
+               echo "100644 $sha1 1    ONE" &&
+               echo "100644 $sha1 2    ONE" &&
+               echo "100644 $sha1 3    ONE"
+       ) | git update-index --index-info &&
+       echo Further >>THREE
+ '
+ 
+ test_expect_success 'rename & unmerged status' '
+       git status -suno >actual &&
+       cat >expect <<-EOF &&
+       UU ONE
+       AM THREE
+       A  TWO
+       EOF
+       test_cmp expect actual
+ '
+ 
+ test_expect_success 'git diff-index --cached shows 2 added + 1 unmerged' '
+       cat >expected <<-EOF &&
+       U       ONE
+       A       THREE
+       A       TWO
+       EOF
+       git diff-index --cached --name-status HEAD >actual &&
+       test_cmp expected actual
+ '
+ 
+ test_expect_success 'git diff-index --cached -M shows 2 added + 1 unmerged' '
+       cat >expected <<-EOF &&
+       U       ONE
+       A       THREE
+       A       TWO
+       EOF
+       git diff-index --cached --name-status HEAD >actual &&
+       test_cmp expected actual
+ '
+ 
+ test_expect_success 'git diff-index --cached -C shows 2 copies + 1 unmerged' '
+       cat >expected <<-EOF &&
+       U       ONE
+       C       ONE     THREE
+       C       ONE     TWO
+       EOF
+       git diff-index --cached -C --name-status HEAD |
+       sed "s/^C[0-9]*/C/g" >actual &&
+       test_cmp expected actual
+ '
+ 
   test_done

author: Junio C Hamano <gitster@pobox.com>, Mon, 2 May 2011 22:58:27 +0000 (15:58 -0700)
committer: Junio C Hamano <gitster@pobox.com>, Mon, 2 May 2011 22:58:27 +0000 (15:58 -0700)
Files changed (diff1 and diff2 are the diffs against merge parents 1 and 2):
diffcore-rename.c: patch | diff1 | diff2 | blob | history
t/t7060-wtstatus.sh: patch | diff1 | diff2 | blob | history