fast-export: do not copy from modified file
authorJonathan Tan <jonathantanmy@google.com>
Wed, 20 Sep 2017 23:55:02 +0000 (16:55 -0700)
committerJunio C Hamano <gitster@pobox.com>
Thu, 21 Sep 2017 04:12:52 +0000 (13:12 +0900)
When run with the "-C" option, fast-export writes 'C' commands in its
output whenever the internal diff mechanism detects a file copy,
indicating that fast-import should copy the given existing file to the
given new filename. However, the diff mechanism works against the
prior version of the file, whereas fast-import uses whatever is current.
This causes issues when a commit both modifies a file and uses it as the
source for a copy.

Therefore, teach fast-export to refrain from writing 'C' when it has
already written a modification command for a file.

An existing test in t9350-fast-export is also fixed in this patch. The
existing line "C file6 file7" copies the wrong version of file6, but it
has coincidentally worked because file7 was subsequently overridden.

Reported-by: Juraj Oršulić <juraj.orsulic@fer.hr>
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/fast-export.c
t/t9350-fast-export.sh
index 1e815b5577cc05a6133ded56dda4b84596f80514..6a85c25ca7806f1ec064af4a70fd675b34ff969f 100644 (file)
@@ -342,6 +342,7 @@ static void show_filemodify(struct diff_queue_struct *q,
                            struct diff_options *options, void *data)
 {
        int i;
+       struct string_list *changed = data;
 
        /*
         * Handle files below a directory first, in case they are all deleted
@@ -357,20 +358,31 @@ static void show_filemodify(struct diff_queue_struct *q,
                case DIFF_STATUS_DELETED:
                        printf("D ");
                        print_path(spec->path);
+                       string_list_insert(changed, spec->path);
                        putchar('\n');
                        break;
 
                case DIFF_STATUS_COPIED:
                case DIFF_STATUS_RENAMED:
-                       printf("%c ", q->queue[i]->status);
-                       print_path(ospec->path);
-                       putchar(' ');
-                       print_path(spec->path);
-                       putchar('\n');
-
-                       if (!oidcmp(&ospec->oid, &spec->oid) &&
-                           ospec->mode == spec->mode)
-                               break;
+                       /*
+                        * If a change in the file corresponding to ospec->path
+                        * has been observed, we cannot trust its contents
+                        * because the diff is calculated based on the prior
+                        * contents, not the current contents.  So, declare a
+                        * copy or rename only if there was no change observed.
+                        */
+                       if (!string_list_has_string(changed, ospec->path)) {
+                               printf("%c ", q->queue[i]->status);
+                               print_path(ospec->path);
+                               putchar(' ');
+                               print_path(spec->path);
+                               string_list_insert(changed, spec->path);
+                               putchar('\n');
+
+                               if (!oidcmp(&ospec->oid, &spec->oid) &&
+                                   ospec->mode == spec->mode)
+                                       break;
+                       }
                        /* fallthrough */
 
                case DIFF_STATUS_TYPE_CHANGED:
@@ -391,6 +403,7 @@ static void show_filemodify(struct diff_queue_struct *q,
                                       get_object_mark(object));
                        }
                        print_path(spec->path);
+                       string_list_insert(changed, spec->path);
                        putchar('\n');
                        break;
 
@@ -526,7 +539,8 @@ static void anonymize_ident_line(const char **beg, const char **end)
        *end = out->buf + out->len;
 }
 
-static void handle_commit(struct commit *commit, struct rev_info *rev)
+static void handle_commit(struct commit *commit, struct rev_info *rev,
+                         struct string_list *paths_of_changed_objects)
 {
        int saved_output_format = rev->diffopt.output_format;
        const char *commit_buffer;
@@ -613,6 +627,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev)
        if (full_tree)
                printf("deleteall\n");
        log_tree_diff_flush(rev);
+       string_list_clear(paths_of_changed_objects, 0);
        rev->diffopt.output_format = saved_output_format;
 
        printf("\n");
@@ -628,14 +643,15 @@ static void *anonymize_tag(const void *old, size_t *len)
        return strbuf_detach(&out, len);
 }
 
-static void handle_tail(struct object_array *commits, struct rev_info *revs)
+static void handle_tail(struct object_array *commits, struct rev_info *revs,
+                       struct string_list *paths_of_changed_objects)
 {
        struct commit *commit;
        while (commits->nr) {
                commit = (struct commit *)commits->objects[commits->nr - 1].item;
                if (has_unshown_parent(commit))
                        return;
-               handle_commit(commit, revs);
+               handle_commit(commit, revs, paths_of_changed_objects);
                commits->nr--;
        }
 }
@@ -975,6 +991,7 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
        char *export_filename = NULL, *import_filename = NULL;
        uint32_t lastimportid;
        struct string_list refspecs_list = STRING_LIST_INIT_NODUP;
+       struct string_list paths_of_changed_objects = STRING_LIST_INIT_DUP;
        struct option options[] = {
                OPT_INTEGER(0, "progress", &progress,
                            N_("show progress after <n> objects")),
@@ -1047,14 +1064,15 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
        if (prepare_revision_walk(&revs))
                die("revision walk setup failed");
        revs.diffopt.format_callback = show_filemodify;
+       revs.diffopt.format_callback_data = &paths_of_changed_objects;
        DIFF_OPT_SET(&revs.diffopt, RECURSIVE);
        while ((commit = get_revision(&revs))) {
                if (has_unshown_parent(commit)) {
                        add_object_array(&commit->object, NULL, &commits);
                }
                else {
-                       handle_commit(commit, &revs);
-                       handle_tail(&commits, &revs);
+                       handle_commit(commit, &revs, &paths_of_changed_objects);
+                       handle_tail(&commits, &revs, &paths_of_changed_objects);
                }
        }
 
index b5149fde6ecda2b3de5ed60f43c60ab70230289c..419d540d35e20dbbb65879736cc8412ff504b7f2 100755 (executable)
@@ -234,7 +234,7 @@ test_expect_success 'fast-export -C -C | fast-import' '
        mkdir new &&
        git --git-dir=new/.git init &&
        git fast-export -C -C --signed-tags=strip --all > output &&
-       grep "^C file6 file7\$" output &&
+       grep "^C file2 file4\$" output &&
        cat output |
        (cd new &&
         git fast-import &&
@@ -522,4 +522,22 @@ test_expect_success 'delete refspec' '
        test_cmp expected actual
 '
 
+test_expect_success 'when using -C, do not declare copy when source of copy is also modified' '
+       test_create_repo src &&
+       echo a_line >src/file.txt &&
+       git -C src add file.txt &&
+       git -C src commit -m 1st_commit &&
+
+       cp src/file.txt src/file2.txt &&
+       echo another_line >>src/file.txt &&
+       git -C src add file.txt file2.txt &&
+       git -C src commit -m 2nd_commit &&
+
+       test_create_repo dst &&
+       git -C src fast-export --all -C | git -C dst fast-import &&
+       git -C src show >expected &&
+       git -C dst show >actual &&
+       test_cmp expected actual
+'
+
 test_done