Merge branch 'jk/prune-with-corrupt-refs'
author Junio C Hamano <gitster@pobox.com>
Wed, 25 Mar 2015 19:54:26 +0000 (12:54 -0700)
committer Junio C Hamano <gitster@pobox.com>
Wed, 25 Mar 2015 19:54:26 +0000 (12:54 -0700)
"git prune" used to largely ignore broken refs when deciding which
objects are still in use, which could turn a small amount of existing
damage into a much larger one.

* jk/prune-with-corrupt-refs:
refs.c: drop curate_packed_refs
repack: turn on "ref paranoia" when doing a destructive repack
prune: turn on ref_paranoia flag
refs: introduce a "ref paranoia" flag
t5312: test object deletion code paths in a corrupted repository

Documentation/git.txt
builtin/prune.c
builtin/repack.c
cache.h
environment.c
refs.c
t/t5312-prune-corruption.sh [new file with mode: 0755]
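diff --git a/Documentation/git.txt b/Documentation/git.txt
index 4749d1b4dfb5b0209588490063d035c90b79fb76..b12e22d59788eec82ff623c4ba0e3aae02ac1141 100644 (file)
--- a/Documentation/git.txt
+++ b/Documentation/git.txt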
@@ -1027,6 +1027,17 @@ GIT_ICASE_PATHSPECS::
        variable when it is invoked as the top level command by the
        end user, to be recorded in the body of the reflog.
 
+`GIT_REF_PARANOIA`::
+       If set to `1`, include broken or badly named refs when iterating
+       over lists of refs. In a normal, non-corrupted repository, this
+       does nothing. However, enabling it may help git to detect and
+       abort some operations in the presence of broken refs. Git sets
+       this variable automatically when performing destructive
+       operations like linkgit:git-prune[1]. You should not need to set
+       it yourself unless you want to be paranoid about making sure
+       an operation has touched every ref (e.g., because you are
+       cloning a repository to make a backup).
+
 
 Discussion[[Discussion]]
 ------------------------
diff --git a/builtin/prune.c b/builtin/prune.c
index 04d3b12ae4e8c13ab4c232398fe1bdb3c91a7457..17094ad954c9da68bc5e251dce1a87ec67c00146 100644 (file)
--- a/builtin/prune.c
+++ b/builtin/prune.c
@@ -115,6 +115,7 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
        expire = ULONG_MAX;
        save_commit_buffer = 0;
        check_replace_refs = 0;
+       ref_paranoia = 1;
        init_revisions(&revs, prefix);
 
        argc = parse_options(argc, argv, prefix, options, prune_usage, 0);
diff --git a/builtin/repack.c b/builtin/repack.c
index 28fbc7099a84c7406a71128f2a8d3136c145bf57..f2edeb0f4ca2b81246ee37d7ca5f53f58161ad6f 100644 (file)
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -228,13 +228,17 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
                get_non_kept_pack_filenames(&existing_packs);
 
                if (existing_packs.nr && delete_redundant) {
-                       if (unpack_unreachable)
+                       if (unpack_unreachable) {
                                argv_array_pushf(&cmd.args,
                                                "--unpack-unreachable=%s",
                                                unpack_unreachable);
-                       else if (pack_everything & LOOSEN_UNREACHABLE)
+                               argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
+                       } else if (pack_everything & LOOSEN_UNREACHABLE) {
                                argv_array_push(&cmd.args,
                                                "--unpack-unreachable");
+                       } else {
+                               argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
+                       }
                }
        } else {
                argv_array_push(&cmd.args, "--unpacked");
diff --git a/cache.h b/cache.h
index 761c5704b2e21f5dcd68fdcc9c3b1247ab0cfd18..162ea6f24a192a021f4da339349e6d550b54a5dd 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -613,6 +613,14 @@ extern int precomposed_unicode;
 extern int protect_hfs;
 extern int protect_ntfs;
 
+/*
+ * Include broken refs in all ref iterations, which will
+ * generally choke dangerous operations rather than letting
+ * them silently proceed without taking the broken ref into
+ * account.
+ */
+extern int ref_paranoia;
+
 /*
  * The character that begins a commented line in user-editable file
  * that is subject to stripspace.
diff --git a/environment.c b/environment.c
index 1ade5c9684a9f901db7d0b3fff04d0cad443caf7..a40044c3bf8040a36ddfb5c260b389ec63683abd 100644 (file)
--- a/environment.c
+++ b/environment.c
@@ -24,6 +24,7 @@ int is_bare_repository_cfg = -1; /* unspecified */
 int log_all_ref_updates = -1; /* unspecified */
 int warn_ambiguous_refs = 1;
 int warn_on_object_refname_ambiguity = 1;
+int ref_paranoia = -1;
 int repository_format_version;
 const char *git_commit_encoding;
 const char *git_log_output_encoding;
diff --git a/refs.c b/refs.c
index e23542b3869b38e47f59f102d28648d30d506574..47e4e5380a1e0fc04f8b81837c51c023f35871cf 100644 (file)
--- a/refs.c
+++ b/refs.c
@@ -1934,6 +1934,11 @@ static int do_for_each_ref(struct ref_cache *refs, const char *base,
        data.fn = fn;
        data.cb_data = cb_data;
 
+       if (ref_paranoia < 0)
+               ref_paranoia = git_env_bool("GIT_REF_PARANOIA", 0);
+       if (ref_paranoia)
+               data.flags |= DO_FOR_EACH_INCLUDE_BROKEN;
+
        return do_for_each_entry(refs, base, do_one_ref, &data);
 }
 
@@ -2616,68 +2621,10 @@ int pack_refs(unsigned int flags)
        return 0;
 }
 
-/*
- * If entry is no longer needed in packed-refs, add it to the string
- * list pointed to by cb_data.  Reasons for deleting entries:
- *
- * - Entry is broken.
- * - Entry is overridden by a loose ref.
- * - Entry does not point at a valid object.
- *
- * In the first and third cases, also emit an error message because these
- * are indications of repository corruption.
- */
-static int curate_packed_ref_fn(struct ref_entry *entry, void *cb_data)
-{
-       struct string_list *refs_to_delete = cb_data;
-
-       if (entry->flag & REF_ISBROKEN) {
-               /* This shouldn't happen to packed refs. */
-               error("%s is broken!", entry->name);
-               string_list_append(refs_to_delete, entry->name);
-               return 0;
-       }
-       if (!has_sha1_file(entry->u.value.sha1)) {
-               unsigned char sha1[20];
-               int flags;
-
-               if (read_ref_full(entry->name, 0, sha1, &flags))
-                       /* We should at least have found the packed ref. */
-                       die("Internal error");
-               if ((flags & REF_ISSYMREF) || !(flags & REF_ISPACKED)) {
-                       /*
-                        * This packed reference is overridden by a
-                        * loose reference, so it is OK that its value
-                        * is no longer valid; for example, it might
-                        * refer to an object that has been garbage
-                        * collected.  For this purpose we don't even
-                        * care whether the loose reference itself is
-                        * invalid, broken, symbolic, etc.  Silently
-                        * remove the packed reference.
-                        */
-                       string_list_append(refs_to_delete, entry->name);
-                       return 0;
-               }
-               /*
-                * There is no overriding loose reference, so the fact
-                * that this reference doesn't refer to a valid object
-                * indicates some kind of repository corruption.
-                * Report the problem, then omit the reference from
-                * the output.
-                */
-               error("%s does not point to a valid object!", entry->name);
-               string_list_append(refs_to_delete, entry->name);
-               return 0;
-       }
-
-       return 0;
-}
-
 int repack_without_refs(struct string_list *refnames, struct strbuf *err)
 {
        struct ref_dir *packed;
-       struct string_list refs_to_delete = STRING_LIST_INIT_DUP;
-       struct string_list_item *refname, *ref_to_delete;
+       struct string_list_item *refname;
        int ret, needs_repacking = 0, removed = 0;
 
        assert(err);
@@ -2713,13 +2660,6 @@ int repack_without_refs(struct string_list *refnames, struct strbuf *err)
                return 0;
        }
 
-       /* Remove any other accumulated cruft */
-       do_for_each_entry_in_dir(packed, 0, curate_packed_ref_fn, &refs_to_delete);
-       for_each_string_list_item(ref_to_delete, &refs_to_delete) {
-               if (remove_entry(packed, ref_to_delete->string) == -1)
-                       die("internal error");
-       }
-
        /* Write what remains */
        ret = commit_packed_refs();
        if (ret)
diff --git a/t/t5312-prune-corruption.sh b/t/t5312-prune-corruption.sh
new file mode 100755 (executable)
index 0000000..8e98b44
--- /dev/null
+++ b/t/t5312-prune-corruption.sh
@@ -0,0 +1,114 @@
+#!/bin/sh
+
+test_description='
+Test pruning of repositories with minor corruptions. The goal
+here is that we should always be erring on the side of safety. So
+if we see, for example, a ref with a bogus name, it is OK either to
+bail out or to proceed using it as a reachable tip, but it is _not_
+OK to proceed as if it did not exist. Otherwise we might silently
+delete objects that cannot be recovered.
+'
+. ./test-lib.sh
+
+test_expect_success 'disable reflogs' '
+       git config core.logallrefupdates false &&
+       rm -rf .git/logs
+'
+
+test_expect_success 'create history reachable only from a bogus-named ref' '
+       test_tick && git commit --allow-empty -m master &&
+       base=$(git rev-parse HEAD) &&
+       test_tick && git commit --allow-empty -m bogus &&
+       bogus=$(git rev-parse HEAD) &&
+       git cat-file commit $bogus >saved &&
+       echo $bogus >.git/refs/heads/bogus..name &&
+       git reset --hard HEAD^
+'
+
+test_expect_success 'pruning does not drop bogus object' '
+       test_when_finished "git hash-object -w -t commit saved" &&
+       test_might_fail git prune --expire=now &&
+       verbose git cat-file -e $bogus
+'
+
+test_expect_success 'put bogus object into pack' '
+       git tag reachable $bogus &&
+       git repack -ad &&
+       git tag -d reachable &&
+       verbose git cat-file -e $bogus
+'
+
+test_expect_success 'destructive repack keeps packed object' '
+       test_might_fail git repack -Ad --unpack-unreachable=now &&
+       verbose git cat-file -e $bogus &&
+       test_might_fail git repack -ad &&
+       verbose git cat-file -e $bogus
+'
+
+# subsequent tests will have different corruptions
+test_expect_success 'clean up bogus ref' '
+       rm .git/refs/heads/bogus..name
+'
+
+# We create two new objects here, "one" and "two". Our
+# master branch points to "two", which is deleted,
+# corrupting the repository. But we'd like to make sure
+# that the otherwise unreachable "one" is not pruned
+# (since it is the user's best bet for recovering
+# from the corruption).
+#
+# Note that we also point HEAD somewhere besides "two",
+# as we want to make sure we test the case where we
+# pick up the reference to "two" by iterating the refs,
+# not by resolving HEAD.
+test_expect_success 'create history with missing tip commit' '
+       test_tick && git commit --allow-empty -m one &&
+       recoverable=$(git rev-parse HEAD) &&
+       git cat-file commit $recoverable >saved &&
+       test_tick && git commit --allow-empty -m two &&
+       missing=$(git rev-parse HEAD) &&
+       git checkout --detach $base &&
+       rm .git/objects/$(echo $missing | sed "s,..,&/,") &&
+       test_must_fail git cat-file -e $missing
+'
+
+test_expect_success 'pruning with a corrupted tip does not drop history' '
+       test_when_finished "git hash-object -w -t commit saved" &&
+       test_might_fail git prune --expire=now &&
+       verbose git cat-file -e $recoverable
+'
+
+test_expect_success 'pack-refs does not silently delete broken loose ref' '
+       git pack-refs --all --prune &&
+       echo $missing >expect &&
+       git rev-parse refs/heads/master >actual &&
+       test_cmp expect actual
+'
+
+# we do not want to count on running pack-refs to
+# actually pack it, as it is perfectly reasonable to
+# skip processing a broken ref
+test_expect_success 'create packed-refs file with broken ref' '
+       rm -f .git/refs/heads/master &&
+       cat >.git/packed-refs <<-EOF &&
+       $missing refs/heads/master
+       $recoverable refs/heads/other
+       EOF
+       echo $missing >expect &&
+       git rev-parse refs/heads/master >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'pack-refs does not silently delete broken packed ref' '
+       git pack-refs --all --prune &&
+       git rev-parse refs/heads/master >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'pack-refs does not drop broken refs during deletion' '
+       git update-ref -d refs/heads/other &&
+       git rev-parse refs/heads/master >actual &&
+       test_cmp expect actual
+'
+
+test_done