Merge branch 'jk/fsck-connectivity-check-fix'
authorJunio C Hamano <gitster@pobox.com>
Tue, 31 Jan 2017 21:15:01 +0000 (13:15 -0800)
committerJunio C Hamano <gitster@pobox.com>
Tue, 31 Jan 2017 21:15:01 +0000 (13:15 -0800)
"git fsck --connectivity-check" was not working at all.

* jk/fsck-connectivity-check-fix:
fsck: lazily load types under --connectivity-only
fsck: move typename() printing to its own function
t1450: use "mv -f" within loose object directory
fsck: check HAS_OBJ more consistently
fsck: do not fallback "git fsck <bogus>" to "git fsck"
fsck: tighten error-checks of "git fsck <head>"
fsck: prepare dummy objects for --connectivity-check
fsck: report trees as dangling
t1450: clean up sub-objects in duplicate-entry test

1  2 
builtin/fsck.c
t/t1450-fsck.sh
diff --combined builtin/fsck.c
index 4b91ee95e66396285d2eb8e67ab597e84d76fab7,140357b6fc9deba63e9aaf64eeae935f942322be..1a5caccd0f5ee3fa56af06e8f536271f58312665
@@@ -56,6 -56,23 +56,23 @@@ static const char *describe_object(stru
        return buf.buf;
  }
  
+ static const char *printable_type(struct object *obj)
+ {
+       const char *ret;
+       if (obj->type == OBJ_NONE) {
+               enum object_type type = sha1_object_info(obj->oid.hash, NULL);
+               if (type > 0)
+                       object_as_type(obj, type, 0);
+       }
+       ret = typename(obj->type);
+       if (!ret)
+               ret = "unknown";
+       return ret;
+ }
  static int fsck_config(const char *var, const char *value, void *cb)
  {
        if (strcmp(var, "fsck.skiplist") == 0) {
@@@ -83,7 -100,7 +100,7 @@@ static void objreport(struct object *ob
                        const char *err)
  {
        fprintf(stderr, "%s in %s %s: %s\n",
-               msg_type, typename(obj->type), describe_object(obj), err);
+               msg_type, printable_type(obj), describe_object(obj), err);
  }
  
  static int objerror(struct object *obj, const char *err)
@@@ -114,7 -131,7 +131,7 @@@ static int mark_object(struct object *o
        if (!obj) {
                /* ... these references to parent->fld are safe here */
                printf("broken link from %7s %s\n",
-                          typename(parent->type), describe_object(parent));
+                          printable_type(parent), describe_object(parent));
                printf("broken link from %7s %s\n",
                           (type == OBJ_ANY ? "unknown" : typename(type)), "unknown");
                errors_found |= ERROR_REACHABLE;
        if (!(obj->flags & HAS_OBJ)) {
                if (parent && !has_object_file(&obj->oid)) {
                        printf("broken link from %7s %s\n",
-                                typename(parent->type), describe_object(parent));
+                                printable_type(parent), describe_object(parent));
                        printf("              to %7s %s\n",
-                                typename(obj->type), describe_object(obj));
+                                printable_type(obj), describe_object(obj));
                        errors_found |= ERROR_REACHABLE;
                }
                return 1;
@@@ -205,9 -222,7 +222,7 @@@ static void check_reachable_object(stru
        if (!(obj->flags & HAS_OBJ)) {
                if (has_sha1_pack(obj->oid.hash))
                        return; /* it is in pack - forget about it */
-               if (connectivity_only && has_object_file(&obj->oid))
-                       return;
-               printf("missing %s %s\n", typename(obj->type),
+               printf("missing %s %s\n", printable_type(obj),
                        describe_object(obj));
                errors_found |= ERROR_REACHABLE;
                return;
@@@ -225,7 -240,7 +240,7 @@@ static void check_unreachable_object(st
         * to complain about it being unreachable (since it does
         * not exist).
         */
-       if (!obj->parsed)
+       if (!(obj->flags & HAS_OBJ))
                return;
  
        /*
         * since this is something that is prunable.
         */
        if (show_unreachable) {
-               printf("unreachable %s %s\n", typename(obj->type),
+               printf("unreachable %s %s\n", printable_type(obj),
                        describe_object(obj));
                return;
        }
         */
        if (!obj->used) {
                if (show_dangling)
-                       printf("dangling %s %s\n", typename(obj->type),
+                       printf("dangling %s %s\n", printable_type(obj),
                               describe_object(obj));
                if (write_lost_and_found) {
                        char *filename = git_pathdup("lost-found/%s/%s",
@@@ -326,7 -341,7 +341,7 @@@ static int fsck_obj(struct object *obj
  
        if (verbose)
                fprintf(stderr, "Checking %s %s\n",
-                       typename(obj->type), describe_object(obj));
+                       printable_type(obj), describe_object(obj));
  
        if (fsck_walk(obj, NULL, &fsck_obj_options))
                objerror(obj, "broken links");
                struct tag *tag = (struct tag *) obj;
  
                if (show_tags && tag->tagged) {
-                       printf("tagged %s %s", typename(tag->tagged->type),
+                       printf("tagged %s %s", printable_type(tag->tagged),
                                describe_object(tag->tagged));
                        printf(" (%s) in %s\n", tag->tag,
                                describe_object(&tag->object));
        return 0;
  }
  
 -static int fsck_sha1(const unsigned char *sha1)
 -{
 -      struct object *obj = parse_object(sha1);
 -      if (!obj) {
 -              errors_found |= ERROR_OBJECT;
 -              return error("%s: object corrupt or missing",
 -                           sha1_to_hex(sha1));
 -      }
 -      obj->flags |= HAS_OBJ;
 -      return fsck_obj(obj);
 -}
 -
  static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
                           unsigned long size, void *buffer, int *eaten)
  {
@@@ -388,7 -415,7 +403,7 @@@ static void fsck_handle_reflog_sha1(con
  
        if (!is_null_sha1(sha1)) {
                obj = lookup_object(sha1);
-               if (obj) {
+               if (obj && (obj->flags & HAS_OBJ)) {
                        if (timestamp && name_objects)
                                add_decoration(fsck_walk_options.object_names,
                                        obj,
@@@ -476,41 -503,9 +491,41 @@@ static void get_default_heads(void
        }
  }
  
 +static struct object *parse_loose_object(const unsigned char *sha1,
 +                                       const char *path)
 +{
 +      struct object *obj;
 +      void *contents;
 +      enum object_type type;
 +      unsigned long size;
 +      int eaten;
 +
 +      if (read_loose_object(path, sha1, &type, &size, &contents) < 0)
 +              return NULL;
 +
 +      if (!contents && type != OBJ_BLOB)
 +              die("BUG: read_loose_object streamed a non-blob");
 +
 +      obj = parse_object_buffer(sha1, type, size, contents, &eaten);
 +
 +      if (!eaten)
 +              free(contents);
 +      return obj;
 +}
 +
  static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
  {
 -      if (fsck_sha1(sha1))
 +      struct object *obj = parse_loose_object(sha1, path);
 +
 +      if (!obj) {
 +              errors_found |= ERROR_OBJECT;
 +              error("%s: object corrupt or missing: %s",
 +                    sha1_to_hex(sha1), path);
 +              return 0; /* keep checking other objects */
 +      }
 +
 +      obj->flags = HAS_OBJ;
 +      if (fsck_obj(obj))
                errors_found |= ERROR_OBJECT;
        return 0;
  }
@@@ -604,6 -599,29 +619,29 @@@ static int fsck_cache_tree(struct cache
        return err;
  }
  
+ static void mark_object_for_connectivity(const unsigned char *sha1)
+ {
+       struct object *obj = lookup_unknown_object(sha1);
+       obj->flags |= HAS_OBJ;
+ }
+ static int mark_loose_for_connectivity(const unsigned char *sha1,
+                                      const char *path,
+                                      void *data)
+ {
+       mark_object_for_connectivity(sha1);
+       return 0;
+ }
+ static int mark_packed_for_connectivity(const unsigned char *sha1,
+                                       struct packed_git *pack,
+                                       uint32_t pos,
+                                       void *data)
+ {
+       mark_object_for_connectivity(sha1);
+       return 0;
+ }
  static char const * const fsck_usage[] = {
        N_("git fsck [<options>] [<object>...]"),
        NULL
@@@ -660,38 -678,41 +698,41 @@@ int cmd_fsck(int argc, const char **arg
        git_config(fsck_config, NULL);
  
        fsck_head_link();
-       if (!connectivity_only) {
+       if (connectivity_only) {
+               for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
+               for_each_packed_object(mark_packed_for_connectivity, NULL, 0);
+       } else {
                fsck_object_dir(get_object_directory());
  
                prepare_alt_odb();
                for (alt = alt_odb_list; alt; alt = alt->next)
                        fsck_object_dir(alt->path);
-       }
  
-       if (check_full) {
-               struct packed_git *p;
-               uint32_t total = 0, count = 0;
-               struct progress *progress = NULL;
+               if (check_full) {
+                       struct packed_git *p;
+                       uint32_t total = 0, count = 0;
+                       struct progress *progress = NULL;
  
-               prepare_packed_git();
+                       prepare_packed_git();
  
-               if (show_progress) {
+                       if (show_progress) {
+                               for (p = packed_git; p; p = p->next) {
+                                       if (open_pack_index(p))
+                                               continue;
+                                       total += p->num_objects;
+                               }
+                               progress = start_progress(_("Checking objects"), total);
+                       }
                        for (p = packed_git; p; p = p->next) {
-                               if (open_pack_index(p))
-                                       continue;
-                               total += p->num_objects;
+                               /* verify gives error messages itself */
+                               if (verify_pack(p, fsck_obj_buffer,
+                                               progress, count))
+                                       errors_found |= ERROR_PACK;
+                               count += p->num_objects;
                        }
-                       progress = start_progress(_("Checking objects"), total);
+                       stop_progress(&progress);
                }
-               for (p = packed_git; p; p = p->next) {
-                       /* verify gives error messages itself */
-                       if (verify_pack(p, fsck_obj_buffer,
-                                       progress, count))
-                               errors_found |= ERROR_PACK;
-                       count += p->num_objects;
-               }
-               stop_progress(&progress);
        }
  
        heads = 0;
                if (!get_sha1(arg, sha1)) {
                        struct object *obj = lookup_object(sha1);
  
-                       /* Error is printed by lookup_object(). */
-                       if (!obj)
+                       if (!obj || !(obj->flags & HAS_OBJ)) {
+                               error("%s: object missing", sha1_to_hex(sha1));
+                               errors_found |= ERROR_OBJECT;
                                continue;
+                       }
  
                        obj->used = 1;
                        if (name_objects)
                        continue;
                }
                error("invalid parameter: expected sha1, got '%s'", arg);
+               errors_found |= ERROR_OBJECT;
        }
  
        /*
         * default ones from .git/refs. We also consider the index file
         * in this case (ie this implies --cache).
         */
-       if (!heads) {
+       if (!argc) {
                get_default_heads();
                keep_cache_objects = 1;
        }
diff --combined t/t1450-fsck.sh
index 8975b4d1bcc4675be4d1869783ee4b7cc17c750f,d9cd99f2cb64f751fe8cb49065e0795bb2764ea9..33a51c9a67fe833e31e51099f7568b64be385d07
@@@ -43,13 -43,13 +43,13 @@@ test_expect_success 'HEAD is part of re
  
  test_expect_success 'setup: helpers for corruption tests' '
        sha1_file() {
 -              echo "$*" | sed "s#..#.git/objects/&/#"
 +              remainder=${1#??} &&
 +              firsttwo=${1%$remainder} &&
 +              echo ".git/objects/$firsttwo/$remainder"
        } &&
  
        remove_object() {
 -              file=$(sha1_file "$*") &&
 -              test -e "$file" &&
 -              rm -f "$file"
 +              rm "$(sha1_file "$1")"
        }
  '
  
@@@ -189,14 -189,16 +189,16 @@@ test_expect_success 'commit with NUL i
  '
  
  test_expect_success 'tree object with duplicate entries' '
-       test_when_finished "remove_object \$T" &&
+       test_when_finished "for i in \$T; do remove_object \$i; done" &&
        T=$(
                GIT_INDEX_FILE=test-index &&
                export GIT_INDEX_FILE &&
                rm -f test-index &&
                >x &&
                git add x &&
+               git rev-parse :x &&
                T=$(git write-tree) &&
+               echo $T &&
                (
                        git cat-file tree $T &&
                        git cat-file tree $T
@@@ -521,9 -523,21 +523,21 @@@ test_expect_success 'fsck --connectivit
                touch empty &&
                git add empty &&
                test_commit empty &&
+               # Drop the index now; we want to be sure that we
+               # recursively notice the broken objects
+               # because they are reachable from refs, not because
+               # they are in the index.
+               rm -f .git/index &&
+               # corrupt the blob, but in a way that we can still identify
+               # its type. That lets us see that --connectivity-only is
+               # not actually looking at the contents, but leaves it
+               # free to examine the type if it chooses.
                empty=.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 &&
-               rm -f $empty &&
-               echo invalid >$empty &&
+               blob=$(echo unrelated | git hash-object -w --stdin) &&
+               mv -f $(sha1_file $blob) $empty &&
                test_must_fail git fsck --strict &&
                git fsck --strict --connectivity-only &&
                tree=$(git rev-parse HEAD:) &&
        )
  '
  
 -remove_loose_object () {
 -      sha1="$(git rev-parse "$1")" &&
 -      remainder=${sha1#??} &&
 -      firsttwo=${sha1%$remainder} &&
 -      rm .git/objects/$firsttwo/$remainder
 -}
 -
+ test_expect_success 'fsck --connectivity-only with explicit head' '
+       rm -rf connectivity-only &&
+       git init connectivity-only &&
+       (
+               cd connectivity-only &&
+               test_commit foo &&
+               rm -f .git/index &&
+               tree=$(git rev-parse HEAD^{tree}) &&
+               remove_object $(git rev-parse HEAD:foo.t) &&
+               test_must_fail git fsck --connectivity-only $tree
+       )
+ '
  test_expect_success 'fsck --name-objects' '
        rm -rf name-objects &&
        git init name-objects &&
                test_commit julius caesar.t &&
                test_commit augustus &&
                test_commit caesar &&
 -              remove_loose_object $(git rev-parse julius:caesar.t) &&
 +              remove_object $(git rev-parse julius:caesar.t) &&
                test_must_fail git fsck --name-objects >out &&
                tree=$(git rev-parse --verify julius:) &&
                grep "$tree (\(refs/heads/master\|HEAD\)@{[0-9]*}:" out
        )
  '
  
 +test_expect_success 'alternate objects are correctly blamed' '
 +      test_when_finished "rm -rf alt.git .git/objects/info/alternates" &&
 +      git init --bare alt.git &&
 +      echo "../../alt.git/objects" >.git/objects/info/alternates &&
 +      mkdir alt.git/objects/12 &&
 +      >alt.git/objects/12/34567890123456789012345678901234567890 &&
 +      test_must_fail git fsck >out 2>&1 &&
 +      grep alt.git out
 +'
 +
 +test_expect_success 'fsck errors in packed objects' '
 +      git cat-file commit HEAD >basis &&
 +      sed "s/</one/" basis >one &&
 +      sed "s/</foo/" basis >two &&
 +      one=$(git hash-object -t commit -w one) &&
 +      two=$(git hash-object -t commit -w two) &&
 +      pack=$(
 +              {
 +                      echo $one &&
 +                      echo $two
 +              } | git pack-objects .git/objects/pack/pack
 +      ) &&
 +      test_when_finished "rm -f .git/objects/pack/pack-$pack.*" &&
 +      remove_object $one &&
 +      remove_object $two &&
 +      test_must_fail git fsck 2>out &&
 +      grep "error in commit $one.* - bad name" out &&
 +      grep "error in commit $two.* - bad name" out &&
 +      ! grep corrupt out
 +'
 +
 +test_expect_success 'fsck finds problems in duplicate loose objects' '
 +      rm -rf broken-duplicate &&
 +      git init broken-duplicate &&
 +      (
 +              cd broken-duplicate &&
 +              test_commit duplicate &&
 +              # no "-d" here, so we end up with duplicates
 +              git repack &&
 +              # now corrupt the loose copy
 +              file=$(sha1_file "$(git rev-parse HEAD)") &&
 +              rm "$file" &&
 +              echo broken >"$file" &&
 +              test_must_fail git fsck
 +      )
 +'
 +
 +test_expect_success 'fsck detects trailing loose garbage (commit)' '
 +      git cat-file commit HEAD >basis &&
 +      echo bump-commit-sha1 >>basis &&
 +      commit=$(git hash-object -w -t commit basis) &&
 +      file=$(sha1_file $commit) &&
 +      test_when_finished "remove_object $commit" &&
 +      chmod +w "$file" &&
 +      echo garbage >>"$file" &&
 +      test_must_fail git fsck 2>out &&
 +      test_i18ngrep "garbage.*$commit" out
 +'
 +
 +test_expect_success 'fsck detects trailing loose garbage (blob)' '
 +      blob=$(echo trailing | git hash-object -w --stdin) &&
 +      file=$(sha1_file $blob) &&
 +      test_when_finished "remove_object $blob" &&
 +      chmod +w "$file" &&
 +      echo garbage >>"$file" &&
 +      test_must_fail git fsck 2>out &&
 +      test_i18ngrep "garbage.*$blob" out
 +'
 +
+ # for each of type, we have one version which is referenced by another object
+ # (and so while unreachable, not dangling), and another variant which really is
+ # dangling.
+ test_expect_success 'fsck notices dangling objects' '
+       git init dangling &&
+       (
+               cd dangling &&
+               blob=$(echo not-dangling | git hash-object -w --stdin) &&
+               dblob=$(echo dangling | git hash-object -w --stdin) &&
+               tree=$(printf "100644 blob %s\t%s\n" $blob one | git mktree) &&
+               dtree=$(printf "100644 blob %s\t%s\n" $blob two | git mktree) &&
+               commit=$(git commit-tree $tree) &&
+               dcommit=$(git commit-tree -p $commit $tree) &&
+               cat >expect <<-EOF &&
+               dangling blob $dblob
+               dangling commit $dcommit
+               dangling tree $dtree
+               EOF
+               git fsck >actual &&
+               # the output order is non-deterministic, as it comes from a hash
+               sort <actual >actual.sorted &&
+               test_cmp expect actual.sorted
+       )
+ '
+ test_expect_success 'fsck $name notices bogus $name' '
+       test_must_fail git fsck bogus &&
+       test_must_fail git fsck $_z40
+ '
+ test_expect_success 'bogus head does not fallback to all heads' '
+       # set up a case that will cause a reachability complaint
+       echo to-be-deleted >foo &&
+       git add foo &&
+       blob=$(git rev-parse :foo) &&
+       test_when_finished "git rm --cached foo" &&
+       remove_object $blob &&
+       test_must_fail git fsck $_z40 >out 2>&1 &&
+       ! grep $blob out
+ '
  test_done