From: Junio C Hamano Date: Tue, 31 Jan 2017 21:15:01 +0000 (-0800) Subject: Merge branch 'jk/fsck-connectivity-check-fix' X-Git-Tag: v2.12.0-rc0~27 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/4ba6197887dc79fdc6275c9f269680ba980c3a15?ds=sidebyside;hp=-c Merge branch 'jk/fsck-connectivity-check-fix' "git fsck --connectivity-check" was not working at all. * jk/fsck-connectivity-check-fix: fsck: lazily load types under --connectivity-only fsck: move typename() printing to its own function t1450: use "mv -f" within loose object directory fsck: check HAS_OBJ more consistently fsck: do not fallback "git fsck " to "git fsck" fsck: tighten error-checks of "git fsck " fsck: prepare dummy objects for --connectivity-check fsck: report trees as dangling t1450: clean up sub-objects in duplicate-entry test --- 4ba6197887dc79fdc6275c9f269680ba980c3a15 diff --combined builtin/fsck.c index 4b91ee95e6,140357b6fc..1a5caccd0f --- a/builtin/fsck.c +++ b/builtin/fsck.c @@@ -56,6 -56,23 +56,23 @@@ static const char *describe_object(stru return buf.buf; } + static const char *printable_type(struct object *obj) + { + const char *ret; + + if (obj->type == OBJ_NONE) { + enum object_type type = sha1_object_info(obj->oid.hash, NULL); + if (type > 0) + object_as_type(obj, type, 0); + } + + ret = typename(obj->type); + if (!ret) + ret = "unknown"; + + return ret; + } + static int fsck_config(const char *var, const char *value, void *cb) { if (strcmp(var, "fsck.skiplist") == 0) { @@@ -83,7 -100,7 +100,7 @@@ static void objreport(struct object *ob const char *err) { fprintf(stderr, "%s in %s %s: %s\n", - msg_type, typename(obj->type), describe_object(obj), err); + msg_type, printable_type(obj), describe_object(obj), err); } static int objerror(struct object *obj, const char *err) @@@ -114,7 -131,7 +131,7 @@@ static int mark_object(struct object *o if (!obj) { /* ... these references to parent->fld are safe here */ printf("broken link from %7s %s\n", - typename(parent->type), describe_object(parent)); + printable_type(parent), describe_object(parent)); printf("broken link from %7s %s\n", (type == OBJ_ANY ? "unknown" : typename(type)), "unknown"); errors_found |= ERROR_REACHABLE; @@@ -131,9 -148,9 +148,9 @@@ if (!(obj->flags & HAS_OBJ)) { if (parent && !has_object_file(&obj->oid)) { printf("broken link from %7s %s\n", - typename(parent->type), describe_object(parent)); + printable_type(parent), describe_object(parent)); printf(" to %7s %s\n", - typename(obj->type), describe_object(obj)); + printable_type(obj), describe_object(obj)); errors_found |= ERROR_REACHABLE; } return 1; @@@ -205,9 -222,7 +222,7 @@@ static void check_reachable_object(stru if (!(obj->flags & HAS_OBJ)) { if (has_sha1_pack(obj->oid.hash)) return; /* it is in pack - forget about it */ - if (connectivity_only && has_object_file(&obj->oid)) - return; - printf("missing %s %s\n", typename(obj->type), + printf("missing %s %s\n", printable_type(obj), describe_object(obj)); errors_found |= ERROR_REACHABLE; return; @@@ -225,7 -240,7 +240,7 @@@ static void check_unreachable_object(st * to complain about it being unreachable (since it does * not exist). */ - if (!obj->parsed) + if (!(obj->flags & HAS_OBJ)) return; /* @@@ -233,7 -248,7 +248,7 @@@ * since this is something that is prunable. */ if (show_unreachable) { - printf("unreachable %s %s\n", typename(obj->type), + printf("unreachable %s %s\n", printable_type(obj), describe_object(obj)); return; } @@@ -252,7 -267,7 +267,7 @@@ */ if (!obj->used) { if (show_dangling) - printf("dangling %s %s\n", typename(obj->type), + printf("dangling %s %s\n", printable_type(obj), describe_object(obj)); if (write_lost_and_found) { char *filename = git_pathdup("lost-found/%s/%s", @@@ -326,7 -341,7 +341,7 @@@ static int fsck_obj(struct object *obj if (verbose) fprintf(stderr, "Checking %s %s\n", - typename(obj->type), describe_object(obj)); + printable_type(obj), describe_object(obj)); if (fsck_walk(obj, NULL, &fsck_obj_options)) objerror(obj, "broken links"); @@@ -352,7 -367,7 +367,7 @@@ struct tag *tag = (struct tag *) obj; if (show_tags && tag->tagged) { - printf("tagged %s %s", typename(tag->tagged->type), + printf("tagged %s %s", printable_type(tag->tagged), describe_object(tag->tagged)); printf(" (%s) in %s\n", tag->tag, describe_object(&tag->object)); @@@ -362,6 -377,18 +377,6 @@@ return 0; } -static int fsck_sha1(const unsigned char *sha1) -{ - struct object *obj = parse_object(sha1); - if (!obj) { - errors_found |= ERROR_OBJECT; - return error("%s: object corrupt or missing", - sha1_to_hex(sha1)); - } - obj->flags |= HAS_OBJ; - return fsck_obj(obj); -} - static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type, unsigned long size, void *buffer, int *eaten) { @@@ -388,7 -415,7 +403,7 @@@ static void fsck_handle_reflog_sha1(con if (!is_null_sha1(sha1)) { obj = lookup_object(sha1); - if (obj) { + if (obj && (obj->flags & HAS_OBJ)) { if (timestamp && name_objects) add_decoration(fsck_walk_options.object_names, obj, @@@ -476,41 -503,9 +491,41 @@@ static void get_default_heads(void } } +static struct object *parse_loose_object(const unsigned char *sha1, + const char *path) +{ + struct object *obj; + void *contents; + enum object_type type; + unsigned long size; + int eaten; + + if (read_loose_object(path, sha1, &type, &size, &contents) < 0) + return NULL; + + if (!contents && type != OBJ_BLOB) + die("BUG: read_loose_object streamed a non-blob"); + + obj = parse_object_buffer(sha1, type, size, contents, &eaten); + + if (!eaten) + free(contents); + return obj; +} + static int fsck_loose(const unsigned char *sha1, const char *path, void *data) { - if (fsck_sha1(sha1)) + struct object *obj = parse_loose_object(sha1, path); + + if (!obj) { + errors_found |= ERROR_OBJECT; + error("%s: object corrupt or missing: %s", + sha1_to_hex(sha1), path); + return 0; /* keep checking other objects */ + } + + obj->flags = HAS_OBJ; + if (fsck_obj(obj)) errors_found |= ERROR_OBJECT; return 0; } @@@ -604,6 -599,29 +619,29 @@@ static int fsck_cache_tree(struct cache return err; } + static void mark_object_for_connectivity(const unsigned char *sha1) + { + struct object *obj = lookup_unknown_object(sha1); + obj->flags |= HAS_OBJ; + } + + static int mark_loose_for_connectivity(const unsigned char *sha1, + const char *path, + void *data) + { + mark_object_for_connectivity(sha1); + return 0; + } + + static int mark_packed_for_connectivity(const unsigned char *sha1, + struct packed_git *pack, + uint32_t pos, + void *data) + { + mark_object_for_connectivity(sha1); + return 0; + } + static char const * const fsck_usage[] = { N_("git fsck [] [...]"), NULL @@@ -660,38 -678,41 +698,41 @@@ int cmd_fsck(int argc, const char **arg git_config(fsck_config, NULL); fsck_head_link(); - if (!connectivity_only) { + if (connectivity_only) { + for_each_loose_object(mark_loose_for_connectivity, NULL, 0); + for_each_packed_object(mark_packed_for_connectivity, NULL, 0); + } else { fsck_object_dir(get_object_directory()); prepare_alt_odb(); for (alt = alt_odb_list; alt; alt = alt->next) fsck_object_dir(alt->path); - } - if (check_full) { - struct packed_git *p; - uint32_t total = 0, count = 0; - struct progress *progress = NULL; + if (check_full) { + struct packed_git *p; + uint32_t total = 0, count = 0; + struct progress *progress = NULL; - prepare_packed_git(); + prepare_packed_git(); - if (show_progress) { + if (show_progress) { + for (p = packed_git; p; p = p->next) { + if (open_pack_index(p)) + continue; + total += p->num_objects; + } + + progress = start_progress(_("Checking objects"), total); + } for (p = packed_git; p; p = p->next) { - if (open_pack_index(p)) - continue; - total += p->num_objects; + /* verify gives error messages itself */ + if (verify_pack(p, fsck_obj_buffer, + progress, count)) + errors_found |= ERROR_PACK; + count += p->num_objects; } - - progress = start_progress(_("Checking objects"), total); + stop_progress(&progress); } - for (p = packed_git; p; p = p->next) { - /* verify gives error messages itself */ - if (verify_pack(p, fsck_obj_buffer, - progress, count)) - errors_found |= ERROR_PACK; - count += p->num_objects; - } - stop_progress(&progress); } heads = 0; @@@ -701,9 -722,11 +742,11 @@@ if (!get_sha1(arg, sha1)) { struct object *obj = lookup_object(sha1); - /* Error is printed by lookup_object(). */ - if (!obj) + if (!obj || !(obj->flags & HAS_OBJ)) { + error("%s: object missing", sha1_to_hex(sha1)); + errors_found |= ERROR_OBJECT; continue; + } obj->used = 1; if (name_objects) @@@ -714,6 -737,7 +757,7 @@@ continue; } error("invalid parameter: expected sha1, got '%s'", arg); + errors_found |= ERROR_OBJECT; } /* @@@ -721,7 -745,7 +765,7 @@@ * default ones from .git/refs. We also consider the index file * in this case (ie this implies --cache). */ - if (!heads) { + if (!argc) { get_default_heads(); keep_cache_objects = 1; } diff --combined t/t1450-fsck.sh index 8975b4d1bc,d9cd99f2cb..33a51c9a67 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@@ -43,13 -43,13 +43,13 @@@ test_expect_success 'HEAD is part of re test_expect_success 'setup: helpers for corruption tests' ' sha1_file() { - echo "$*" | sed "s#..#.git/objects/&/#" + remainder=${1#??} && + firsttwo=${1%$remainder} && + echo ".git/objects/$firsttwo/$remainder" } && remove_object() { - file=$(sha1_file "$*") && - test -e "$file" && - rm -f "$file" + rm "$(sha1_file "$1")" } ' @@@ -189,14 -189,16 +189,16 @@@ test_expect_success 'commit with NUL i ' test_expect_success 'tree object with duplicate entries' ' - test_when_finished "remove_object \$T" && + test_when_finished "for i in \$T; do remove_object \$i; done" && T=$( GIT_INDEX_FILE=test-index && export GIT_INDEX_FILE && rm -f test-index && >x && git add x && + git rev-parse :x && T=$(git write-tree) && + echo $T && ( git cat-file tree $T && git cat-file tree $T @@@ -521,9 -523,21 +523,21 @@@ test_expect_success 'fsck --connectivit touch empty && git add empty && test_commit empty && + + # Drop the index now; we want to be sure that we + # recursively notice the broken objects + # because they are reachable from refs, not because + # they are in the index. + rm -f .git/index && + + # corrupt the blob, but in a way that we can still identify + # its type. That lets us see that --connectivity-only is + # not actually looking at the contents, but leaves it + # free to examine the type if it chooses. empty=.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 && - rm -f $empty && - echo invalid >$empty && + blob=$(echo unrelated | git hash-object -w --stdin) && + mv -f $(sha1_file $blob) $empty && + test_must_fail git fsck --strict && git fsck --strict --connectivity-only && tree=$(git rev-parse HEAD:) && @@@ -535,6 -549,26 +549,19 @@@ ) ' + test_expect_success 'fsck --connectivity-only with explicit head' ' + rm -rf connectivity-only && + git init connectivity-only && + ( + cd connectivity-only && + test_commit foo && + rm -f .git/index && + tree=$(git rev-parse HEAD^{tree}) && + remove_object $(git rev-parse HEAD:foo.t) && + test_must_fail git fsck --connectivity-only $tree + ) + ' + -remove_loose_object () { - sha1="$(git rev-parse "$1")" && - remainder=${sha1#??} && - firsttwo=${sha1%$remainder} && - rm .git/objects/$firsttwo/$remainder -} - test_expect_success 'fsck --name-objects' ' rm -rf name-objects && git init name-objects && @@@ -543,80 -577,54 +570,123 @@@ test_commit julius caesar.t && test_commit augustus && test_commit caesar && - remove_loose_object $(git rev-parse julius:caesar.t) && + remove_object $(git rev-parse julius:caesar.t) && test_must_fail git fsck --name-objects >out && tree=$(git rev-parse --verify julius:) && grep "$tree (\(refs/heads/master\|HEAD\)@{[0-9]*}:" out ) ' +test_expect_success 'alternate objects are correctly blamed' ' + test_when_finished "rm -rf alt.git .git/objects/info/alternates" && + git init --bare alt.git && + echo "../../alt.git/objects" >.git/objects/info/alternates && + mkdir alt.git/objects/12 && + >alt.git/objects/12/34567890123456789012345678901234567890 && + test_must_fail git fsck >out 2>&1 && + grep alt.git out +' + +test_expect_success 'fsck errors in packed objects' ' + git cat-file commit HEAD >basis && + sed "s/one && + sed "s/two && + one=$(git hash-object -t commit -w one) && + two=$(git hash-object -t commit -w two) && + pack=$( + { + echo $one && + echo $two + } | git pack-objects .git/objects/pack/pack + ) && + test_when_finished "rm -f .git/objects/pack/pack-$pack.*" && + remove_object $one && + remove_object $two && + test_must_fail git fsck 2>out && + grep "error in commit $one.* - bad name" out && + grep "error in commit $two.* - bad name" out && + ! grep corrupt out +' + +test_expect_success 'fsck finds problems in duplicate loose objects' ' + rm -rf broken-duplicate && + git init broken-duplicate && + ( + cd broken-duplicate && + test_commit duplicate && + # no "-d" here, so we end up with duplicates + git repack && + # now corrupt the loose copy + file=$(sha1_file "$(git rev-parse HEAD)") && + rm "$file" && + echo broken >"$file" && + test_must_fail git fsck + ) +' + +test_expect_success 'fsck detects trailing loose garbage (commit)' ' + git cat-file commit HEAD >basis && + echo bump-commit-sha1 >>basis && + commit=$(git hash-object -w -t commit basis) && + file=$(sha1_file $commit) && + test_when_finished "remove_object $commit" && + chmod +w "$file" && + echo garbage >>"$file" && + test_must_fail git fsck 2>out && + test_i18ngrep "garbage.*$commit" out +' + +test_expect_success 'fsck detects trailing loose garbage (blob)' ' + blob=$(echo trailing | git hash-object -w --stdin) && + file=$(sha1_file $blob) && + test_when_finished "remove_object $blob" && + chmod +w "$file" && + echo garbage >>"$file" && + test_must_fail git fsck 2>out && + test_i18ngrep "garbage.*$blob" out +' + + # for each of type, we have one version which is referenced by another object + # (and so while unreachable, not dangling), and another variant which really is + # dangling. + test_expect_success 'fsck notices dangling objects' ' + git init dangling && + ( + cd dangling && + blob=$(echo not-dangling | git hash-object -w --stdin) && + dblob=$(echo dangling | git hash-object -w --stdin) && + tree=$(printf "100644 blob %s\t%s\n" $blob one | git mktree) && + dtree=$(printf "100644 blob %s\t%s\n" $blob two | git mktree) && + commit=$(git commit-tree $tree) && + dcommit=$(git commit-tree -p $commit $tree) && + + cat >expect <<-EOF && + dangling blob $dblob + dangling commit $dcommit + dangling tree $dtree + EOF + + git fsck >actual && + # the output order is non-deterministic, as it comes from a hash + sort actual.sorted && + test_cmp expect actual.sorted + ) + ' + + test_expect_success 'fsck $name notices bogus $name' ' + test_must_fail git fsck bogus && + test_must_fail git fsck $_z40 + ' + + test_expect_success 'bogus head does not fallback to all heads' ' + # set up a case that will cause a reachability complaint + echo to-be-deleted >foo && + git add foo && + blob=$(git rev-parse :foo) && + test_when_finished "git rm --cached foo" && + remove_object $blob && + test_must_fail git fsck $_z40 >out 2>&1 && + ! grep $blob out + ' + test_done