Merge branch 'jk/submodule-fsck-loose' into maint
authorJunio C Hamano <gitster@pobox.com>
Tue, 22 May 2018 05:26:05 +0000 (14:26 +0900)
committerJunio C Hamano <gitster@pobox.com>
Tue, 22 May 2018 05:26:05 +0000 (14:26 +0900)
* jk/submodule-fsck-loose:
fsck: complain when .gitmodules is a symlink
index-pack: check .gitmodules files with --strict
unpack-objects: call fsck_finish() after fscking objects
fsck: call fsck_finish() after fscking objects
fsck: check .gitmodules content
fsck: handle promisor objects in .gitmodules check
fsck: detect gitmodules files
fsck: actually fsck blob data
fsck: simplify ".git" check
index-pack: make fsck error message more specific

builtin/fsck.c
builtin/index-pack.c
builtin/unpack-objects.c
fsck.c
fsck.h
sha1_file.c
t/lib-pack.sh
t/t7415-submodule-names.sh
index ef78c6c00cbf4401ed672d6ad954bbbc68c9c115..028aba52ebaab764d1f4f91c926e54d66dcaa20f 100644 (file)
@@ -337,7 +337,7 @@ static void check_connectivity(void)
        }
 }
 
-static int fsck_obj(struct object *obj)
+static int fsck_obj(struct object *obj, void *buffer, unsigned long size)
 {
        int err;
 
@@ -351,7 +351,7 @@ static int fsck_obj(struct object *obj)
 
        if (fsck_walk(obj, NULL, &fsck_obj_options))
                objerror(obj, "broken links");
-       err = fsck_object(obj, NULL, 0, &fsck_obj_options);
+       err = fsck_object(obj, buffer, size, &fsck_obj_options);
        if (err)
                goto out;
 
@@ -396,7 +396,7 @@ static int fsck_obj_buffer(const struct object_id *oid, enum object_type type,
        }
        obj->flags &= ~(REACHABLE | SEEN);
        obj->flags |= HAS_OBJ;
-       return fsck_obj(obj);
+       return fsck_obj(obj, buffer, size);
 }
 
 static int default_refs;
@@ -504,44 +504,42 @@ static void get_default_heads(void)
        }
 }
 
-static struct object *parse_loose_object(const struct object_id *oid,
-                                        const char *path)
+static int fsck_loose(const struct object_id *oid, const char *path, void *data)
 {
        struct object *obj;
-       void *contents;
        enum object_type type;
        unsigned long size;
+       void *contents;
        int eaten;
 
-       if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0)
-               return NULL;
+       if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0) {
+               errors_found |= ERROR_OBJECT;
+               error("%s: object corrupt or missing: %s",
+                     oid_to_hex(oid), path);
+               return 0; /* keep checking other objects */
+       }
 
        if (!contents && type != OBJ_BLOB)
-               die("BUG: read_loose_object streamed a non-blob");
+               BUG("read_loose_object streamed a non-blob");
 
        obj = parse_object_buffer(oid, type, size, contents, &eaten);
-
-       if (!eaten)
-               free(contents);
-       return obj;
-}
-
-static int fsck_loose(const struct object_id *oid, const char *path, void *data)
-{
-       struct object *obj = parse_loose_object(oid, path);
-
        if (!obj) {
                errors_found |= ERROR_OBJECT;
-               error("%s: object corrupt or missing: %s",
+               error("%s: object could not be parsed: %s",
                      oid_to_hex(oid), path);
+               if (!eaten)
+                       free(contents);
                return 0; /* keep checking other objects */
        }
 
        obj->flags &= ~(REACHABLE | SEEN);
        obj->flags |= HAS_OBJ;
-       if (fsck_obj(obj))
+       if (fsck_obj(obj, contents, size))
                errors_found |= ERROR_OBJECT;
-       return 0;
+
+       if (!eaten)
+               free(contents);
+       return 0; /* keep checking other objects, even if we saw an error */
 }
 
 static int fsck_cruft(const char *basename, const char *path, void *data)
@@ -750,6 +748,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
                        }
                        stop_progress(&progress);
                }
+
+               if (fsck_finish(&fsck_obj_options))
+                       errors_found |= ERROR_OBJECT;
        }
 
        for (i = 0; i < argc; i++) {
index bda84a92effe41adb1e50a06ff6accac0563d04a..7b2f7c04703f5eff8a1eda06ae59c0d22a8a1ab5 100644 (file)
@@ -836,6 +836,9 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
                                blob->object.flags |= FLAG_CHECKED;
                        else
                                die(_("invalid blob object %s"), oid_to_hex(oid));
+                       if (do_fsck_object &&
+                           fsck_object(&blob->object, (void *)data, size, &fsck_options))
+                               die(_("fsck error in packed object"));
                } else {
                        struct object *obj;
                        int eaten;
@@ -853,7 +856,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
                                die(_("invalid %s"), type_name(type));
                        if (do_fsck_object &&
                            fsck_object(obj, buf, size, &fsck_options))
-                               die(_("Error in object"));
+                               die(_("fsck error in packed object"));
                        if (strict && fsck_walk(obj, NULL, &fsck_options))
                                die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
 
@@ -1477,6 +1480,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
        } else
                chmod(final_index_name, 0444);
 
+       if (do_fsck_object)
+               add_packed_git(final_index_name, strlen(final_index_name), 0);
+
        if (!from_stdin) {
                printf("%s\n", sha1_to_hex(hash));
        } else {
@@ -1818,6 +1824,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
                      pack_hash);
        else
                close(input_fd);
+
+       if (do_fsck_object && fsck_finish(&fsck_options))
+               die(_("fsck error in pack objects"));
+
        free(objects);
        strbuf_release(&index_name_buf);
        if (pack_name == NULL)
index 6620feec68b15573340f4c28fe6be952e3c00e3a..c8f1406d2340c5cf2c1ac775039e32a244fede33 100644 (file)
@@ -210,7 +210,7 @@ static int check_object(struct object *obj, int type, void *data, struct fsck_op
        if (!obj_buf)
                die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
        if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
-               die("Error in object");
+               die("fsck error in packed object");
        fsck_options.walk = check_object;
        if (fsck_walk(obj, NULL, &fsck_options))
                die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
@@ -572,8 +572,11 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
        unpack_all();
        the_hash_algo->update_fn(&ctx, buffer, offset);
        the_hash_algo->final_fn(oid.hash, &ctx);
-       if (strict)
+       if (strict) {
                write_rest();
+               if (fsck_finish(&fsck_options))
+                       die(_("fsck error in pack objects"));
+       }
        if (hashcmp(fill(the_hash_algo->rawsz), oid.hash))
                die("final sha1 did not match");
        use(the_hash_algo->rawsz);
diff --git a/fsck.c b/fsck.c
index 5c8c12dde381912eccf59bbf3ed8c774344809b8..9339f315131786c50c9fa10dd702fe20562e167e 100644 (file)
--- a/fsck.c
+++ b/fsck.c
 #include "utf8.h"
 #include "sha1-array.h"
 #include "decorate.h"
+#include "oidset.h"
+#include "packfile.h"
+#include "submodule-config.h"
+#include "config.h"
+
+static struct oidset gitmodules_found = OIDSET_INIT;
+static struct oidset gitmodules_done = OIDSET_INIT;
 
 #define FSCK_FATAL -1
 #define FSCK_INFO -2
@@ -44,6 +51,7 @@
        FUNC(MISSING_TAG_ENTRY, ERROR) \
        FUNC(MISSING_TAG_OBJECT, ERROR) \
        FUNC(MISSING_TREE, ERROR) \
+       FUNC(MISSING_TREE_OBJECT, ERROR) \
        FUNC(MISSING_TYPE, ERROR) \
        FUNC(MISSING_TYPE_ENTRY, ERROR) \
        FUNC(MULTIPLE_AUTHORS, ERROR) \
        FUNC(TREE_NOT_SORTED, ERROR) \
        FUNC(UNKNOWN_TYPE, ERROR) \
        FUNC(ZERO_PADDED_DATE, ERROR) \
+       FUNC(GITMODULES_MISSING, ERROR) \
+       FUNC(GITMODULES_BLOB, ERROR) \
+       FUNC(GITMODULES_PARSE, ERROR) \
+       FUNC(GITMODULES_NAME, ERROR) \
+       FUNC(GITMODULES_SYMLINK, ERROR) \
        /* warnings */ \
        FUNC(BAD_FILEMODE, WARN) \
        FUNC(EMPTY_NAME, WARN) \
@@ -561,10 +574,18 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
                has_empty_name |= !*name;
                has_dot |= !strcmp(name, ".");
                has_dotdot |= !strcmp(name, "..");
-               has_dotgit |= (!strcmp(name, ".git") ||
-                              is_hfs_dotgit(name) ||
-                              is_ntfs_dotgit(name));
+               has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
                has_zero_pad |= *(char *)desc.buffer == '0';
+
+               if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
+                       if (!S_ISLNK(mode))
+                               oidset_insert(&gitmodules_found, oid);
+                       else
+                               retval += report(options, &item->object,
+                                                FSCK_MSG_GITMODULES_SYMLINK,
+                                                ".gitmodules is a symbolic link");
+               }
+
                if (update_tree_entry_gently(&desc)) {
                        retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
                        break;
@@ -901,6 +922,66 @@ static int fsck_tag(struct tag *tag, const char *data,
        return fsck_tag_buffer(tag, data, size, options);
 }
 
+struct fsck_gitmodules_data {
+       struct object *obj;
+       struct fsck_options *options;
+       int ret;
+};
+
+static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
+{
+       struct fsck_gitmodules_data *data = vdata;
+       const char *subsection, *key;
+       int subsection_len;
+       char *name;
+
+       if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 ||
+           !subsection)
+               return 0;
+
+       name = xmemdupz(subsection, subsection_len);
+       if (check_submodule_name(name) < 0)
+               data->ret |= report(data->options, data->obj,
+                                   FSCK_MSG_GITMODULES_NAME,
+                                   "disallowed submodule name: %s",
+                                   name);
+       free(name);
+
+       return 0;
+}
+
+static int fsck_blob(struct blob *blob, const char *buf,
+                    unsigned long size, struct fsck_options *options)
+{
+       struct fsck_gitmodules_data data;
+
+       if (!oidset_contains(&gitmodules_found, &blob->object.oid))
+               return 0;
+       oidset_insert(&gitmodules_done, &blob->object.oid);
+
+       if (!buf) {
+               /*
+                * A missing buffer here is a sign that the caller found the
+                * blob too gigantic to load into memory. Let's just consider
+                * that an error.
+                */
+               return report(options, &blob->object,
+                             FSCK_MSG_GITMODULES_PARSE,
+                             ".gitmodules too large to parse");
+       }
+
+       data.obj = &blob->object;
+       data.options = options;
+       data.ret = 0;
+       if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
+                               ".gitmodules", buf, size, &data))
+               data.ret |= report(options, &blob->object,
+                                  FSCK_MSG_GITMODULES_PARSE,
+                                  "could not parse gitmodules blob");
+
+       return data.ret;
+}
+
 int fsck_object(struct object *obj, void *data, unsigned long size,
        struct fsck_options *options)
 {
@@ -908,7 +989,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
                return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
 
        if (obj->type == OBJ_BLOB)
-               return 0;
+               return fsck_blob((struct blob *)obj, data, size, options);
        if (obj->type == OBJ_TREE)
                return fsck_tree((struct tree *) obj, options);
        if (obj->type == OBJ_COMMIT)
@@ -932,3 +1013,52 @@ int fsck_error_function(struct fsck_options *o,
        error("object %s: %s", describe_object(o, obj), message);
        return 1;
 }
+
+int fsck_finish(struct fsck_options *options)
+{
+       int ret = 0;
+       struct oidset_iter iter;
+       const struct object_id *oid;
+
+       oidset_iter_init(&gitmodules_found, &iter);
+       while ((oid = oidset_iter_next(&iter))) {
+               struct blob *blob;
+               enum object_type type;
+               unsigned long size;
+               char *buf;
+
+               if (oidset_contains(&gitmodules_done, oid))
+                       continue;
+
+               blob = lookup_blob(oid);
+               if (!blob) {
+                       ret |= report(options, &blob->object,
+                                     FSCK_MSG_GITMODULES_BLOB,
+                                     "non-blob found at .gitmodules");
+                       continue;
+               }
+
+               buf = read_sha1_file(oid->hash, &type, &size);
+               if (!buf) {
+                       if (is_promisor_object(&blob->object.oid))
+                               continue;
+                       ret |= report(options, &blob->object,
+                                     FSCK_MSG_GITMODULES_MISSING,
+                                     "unable to read .gitmodules blob");
+                       continue;
+               }
+
+               if (type == OBJ_BLOB)
+                       ret |= fsck_blob(blob, buf, size, options);
+               else
+                       ret |= report(options, &blob->object,
+                                     FSCK_MSG_GITMODULES_BLOB,
+                                     "non-blob found at .gitmodules");
+               free(buf);
+       }
+
+
+       oidset_clear(&gitmodules_found);
+       oidset_clear(&gitmodules_done);
+       return ret;
+}
diff --git a/fsck.h b/fsck.h
index 4525510d99abfc0ae523e0d556345db84fc8c302..c3cf5e00347bee21e161a558c6f3b8fbcad0be66 100644 (file)
--- a/fsck.h
+++ b/fsck.h
@@ -53,4 +53,11 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
 int fsck_object(struct object *obj, void *data, unsigned long size,
        struct fsck_options *options);
 
+/*
+ * Some fsck checks are context-dependent, and may end up queued; run this
+ * after completing all fsck_object() calls in order to resolve any remaining
+ * checks.
+ */
+int fsck_finish(struct fsck_options *options);
+
 #endif
index cc0f43ea849569664d1af9fe05a6b42917990a7f..53f0a3693fef1e03659cf3b41d15246ad419911e 100644 (file)
@@ -2209,7 +2209,7 @@ int read_loose_object(const char *path,
                goto out;
        }
 
-       if (*type == OBJ_BLOB) {
+       if (*type == OBJ_BLOB && *size > big_file_threshold) {
                if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
                        goto out;
        } else {
index 75098465716512a3373e64d51a5cdf848e9f1101..4674899b30871f55894750b61b66020f0d16a646 100644 (file)
@@ -79,6 +79,18 @@ pack_obj () {
                ;;
        esac
 
+       # If it's not a delta, we can convince pack-objects to generate a pack
+       # with just our entry, and then strip off the header (12 bytes) and
+       # trailer (20 bytes).
+       if test -z "$2"
+       then
+               echo "$1" | git pack-objects --stdout >pack_obj.tmp &&
+               size=$(wc -c <pack_obj.tmp) &&
+               dd if=pack_obj.tmp bs=1 count=$((size - 20 - 12)) skip=12 &&
+               rm -f pack_obj.tmp
+               return
+       fi
+
        echo >&2 "BUG: don't know how to print $1${2:+ (from $2)}"
        return 1
 }
index 75fa071c6d042aad43d3255935c1d99324073a31..a770d92a5592e5aab1760d0ff509f5ce3cc8e5d0 100755 (executable)
@@ -6,6 +6,7 @@ Exercise the name-checking function on a variety of names, and then give a
 real-world setup that confirms we catch this in practice.
 '
 . ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-pack.sh
 
 test_expect_success 'check names' '
        cat >expect <<-\EOF &&
@@ -73,4 +74,81 @@ test_expect_success 'clone evil superproject' '
        ! grep "RUNNING POST CHECKOUT" output
 '
 
+test_expect_success 'fsck detects evil superproject' '
+       test_must_fail git fsck
+'
+
+test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' '
+       rm -rf dst.git &&
+       git init --bare dst.git &&
+       git -C dst.git config transfer.fsckObjects true &&
+       test_must_fail git push dst.git HEAD
+'
+
+test_expect_success 'transfer.fsckObjects detects evil superproject (index)' '
+       rm -rf dst.git &&
+       git init --bare dst.git &&
+       git -C dst.git config transfer.fsckObjects true &&
+       git -C dst.git config transfer.unpackLimit 1 &&
+       test_must_fail git push dst.git HEAD
+'
+
+# Normally our packs contain commits followed by trees followed by blobs. This
+# reverses the order, which requires backtracking to find the context of a
+# blob. We'll start with a fresh gitmodules-only tree to make it simpler.
+test_expect_success 'create oddly ordered pack' '
+       git checkout --orphan odd &&
+       git rm -rf --cached . &&
+       git add .gitmodules &&
+       git commit -m odd &&
+       {
+               pack_header 3 &&
+               pack_obj $(git rev-parse HEAD:.gitmodules) &&
+               pack_obj $(git rev-parse HEAD^{tree}) &&
+               pack_obj $(git rev-parse HEAD)
+       } >odd.pack &&
+       pack_trailer odd.pack
+'
+
+test_expect_success 'transfer.fsckObjects handles odd pack (unpack)' '
+       rm -rf dst.git &&
+       git init --bare dst.git &&
+       test_must_fail git -C dst.git unpack-objects --strict <odd.pack
+'
+
+test_expect_success 'transfer.fsckObjects handles odd pack (index)' '
+       rm -rf dst.git &&
+       git init --bare dst.git &&
+       test_must_fail git -C dst.git index-pack --strict --stdin <odd.pack
+'
+
+test_expect_success 'fsck detects symlinked .gitmodules file' '
+       git init symlink &&
+       (
+               cd symlink &&
+
+               # Make the tree directly to avoid index restrictions.
+               #
+               # Because symlinks store the target as a blob, choose
+               # a pathname that could be parsed as a .gitmodules file
+               # to trick naive non-symlink-aware checking.
+               tricky="[foo]bar=true" &&
+               content=$(git hash-object -w ../.gitmodules) &&
+               target=$(printf "$tricky" | git hash-object -w --stdin) &&
+               tree=$(
+                       {
+                               printf "100644 blob $content\t$tricky\n" &&
+                               printf "120000 blob $target\t.gitmodules\n"
+                       } | git mktree
+               ) &&
+               commit=$(git commit-tree $tree) &&
+
+               # Check not only that we fail, but that it is due to the
+               # symlink detector; this grep string comes from the config
+               # variable name and will not be translated.
+               test_must_fail git fsck 2>output &&
+               grep gitmodulesSymlink output
+       )
+'
+
 test_done