Merge branch 'jk/detect-truncated-zlib-input' into maint
author Junio C Hamano <gitster@pobox.com>
Wed, 21 Nov 2018 13:57:52 +0000 (22:57 +0900)
committer Junio C Hamano <gitster@pobox.com>
Wed, 21 Nov 2018 13:57:52 +0000 (22:57 +0900)
A regression in Git 2.12 era made "git fsck" fall into an infinite
loop while processing truncated loose objects.

* jk/detect-truncated-zlib-input:
cat-file: handle streaming failures consistently
check_stream_sha1(): handle input underflow
t1450: check large blob in trailing-garbage test

builtin/cat-file.c
sha1-file.c
t/t1450-fsck.sh
index 64ec1745ab2c20ef18a9292ef3b3c82efd46de17..0520cecc9a1d963841b384de6173d752e6d78fb7 100644 (file)
@@ -50,6 +50,13 @@ static int filter_object(const char *path, unsigned mode,
        return 0;
 }
 
+static int stream_blob(const struct object_id *oid)
+{
+       if (stream_blob_to_fd(1, oid, NULL, 0))
+               die("unable to stream %s to stdout", oid_to_hex(oid));
+       return 0;
+}
+
 static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
                        int unknown_type)
 {
@@ -131,7 +138,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
                }
 
                if (type == OBJ_BLOB)
-                       return stream_blob_to_fd(1, &oid, NULL, 0);
+                       return stream_blob(&oid);
                buf = read_object_file(&oid, &type, &size);
                if (!buf)
                        die("Cannot read object %s", obj_name);
@@ -154,7 +161,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
                                oidcpy(&blob_oid, &oid);
 
                        if (oid_object_info(the_repository, &blob_oid, NULL) == OBJ_BLOB)
-                               return stream_blob_to_fd(1, &blob_oid, NULL, 0);
+                               return stream_blob(&blob_oid);
                        /*
                         * we attempted to dereference a tag to a blob
                         * and failed; there may be new dereference
@@ -317,8 +324,9 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
                                BUG("invalid cmdmode: %c", opt->cmdmode);
                        batch_write(opt, contents, size);
                        free(contents);
-               } else if (stream_blob_to_fd(1, oid, NULL, 0) < 0)
-                       die("unable to stream %s to stdout", oid_to_hex(oid));
+               } else {
+                       stream_blob(oid);
+               }
        }
        else {
                enum object_type type;
index 2edf4564f6c12f33c59a101470cd2964774fcc34..03b86aec710c3afffcd989bd357d28241a0271a7 100644 (file)
@@ -2191,7 +2191,8 @@ static int check_stream_sha1(git_zstream *stream,
         * see the comment in unpack_sha1_rest for details.
         */
        while (total_read <= size &&
-              (status == Z_OK || status == Z_BUF_ERROR)) {
+              (status == Z_OK ||
+               (status == Z_BUF_ERROR && !stream->avail_out))) {
                stream->next_out = buf;
                stream->avail_out = sizeof(buf);
                if (size - total_read < stream->avail_out)
index 0f2dd26f74b4b2c19474ad563e7c34701838cb46..b5677d26a4268505139ec6cb7d65fe35db7debbe 100755 (executable)
@@ -673,16 +673,35 @@ test_expect_success 'fsck detects trailing loose garbage (commit)' '
        test_i18ngrep "garbage.*$commit" out
 '
 
-test_expect_success 'fsck detects trailing loose garbage (blob)' '
+test_expect_success 'fsck detects trailing loose garbage (large blob)' '
        blob=$(echo trailing | git hash-object -w --stdin) &&
        file=$(sha1_file $blob) &&
        test_when_finished "remove_object $blob" &&
        chmod +w "$file" &&
        echo garbage >>"$file" &&
-       test_must_fail git fsck 2>out &&
+       test_must_fail git -c core.bigfilethreshold=5 fsck 2>out &&
        test_i18ngrep "garbage.*$blob" out
 '
 
+test_expect_success 'fsck detects truncated loose object' '
+       # make it big enough that we know we will truncate in the data
+       # portion, not the header
+       test-tool genrandom truncate 4096 >file &&
+       blob=$(git hash-object -w file) &&
+       file=$(sha1_file $blob) &&
+       test_when_finished "remove_object $blob" &&
+       test_copy_bytes 1024 <"$file" >tmp &&
+       rm "$file" &&
+       mv -f tmp "$file" &&
+
+       # check both regular and streaming code paths
+       test_must_fail git fsck 2>out &&
+       test_i18ngrep corrupt.*$blob out &&
+
+       test_must_fail git -c core.bigfilethreshold=128 fsck 2>out &&
+       test_i18ngrep corrupt.*$blob out
+'
+
 # for each of type, we have one version which is referenced by another object
 # (and so while unreachable, not dangling), and another variant which really is
 # dangling.