fsck: actually fsck blob data
authorJeff King <peff@peff.net>
Wed, 2 May 2018 19:44:51 +0000 (15:44 -0400)
committerJeff King <peff@peff.net>
Tue, 22 May 2018 03:55:12 +0000 (23:55 -0400)
Because fscking a blob has always been a noop, we didn't
bother passing around the blob data. In preparation for
content-level checks, let's fix up a few things:

1. The fsck_object() function just returns success for any
blob. Let's a noop fsck_blob(), which we can fill in
with actual logic later.

2. The fsck_loose() function in builtin/fsck.c
just threw away blob content after loading it. Let's
hold onto it until after we've called fsck_object().

The easiest way to do this is to just drop the
parse_loose_object() helper entirely. Incidentally,
this also fixes a memory leak: if we successfully
loaded the object data but did not parse it, we would
have left the function without freeing it.

3. When fsck_loose() loads the object data, it
does so with a custom read_loose_object() helper. This
function streams any blobs, regardless of size, under
the assumption that we're only checking the sha1.

Instead, let's actually load blobs smaller than
big_file_threshold, as the normal object-reading
code-paths would do. This lets us fsck small files, and
a NULL return is an indication that the blob was so big
that it needed to be streamed, and we can pass that
information along to fsck_blob().

Signed-off-by: Jeff King <peff@peff.net>
builtin/fsck.c
fsck.c
sha1_file.c
index ef78c6c00cbf4401ed672d6ad954bbbc68c9c115..f91d5f360dd016505562a9d4380369aa8faee5f5 100644 (file)
@@ -337,7 +337,7 @@ static void check_connectivity(void)
        }
 }
 
-static int fsck_obj(struct object *obj)
+static int fsck_obj(struct object *obj, void *buffer, unsigned long size)
 {
        int err;
 
@@ -351,7 +351,7 @@ static int fsck_obj(struct object *obj)
 
        if (fsck_walk(obj, NULL, &fsck_obj_options))
                objerror(obj, "broken links");
-       err = fsck_object(obj, NULL, 0, &fsck_obj_options);
+       err = fsck_object(obj, buffer, size, &fsck_obj_options);
        if (err)
                goto out;
 
@@ -396,7 +396,7 @@ static int fsck_obj_buffer(const struct object_id *oid, enum object_type type,
        }
        obj->flags &= ~(REACHABLE | SEEN);
        obj->flags |= HAS_OBJ;
-       return fsck_obj(obj);
+       return fsck_obj(obj, buffer, size);
 }
 
 static int default_refs;
@@ -504,44 +504,42 @@ static void get_default_heads(void)
        }
 }
 
-static struct object *parse_loose_object(const struct object_id *oid,
-                                        const char *path)
+static int fsck_loose(const struct object_id *oid, const char *path, void *data)
 {
        struct object *obj;
-       void *contents;
        enum object_type type;
        unsigned long size;
+       void *contents;
        int eaten;
 
-       if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0)
-               return NULL;
+       if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0) {
+               errors_found |= ERROR_OBJECT;
+               error("%s: object corrupt or missing: %s",
+                     oid_to_hex(oid), path);
+               return 0; /* keep checking other objects */
+       }
 
        if (!contents && type != OBJ_BLOB)
-               die("BUG: read_loose_object streamed a non-blob");
+               BUG("read_loose_object streamed a non-blob");
 
        obj = parse_object_buffer(oid, type, size, contents, &eaten);
-
-       if (!eaten)
-               free(contents);
-       return obj;
-}
-
-static int fsck_loose(const struct object_id *oid, const char *path, void *data)
-{
-       struct object *obj = parse_loose_object(oid, path);
-
        if (!obj) {
                errors_found |= ERROR_OBJECT;
-               error("%s: object corrupt or missing: %s",
+               error("%s: object could not be parsed: %s",
                      oid_to_hex(oid), path);
+               if (!eaten)
+                       free(contents);
                return 0; /* keep checking other objects */
        }
 
        obj->flags &= ~(REACHABLE | SEEN);
        obj->flags |= HAS_OBJ;
-       if (fsck_obj(obj))
+       if (fsck_obj(obj, contents, size))
                errors_found |= ERROR_OBJECT;
-       return 0;
+
+       if (!eaten)
+               free(contents);
+       return 0; /* keep checking other objects, even if we saw an error */
 }
 
 static int fsck_cruft(const char *basename, const char *path, void *data)
diff --git a/fsck.c b/fsck.c
index f7be966bdebd732886128d1dad4a4f3cbb538040..76a0ad36db5e27a220d11a37f5af4f98bf1eb3d7 100644 (file)
--- a/fsck.c
+++ b/fsck.c
@@ -899,6 +899,12 @@ static int fsck_tag(struct tag *tag, const char *data,
        return fsck_tag_buffer(tag, data, size, options);
 }
 
+static int fsck_blob(struct blob *blob, const char *buf,
+                    unsigned long size, struct fsck_options *options)
+{
+       return 0;
+}
+
 int fsck_object(struct object *obj, void *data, unsigned long size,
        struct fsck_options *options)
 {
@@ -906,7 +912,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
                return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
 
        if (obj->type == OBJ_BLOB)
-               return 0;
+               return fsck_blob((struct blob *)obj, data, size, options);
        if (obj->type == OBJ_TREE)
                return fsck_tree((struct tree *) obj, options);
        if (obj->type == OBJ_COMMIT)
index cc0f43ea849569664d1af9fe05a6b42917990a7f..53f0a3693fef1e03659cf3b41d15246ad419911e 100644 (file)
@@ -2209,7 +2209,7 @@ int read_loose_object(const char *path,
                goto out;
        }
 
-       if (*type == OBJ_BLOB) {
+       if (*type == OBJ_BLOB && *size > big_file_threshold) {
                if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
                        goto out;
        } else {