fast-export: do not load blob objects twice
authorJeff King <peff@peff.net>
Sun, 17 Mar 2013 08:38:57 +0000 (04:38 -0400)
committerJunio C Hamano <gitster@pobox.com>
Sun, 17 Mar 2013 22:28:15 +0000 (15:28 -0700)
When fast-export wants to export a blob object, it first
calls parse_object to get a "struct object" and check
whether we have already shown the object. If we haven't
shown it, we then use read_sha1_file to pull it from disk
and write it out.

That means we load each blob from disk twice: once for
parse_object to find its type and check its sha1, and a
second time when we actually output it. We can drop this to
a single load by using lookup_object to check the SHOWN
flag, and then checking the signature on and outputting a
single buffer.

This provides modest speedups on git.git (best-of-five, "git
fast-export HEAD >/dev/null"):

[before] [after]
real 0m14.347s real 0m13.780s
user 0m14.084s user 0m13.620s
sys 0m0.208s sys 0m0.100s

and somewhat more on more blob-heavy repos (this is a
repository full of media files):

[before] [after]
real 0m52.236s real 0m44.451s
user 0m50.568s user 0m43.000s
sys 0m1.536s sys 0m1.284s

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/fast-export.c
index 9e8992531242fda31c5f19e34680667c20731ffd..ed2916595fc386fe4127dc3fe9f68b85d15b3def 100644 (file)
@@ -119,6 +119,7 @@ static void export_blob(const unsigned char *sha1)
        enum object_type type;
        char *buf;
        struct object *object;
+       int eaten;
 
        if (no_data)
                return;
@@ -126,16 +127,18 @@ static void export_blob(const unsigned char *sha1)
        if (is_null_sha1(sha1))
                return;
 
-       object = parse_object(sha1);
-       if (!object)
-               die ("Could not read blob %s", sha1_to_hex(sha1));
-
-       if (object->flags & SHOWN)
+       object = lookup_object(sha1);
+       if (object && object->flags & SHOWN)
                return;
 
        buf = read_sha1_file(sha1, &type, &size);
        if (!buf)
                die ("Could not read blob %s", sha1_to_hex(sha1));
+       if (check_sha1_signature(sha1, buf, size, typename(type)) < 0)
+               die("sha1 mismatch in blob %s", sha1_to_hex(sha1));
+       object = parse_object_buffer(sha1, type, size, buf, &eaten);
+       if (!object)
+               die("Could not read blob %s", sha1_to_hex(sha1));
 
        mark_next_object(object);
 
@@ -147,7 +150,8 @@ static void export_blob(const unsigned char *sha1)
        show_progress();
 
        object->flags |= SHOWN;
-       free(buf);
+       if (!eaten)
+               free(buf);
 }
 
 static int depth_first(const void *a_, const void *b_)