cat-file: add --buffer option
authorJeff King <peff@peff.net>
Mon, 22 Jun 2015 10:45:17 +0000 (06:45 -0400)
committerJunio C Hamano <gitster@pobox.com>
Mon, 22 Jun 2015 21:55:52 +0000 (14:55 -0700)
We use a direct write() to output the results of --batch and
--batch-check. This is good for processes feeding the input
and reading the output interactively, but it introduces
measurable overhead if you do not want this feature. For
example, on linux.git:

$ git rev-list --objects --all | cut -d' ' -f1 >objects
$ time git cat-file --batch-check='%(objectsize)' \
<objects >/dev/null
real 0m5.440s
user 0m5.060s
sys 0m0.384s

This patch adds an option to use regular stdio buffering:

$ time git cat-file --batch-check='%(objectsize)' \
--buffer <objects >/dev/null
real 0m4.975s
user 0m4.888s
sys 0m0.092s

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-cat-file.txt
builtin/cat-file.c
index 319ab4cb086874da5e2f153d69d8f9af1f18461e..0058bd42aac473e157b4a2a7c22187249f457934 100644 (file)
@@ -69,6 +69,13 @@ OPTIONS
        not be combined with any other options or arguments.  See the
        section `BATCH OUTPUT` below for details.
 
+--buffer::
+       Normally batch output is flushed after each object is output, so
+       that a process can interactively read and write from
+       `cat-file`. With this option, the output uses normal stdio
+       buffering; this is much more efficient when invoking
+       `--batch-check` on a large number of objects.
+
 --allow-unknown-type::
        Allow -s or -t to query broken/corrupt objects of unknown type.
 
index d4101b7adaba612d659f5e90d22db98be7563b24..741e100bda0d578691c42c400d956e81c1be626e 100644 (file)
@@ -14,6 +14,7 @@ struct batch_options {
        int enabled;
        int follow_symlinks;
        int print_contents;
+       int buffer_output;
        const char *format;
 };
 
@@ -211,14 +212,25 @@ static size_t expand_format(struct strbuf *sb, const char *start, void *data)
        return end - start + 1;
 }
 
-static void print_object_or_die(int fd, struct expand_data *data)
+static void batch_write(struct batch_options *opt, const void *data, int len)
+{
+       if (opt->buffer_output) {
+               if (fwrite(data, 1, len, stdout) != len)
+                       die_errno("unable to write to stdout");
+       } else
+               write_or_die(1, data, len);
+}
+
+static void print_object_or_die(struct batch_options *opt, struct expand_data *data)
 {
        const unsigned char *sha1 = data->sha1;
 
        assert(data->info.typep);
 
        if (data->type == OBJ_BLOB) {
-               if (stream_blob_to_fd(fd, sha1, NULL, 0) < 0)
+               if (opt->buffer_output)
+                       fflush(stdout);
+               if (stream_blob_to_fd(1, sha1, NULL, 0) < 0)
                        die("unable to stream %s to stdout", sha1_to_hex(sha1));
        }
        else {
@@ -234,12 +246,11 @@ static void print_object_or_die(int fd, struct expand_data *data)
                if (data->info.sizep && size != data->size)
                        die("object %s changed size!?", sha1_to_hex(sha1));
 
-               write_or_die(fd, contents, size);
+               batch_write(opt, contents, size);
                free(contents);
        }
 }
 
-
 static int batch_one_object(const char *obj_name, struct batch_options *opt,
                            struct expand_data *data)
 {
@@ -294,12 +305,12 @@ static int batch_one_object(const char *obj_name, struct batch_options *opt,
 
        strbuf_expand(&buf, opt->format, expand_format, data);
        strbuf_addch(&buf, '\n');
-       write_or_die(1, buf.buf, buf.len);
+       batch_write(opt, buf.buf, buf.len);
        strbuf_release(&buf);
 
        if (opt->print_contents) {
-               print_object_or_die(1, data);
-               write_or_die(1, "\n", 1);
+               print_object_or_die(opt, data);
+               batch_write(opt, "\n", 1);
        }
        return 0;
 }
@@ -415,6 +426,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
                            N_("for blob objects, run textconv on object's content"), 'c'),
                OPT_BOOL(0, "allow-unknown-type", &unknown_type,
                          N_("allow -s and -t to work with broken/corrupt objects")),
+               OPT_BOOL(0, "buffer", &batch.buffer_output, N_("buffer --batch output")),
                { OPTION_CALLBACK, 0, "batch", &batch, "format",
                        N_("show info and content of objects fed from the standard input"),
                        PARSE_OPT_OPTARG, batch_option_callback },