cat-file: sort and de-dup output of --batch-all-objects
author: Jeff King <peff@peff.net>
Mon, 22 Jun 2015 11:06:32 +0000 (07:06 -0400)
committer: Junio C Hamano <gitster@pobox.com>
Fri, 26 Jun 2015 16:24:42 +0000 (09:24 -0700)
The sorting we could probably live without, but printing
duplicates is just a hassle for the user, who must then
de-dup the output themselves (or risk a wrong answer if they are doing
something like counting objects with a particular property).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-cat-file.txt
builtin/cat-file.c
t/t1006-cat-file.sh
index 6831b08efb4ce509258437f7db1f6115f26a4377..3105fc07205ac19805a753603940f9add612d4ae 100644 (file)
@@ -74,8 +74,7 @@ OPTIONS
        requested batch operation on all objects in the repository and
        any alternate object stores (not just reachable objects).
        Requires `--batch` or `--batch-check` be specified. Note that
-       the order of the objects is unspecified, and there may be
-       duplicate entries.
+       the objects are visited in order sorted by their hashes.
 
 --buffer::
        Normally batch output is flushed after each object is output, so
index 95604c4a63b88f7935a8bfb890cbdbee7e396ffe..07baad1e59c1977e5fce04d4c7288f8263947f4e 100644 (file)
@@ -9,6 +9,7 @@
 #include "userdiff.h"
 #include "streaming.h"
 #include "tree-walk.h"
+#include "sha1-array.h"
 
 struct batch_options {
        int enabled;
@@ -324,19 +325,19 @@ struct object_cb_data {
        struct expand_data *expand;
 };
 
-static int batch_object_cb(const unsigned char *sha1,
-                          struct object_cb_data *data)
+static void batch_object_cb(const unsigned char sha1[20], void *vdata)
 {
+       struct object_cb_data *data = vdata;
        hashcpy(data->expand->sha1, sha1);
        batch_object_write(NULL, data->opt, data->expand);
-       return 0;
 }
 
 static int batch_loose_object(const unsigned char *sha1,
                              const char *path,
                              void *data)
 {
-       return batch_object_cb(sha1, data);
+       sha1_array_append(data, sha1);
+       return 0;
 }
 
 static int batch_packed_object(const unsigned char *sha1,
@@ -344,7 +345,8 @@ static int batch_packed_object(const unsigned char *sha1,
                               uint32_t pos,
                               void *data)
 {
-       return batch_object_cb(sha1, data);
+       sha1_array_append(data, sha1);
+       return 0;
 }
 
 static int batch_objects(struct batch_options *opt)
@@ -375,11 +377,17 @@ static int batch_objects(struct batch_options *opt)
                data.info.typep = &data.type;
 
        if (opt->all_objects) {
+               struct sha1_array sa = SHA1_ARRAY_INIT;
                struct object_cb_data cb;
+
+               for_each_loose_object(batch_loose_object, &sa, 0);
+               for_each_packed_object(batch_packed_object, &sa, 0);
+
                cb.opt = opt;
                cb.expand = &data;
-               for_each_loose_object(batch_loose_object, &cb, 0);
-               for_each_packed_object(batch_packed_object, &cb, 0);
+               sha1_array_for_each_unique(&sa, batch_object_cb, &cb);
+
+               sha1_array_clear(&sa);
                return 0;
        }
 
index 2b4220a604e639bd0fab18217b269209ef39271f..4f38078ff36f5a877defc360feaf6fdba804ccc1 100755 (executable)
@@ -548,7 +548,7 @@ test_expect_success 'git cat-file --batch --follow-symlink returns correct sha a
 '
 
 test_expect_success 'cat-file --batch-all-objects shows all objects' '
-       # make new repos so we now the full set of objects; we will
+       # make new repos so we know the full set of objects; we will
        # also make sure that there are some packed and some loose
        # objects, some referenced and some not, and that there are
        # some available only via alternates.
@@ -569,8 +569,7 @@ test_expect_success 'cat-file --batch-all-objects shows all objects' '
        ) >>expect.unsorted &&
        sort <expect.unsorted >expect &&
        git -C all-two cat-file --batch-all-objects \
-                               --batch-check="%(objectname)" >actual.unsorted &&
-       sort <actual.unsorted >actual &&
+                               --batch-check="%(objectname)" >actual &&
        test_cmp expect actual
 '