Merge branch 'dk/gc-idx-wo-pack'
author Jeff King <peff@peff.net>
Fri, 20 Nov 2015 11:55:34 +0000 (06:55 -0500)
committer Jeff King <peff@peff.net>
Fri, 20 Nov 2015 11:55:34 +0000 (06:55 -0500)
Having a leftover .idx file without corresponding .pack file in
the repository hurts performance; "git gc" learned to prune them.

* dk/gc-idx-wo-pack:
gc: remove garbage .idx files from pack dir
t5304: test cleaning pack garbage
prepare_packed_git(): refactor garbage reporting in pack directory

builtin/count-objects.c
builtin/gc.c
cache.h
path.c
sha1_file.c
t/t5304-prune.sh
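diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index ad0c79954aa0dfb8b9918500d08dd841d19d4ab3..ba9291944f7752a0b7c671cd4d63309a15ba5df2 100644 (file)
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c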
@@ -15,9 +15,31 @@ static int verbose;
 static unsigned long loose, packed, packed_loose;
 static off_t loose_size;
 
-static void real_report_garbage(const char *desc, const char *path)
+static const char *bits_to_msg(unsigned seen_bits)
+{
+       switch (seen_bits) {
+       case 0:
+               return "no corresponding .idx or .pack";
+       case PACKDIR_FILE_GARBAGE:
+               return "garbage found";
+       case PACKDIR_FILE_PACK:
+               return "no corresponding .idx";
+       case PACKDIR_FILE_IDX:
+               return "no corresponding .pack";
+       case PACKDIR_FILE_PACK|PACKDIR_FILE_IDX:
+       default:
+               return NULL;
+       }
+}
+
+static void real_report_garbage(unsigned seen_bits, const char *path)
 {
        struct stat st;
+       const char *desc = bits_to_msg(seen_bits);
+
+       if (!desc)
+               return;
+
        if (!stat(path, &st))
                size_garbage += st.st_size;
        warning("%s: %s", desc, path);
@@ -27,7 +49,7 @@ static void real_report_garbage(const char *desc, const char *path)
 static void loose_garbage(const char *path)
 {
        if (verbose)
-               report_garbage("garbage found", path);
+               report_garbage(PACKDIR_FILE_GARBAGE, path);
 }
 
 static int count_loose(const unsigned char *sha1, const char *path, void *data)
diff --git a/builtin/gc.c b/builtin/gc.c
index df3e454447ea4e4e34c6a7eac7ef54df1d614df6..c583aad6ec2896c8a6ad3b35671e92a3c0478bcd 100644 (file)
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -46,6 +46,22 @@ static struct argv_array rerere = ARGV_ARRAY_INIT;
 static struct tempfile pidfile;
 static struct lock_file log_lock;
 
+static struct string_list pack_garbage = STRING_LIST_INIT_DUP;
+
+static void clean_pack_garbage(void)
+{
+       int i;
+       for (i = 0; i < pack_garbage.nr; i++)
+               unlink_or_warn(pack_garbage.items[i].string);
+       string_list_clear(&pack_garbage, 0);
+}
+
+static void report_pack_garbage(unsigned seen_bits, const char *path)
+{
+       if (seen_bits == PACKDIR_FILE_IDX)
+               string_list_append(&pack_garbage, path);
+}
+
 static void git_config_date_string(const char *key, const char **output)
 {
        if (git_config_get_string_const(key, output))
@@ -416,6 +432,11 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
        if (run_command_v_opt(rerere.argv, RUN_GIT_CMD))
                return error(FAILED_RUN, rerere.argv[0]);
 
+       report_garbage = report_pack_garbage;
+       reprepare_packed_git();
+       if (pack_garbage.nr > 0)
+               clean_pack_garbage();
+
        if (auto_gc && too_many_loose_objects())
                warning(_("There are too many unreachable loose objects; "
                        "run 'git prune' to remove them."));
diff --git a/cache.h b/cache.h
index 3ba0b8f3d7a86eac8839bb175b0291b76013d263..736abc03a4b2bde987077440453e253150a584f7 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -1289,8 +1289,11 @@ struct pack_entry {
 
 extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
 
-/* A hook for count-objects to report invalid files in pack directory */
-extern void (*report_garbage)(const char *desc, const char *path);
+/* A hook to report invalid files in pack directory */
+#define PACKDIR_FILE_PACK 1
+#define PACKDIR_FILE_IDX 2
+#define PACKDIR_FILE_GARBAGE 4
+extern void (*report_garbage)(unsigned seen_bits, const char *path);
 
 extern void prepare_packed_git(void);
 extern void reprepare_packed_git(void);
diff --git a/path.c b/path.c
index c740c4ff9403bc92df4c439839bd5596ac62e48d..f28ace2963bb3f0f76af0e363add22ed73a5a2c9 100644 (file)
--- a/path.c
+++ b/path.c
@@ -363,7 +363,7 @@ void report_linked_checkout_garbage(void)
                strbuf_setlen(&sb, len);
                strbuf_addstr(&sb, path);
                if (file_exists(sb.buf))
-                       report_garbage("unused in linked checkout", sb.buf);
+                       report_garbage(PACKDIR_FILE_GARBAGE, sb.buf);
        }
        strbuf_release(&sb);
 }
diff --git a/sha1_file.c b/sha1_file.c
index c5b31de9aa579dde37e5345d207995416f261eed..3d56746a9be12e78c2b5262948bd411acf3230a2 100644 (file)
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1217,27 +1217,16 @@ void install_packed_git(struct packed_git *pack)
        packed_git = pack;
 }
 
-void (*report_garbage)(const char *desc, const char *path);
+void (*report_garbage)(unsigned seen_bits, const char *path);
 
 static void report_helper(const struct string_list *list,
                          int seen_bits, int first, int last)
 {
-       const char *msg;
-       switch (seen_bits) {
-       case 0:
-               msg = "no corresponding .idx or .pack";
-               break;
-       case 1:
-               msg = "no corresponding .idx";
-               break;
-       case 2:
-               msg = "no corresponding .pack";
-               break;
-       default:
+       if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
                return;
-       }
+
        for (; first < last; first++)
-               report_garbage(msg, list->items[first].string);
+               report_garbage(seen_bits, list->items[first].string);
 }
 
 static void report_pack_garbage(struct string_list *list)
@@ -1260,7 +1249,7 @@ static void report_pack_garbage(struct string_list *list)
                if (baselen == -1) {
                        const char *dot = strrchr(path, '.');
                        if (!dot) {
-                               report_garbage("garbage found", path);
+                               report_garbage(PACKDIR_FILE_GARBAGE, path);
                                continue;
                        }
                        baselen = dot - path + 1;
@@ -1332,7 +1321,7 @@ static void prepare_packed_git_one(char *objdir, int local)
                    ends_with(de->d_name, ".keep"))
                        string_list_append(&garbage, path.buf);
                else
-                       report_garbage("garbage found", path.buf);
+                       report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
        }
        closedir(dir);
        report_pack_garbage(&garbage);
diff --git a/t/t5304-prune.sh b/t/t5304-prune.sh
index 023d7c6f7b6008536cbd83b1873f0b00b9148bbd..def203c7243c099e1359b9a0c44f22a07c06cb37 100755 (executable)
--- a/t/t5304-prune.sh
+++ b/t/t5304-prune.sh
@@ -219,6 +219,7 @@ test_expect_success 'gc: prune old objects after local clone' '
 
 test_expect_success 'garbage report in count-objects -v' '
        test_when_finished "rm -f .git/objects/pack/fake*" &&
+       test_when_finished "rm -f .git/objects/pack/foo*" &&
        : >.git/objects/pack/foo &&
        : >.git/objects/pack/foo.bar &&
        : >.git/objects/pack/foo.keep &&
@@ -244,6 +245,26 @@ EOF
        test_cmp expected actual
 '
 
+test_expect_success 'clean pack garbage with gc' '
+       test_when_finished "rm -f .git/objects/pack/fake*" &&
+       test_when_finished "rm -f .git/objects/pack/foo*" &&
+       : >.git/objects/pack/foo.keep &&
+       : >.git/objects/pack/foo.pack &&
+       : >.git/objects/pack/fake.idx &&
+       : >.git/objects/pack/fake2.keep &&
+       : >.git/objects/pack/fake2.idx &&
+       : >.git/objects/pack/fake3.keep &&
+       git gc &&
+       git count-objects -v 2>stderr &&
+       grep "^warning:" stderr | sort >actual &&
+       cat >expected <<\EOF &&
+warning: no corresponding .idx or .pack: .git/objects/pack/fake3.keep
+warning: no corresponding .idx: .git/objects/pack/foo.keep
+warning: no corresponding .idx: .git/objects/pack/foo.pack
+EOF
+       test_cmp expected actual
+'
+
 test_expect_success 'prune .git/shallow' '
        SHA1=`echo hi|git commit-tree HEAD^{tree}` &&
        echo $SHA1 >.git/shallow &&