count-objects: report garbage files in pack directory too
authorNguyễn Thái Ngọc Duy <pclouds@gmail.com>
Fri, 15 Feb 2013 12:07:10 +0000 (19:07 +0700)
committerJunio C Hamano <gitster@pobox.com>
Fri, 15 Feb 2013 16:13:13 +0000 (08:13 -0800)
prepare_packed_git_one() is modified to allow count-objects to hook a
report function to so we don't need to duplicate the pack searching
logic in count-objects.c. When report_pack_garbage is NULL, the
overhead is insignificant.

The garbage is reported with warning() instead of error() in packed
garbage case because it's not an error to have garbage. Loose garbage
is still reported as errors and will be converted to warnings later.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-count-objects.txt
builtin/count-objects.c
cache.h
sha1_file.c
t/t5304-prune.sh
index e8168230b43a979cbb87c76290903968eabc7843..1611d7c768d84eb34533d41e90e9b2733e0e433e 100644 (file)
@@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB
 prune-packable: the number of loose objects that are also present in
 the packs. These objects could be pruned using `git prune-packed`.
 +
-garbage: the number of files in loose object database that are not
-valid loose objects
+garbage: the number of files in object database that are not valid
+loose objects nor valid packs
 
 GIT
 ---
index 9afaa88f776468a0de33dd153eadae7621cf6267..1706c8bd818e965c21719d6083461e64e3b78377 100644 (file)
@@ -9,6 +9,14 @@
 #include "builtin.h"
 #include "parse-options.h"
 
+static unsigned long garbage;
+
+static void real_report_garbage(const char *desc, const char *path)
+{
+       warning("%s: %s", desc, path);
+       garbage++;
+}
+
 static void count_objects(DIR *d, char *path, int len, int verbose,
                          unsigned long *loose,
                          off_t *loose_size,
@@ -76,7 +84,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
        const char *objdir = get_object_directory();
        int len = strlen(objdir);
        char *path = xmalloc(len + 50);
-       unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0;
+       unsigned long loose = 0, packed = 0, packed_loose = 0;
        off_t loose_size = 0;
        struct option opts[] = {
                OPT__VERBOSE(&verbose, N_("be verbose")),
@@ -87,6 +95,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
        /* we do not take arguments other than flags for now */
        if (argc)
                usage_with_options(count_objects_usage, opts);
+       if (verbose)
+               report_garbage = real_report_garbage;
        memcpy(path, objdir, len);
        if (len && objdir[len-1] != '/')
                path[len++] = '/';
diff --git a/cache.h b/cache.h
index e493563f4c07e6adcd00a1b2476926d69a4a67f8..82af219e3ea905ef48f1ba9f33399eb218eea223 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -1057,6 +1057,9 @@ extern const char *parse_feature_value(const char *feature_list, const char *fea
 
 extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
 
+/* A hook for count-objects to report invalid files in pack directory */
+extern void (*report_garbage)(const char *desc, const char *path);
+
 extern void prepare_packed_git(void);
 extern void reprepare_packed_git(void);
 extern void install_packed_git(struct packed_git *pack);
index 239bee7846a62a28515eb847bae64f3b7b6a1358..16967d3b9a86dc481a5161f0a98220e05790ca01 100644 (file)
@@ -21,6 +21,7 @@
 #include "sha1-lookup.h"
 #include "bulk-checkin.h"
 #include "streaming.h"
+#include "dir.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -1000,6 +1001,63 @@ void install_packed_git(struct packed_git *pack)
        packed_git = pack;
 }
 
+void (*report_garbage)(const char *desc, const char *path);
+
+static void report_helper(const struct string_list *list,
+                         int seen_bits, int first, int last)
+{
+       const char *msg;
+       switch (seen_bits) {
+       case 0:
+               msg = "no corresponding .idx nor .pack";
+               break;
+       case 1:
+               msg = "no corresponding .idx";
+               break;
+       case 2:
+               msg = "no corresponding .pack";
+               break;
+       default:
+               return;
+       }
+       for (; first < last; first++)
+               report_garbage(msg, list->items[first].string);
+}
+
+static void report_pack_garbage(struct string_list *list)
+{
+       int i, baselen = -1, first = 0, seen_bits = 0;
+
+       if (!report_garbage)
+               return;
+
+       sort_string_list(list);
+
+       for (i = 0; i < list->nr; i++) {
+               const char *path = list->items[i].string;
+               if (baselen != -1 &&
+                   strncmp(path, list->items[first].string, baselen)) {
+                       report_helper(list, seen_bits, first, i);
+                       baselen = -1;
+                       seen_bits = 0;
+               }
+               if (baselen == -1) {
+                       const char *dot = strrchr(path, '.');
+                       if (!dot) {
+                               report_garbage("garbage found", path);
+                               continue;
+                       }
+                       baselen = dot - path + 1;
+                       first = i;
+               }
+               if (!strcmp(path + baselen, "pack"))
+                       seen_bits |= 1;
+               else if (!strcmp(path + baselen, "idx"))
+                       seen_bits |= 2;
+       }
+       report_helper(list, seen_bits, first, list->nr);
+}
+
 static void prepare_packed_git_one(char *objdir, int local)
 {
        /* Ensure that this buffer is large enough so that we can
@@ -1009,6 +1067,7 @@ static void prepare_packed_git_one(char *objdir, int local)
        int len;
        DIR *dir;
        struct dirent *de;
+       struct string_list garbage = STRING_LIST_INIT_DUP;
 
        sprintf(path, "%s/pack", objdir);
        len = strlen(path);
@@ -1024,7 +1083,17 @@ static void prepare_packed_git_one(char *objdir, int local)
                int namelen = strlen(de->d_name);
                struct packed_git *p;
 
-               if (len + namelen + 1 > sizeof(path))
+               if (len + namelen + 1 > sizeof(path)) {
+                       if (report_garbage) {
+                               struct strbuf sb = STRBUF_INIT;
+                               strbuf_addf(&sb, "%.*s/%s", len - 1, path, de->d_name);
+                               report_garbage("path too long", sb.buf);
+                               strbuf_release(&sb);
+                       }
+                       continue;
+               }
+
+               if (is_dot_or_dotdot(de->d_name))
                        continue;
 
                strcpy(path + len, de->d_name);
@@ -1043,8 +1112,20 @@ static void prepare_packed_git_one(char *objdir, int local)
                            (p = add_packed_git(path, len + namelen, local)) != NULL)
                                install_packed_git(p);
                }
+
+               if (!report_garbage)
+                       continue;
+
+               if (has_extension(de->d_name, ".idx") ||
+                   has_extension(de->d_name, ".pack") ||
+                   has_extension(de->d_name, ".keep"))
+                       string_list_append(&garbage, path);
+               else
+                       report_garbage("garbage found", path);
        }
        closedir(dir);
+       report_pack_garbage(&garbage);
+       string_list_clear(&garbage, 0);
 }
 
 static int sort_pack(const void *a_, const void *b_)
index d645328609c9ec63782a0b9f80c31a73ef745802..e4bb3a14570780b41ce4ebea9e47870d4cbcb127 100755 (executable)
@@ -195,4 +195,30 @@ test_expect_success 'gc: prune old objects after local clone' '
        )
 '
 
+test_expect_success 'garbage report in count-objects -v' '
+       : >.git/objects/pack/foo &&
+       : >.git/objects/pack/foo.bar &&
+       : >.git/objects/pack/foo.keep &&
+       : >.git/objects/pack/foo.pack &&
+       : >.git/objects/pack/fake.bar &&
+       : >.git/objects/pack/fake.keep &&
+       : >.git/objects/pack/fake.pack &&
+       : >.git/objects/pack/fake.idx &&
+       : >.git/objects/pack/fake2.keep &&
+       : >.git/objects/pack/fake3.idx &&
+       git count-objects -v 2>stderr &&
+       grep "index file .git/objects/pack/fake.idx is too small" stderr &&
+       grep "^warning:" stderr | sort >actual &&
+       cat >expected <<\EOF &&
+warning: garbage found: .git/objects/pack/fake.bar
+warning: garbage found: .git/objects/pack/foo
+warning: garbage found: .git/objects/pack/foo.bar
+warning: no corresponding .idx nor .pack: .git/objects/pack/fake2.keep
+warning: no corresponding .idx: .git/objects/pack/foo.keep
+warning: no corresponding .idx: .git/objects/pack/foo.pack
+warning: no corresponding .pack: .git/objects/pack/fake3.idx
+EOF
+       test_cmp expected actual
+'
+
 test_done