Merge branch 'nd/count-garbage'
author Junio C Hamano <gitster@pobox.com>
Thu, 21 Mar 2013 21:02:34 +0000 (14:02 -0700)
committer Junio C Hamano <gitster@pobox.com>
Thu, 21 Mar 2013 21:02:34 +0000 (14:02 -0700)
"git count-objects -v" did not count leftover temporary packfiles
and other kinds of garbage.

* nd/count-garbage:
count-objects: report how much disk space taken by garbage files
count-objects: report garbage files in pack directory too
sha1_file: reorder code in prepare_packed_git_one()
git-count-objects.txt: describe each line in -v output

Documentation/git-count-objects.txt
builtin/count-objects.c
cache.h
sha1_file.c
t/t5304-prune.sh
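diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt
index 23c80cea6465d23476935abcfabba8e1deb915ee..da6e72e696d4af013b3c717d31a0e98cd41922d4 100644 (file)
--- a/Documentation/git-count-objects.txt
+++ b/Documentation/git-count-objects.txt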
@@ -20,11 +20,23 @@ OPTIONS
 -------
 -v::
 --verbose::
-       In addition to the number of loose objects and disk
-       space consumed, it reports the number of in-pack
-       objects, number of packs, disk space consumed by those packs,
-       and number of objects that can be removed by running
-       `git prune-packed`.
+       Report in more detail:
++
+count: the number of loose objects
++
+size: disk space consumed by loose objects, in KiB
++
+in-pack: the number of in-pack objects
++
+size-pack: disk space consumed by the packs, in KiB
++
+prune-packable: the number of loose objects that are also present in
+the packs. These objects could be pruned using `git prune-packed`.
++
+garbage: the number of files in object database that are not valid
+loose objects nor valid packs
++
+size-garbage: disk space consumed by garbage files, in KiB
 
 GIT
 ---
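diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 9afaa88f776468a0de33dd153eadae7621cf6267..3a01a8d08591d93426d436ce2e1e17fccd3ebf78 100644 (file)
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c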
@@ -9,11 +9,22 @@
 #include "builtin.h"
 #include "parse-options.h"
 
+static unsigned long garbage;
+static off_t size_garbage;
+
+static void real_report_garbage(const char *desc, const char *path)
+{
+       struct stat st;
+       if (!stat(path, &st))
+               size_garbage += st.st_size;
+       warning("%s: %s", desc, path);
+       garbage++;
+}
+
 static void count_objects(DIR *d, char *path, int len, int verbose,
                          unsigned long *loose,
                          off_t *loose_size,
-                         unsigned long *packed_loose,
-                         unsigned long *garbage)
+                         unsigned long *packed_loose)
 {
        struct dirent *ent;
        while ((ent = readdir(d)) != NULL) {
@@ -46,9 +57,11 @@ static void count_objects(DIR *d, char *path, int len, int verbose,
                }
                if (bad) {
                        if (verbose) {
-                               error("garbage found: %.*s/%s",
-                                     len + 2, path, ent->d_name);
-                               (*garbage)++;
+                               struct strbuf sb = STRBUF_INIT;
+                               strbuf_addf(&sb, "%.*s/%s",
+                                           len + 2, path, ent->d_name);
+                               report_garbage("garbage found", sb.buf);
+                               strbuf_release(&sb);
                        }
                        continue;
                }
@@ -76,7 +89,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
        const char *objdir = get_object_directory();
        int len = strlen(objdir);
        char *path = xmalloc(len + 50);
-       unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0;
+       unsigned long loose = 0, packed = 0, packed_loose = 0;
        off_t loose_size = 0;
        struct option opts[] = {
                OPT__VERBOSE(&verbose, N_("be verbose")),
@@ -87,6 +100,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
        /* we do not take arguments other than flags for now */
        if (argc)
                usage_with_options(count_objects_usage, opts);
+       if (verbose)
+               report_garbage = real_report_garbage;
        memcpy(path, objdir, len);
        if (len && objdir[len-1] != '/')
                path[len++] = '/';
@@ -97,7 +112,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
                if (!d)
                        continue;
                count_objects(d, path, len, verbose,
-                             &loose, &loose_size, &packed_loose, &garbage);
+                             &loose, &loose_size, &packed_loose);
                closedir(d);
        }
        if (verbose) {
@@ -122,6 +137,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
                printf("size-pack: %lu\n", (unsigned long) (size_pack / 1024));
                printf("prune-packable: %lu\n", packed_loose);
                printf("garbage: %lu\n", garbage);
+               printf("size-garbage: %lu\n", (unsigned long) (size_garbage / 1024));
        }
        else
                printf("%lu objects, %lu kilobytes\n",
diff --git a/cache.h b/cache.h
index 6818d87fa033182c343164ace22fb21246db6a3b..c56315ccc3605654ff4a08a8b74cc8f25cfb2b41 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -1058,6 +1058,9 @@ extern const char *parse_feature_value(const char *feature_list, const char *fea
 
 extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
 
+/* A hook for count-objects to report invalid files in pack directory */
+extern void (*report_garbage)(const char *desc, const char *path);
+
 extern void prepare_packed_git(void);
 extern void reprepare_packed_git(void);
 extern void install_packed_git(struct packed_git *pack);
diff --git a/sha1_file.c b/sha1_file.c
index 40b23297b2e1e60a3719e9c67256303e39456604..16967d3b9a86dc481a5161f0a98220e05790ca01 100644 (file)
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -21,6 +21,7 @@
 #include "sha1-lookup.h"
 #include "bulk-checkin.h"
 #include "streaming.h"
+#include "dir.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -1000,6 +1001,63 @@ void install_packed_git(struct packed_git *pack)
        packed_git = pack;
 }
 
+void (*report_garbage)(const char *desc, const char *path);
+
+static void report_helper(const struct string_list *list,
+                         int seen_bits, int first, int last)
+{
+       const char *msg;
+       switch (seen_bits) {
+       case 0:
+               msg = "no corresponding .idx nor .pack";
+               break;
+       case 1:
+               msg = "no corresponding .idx";
+               break;
+       case 2:
+               msg = "no corresponding .pack";
+               break;
+       default:
+               return;
+       }
+       for (; first < last; first++)
+               report_garbage(msg, list->items[first].string);
+}
+
+static void report_pack_garbage(struct string_list *list)
+{
+       int i, baselen = -1, first = 0, seen_bits = 0;
+
+       if (!report_garbage)
+               return;
+
+       sort_string_list(list);
+
+       for (i = 0; i < list->nr; i++) {
+               const char *path = list->items[i].string;
+               if (baselen != -1 &&
+                   strncmp(path, list->items[first].string, baselen)) {
+                       report_helper(list, seen_bits, first, i);
+                       baselen = -1;
+                       seen_bits = 0;
+               }
+               if (baselen == -1) {
+                       const char *dot = strrchr(path, '.');
+                       if (!dot) {
+                               report_garbage("garbage found", path);
+                               continue;
+                       }
+                       baselen = dot - path + 1;
+                       first = i;
+               }
+               if (!strcmp(path + baselen, "pack"))
+                       seen_bits |= 1;
+               else if (!strcmp(path + baselen, "idx"))
+                       seen_bits |= 2;
+       }
+       report_helper(list, seen_bits, first, list->nr);
+}
+
 static void prepare_packed_git_one(char *objdir, int local)
 {
        /* Ensure that this buffer is large enough so that we can
@@ -1009,6 +1067,7 @@ static void prepare_packed_git_one(char *objdir, int local)
        int len;
        DIR *dir;
        struct dirent *de;
+       struct string_list garbage = STRING_LIST_INIT_DUP;
 
        sprintf(path, "%s/pack", objdir);
        len = strlen(path);
@@ -1024,29 +1083,49 @@ static void prepare_packed_git_one(char *objdir, int local)
                int namelen = strlen(de->d_name);
                struct packed_git *p;
 
-               if (!has_extension(de->d_name, ".idx"))
+               if (len + namelen + 1 > sizeof(path)) {
+                       if (report_garbage) {
+                               struct strbuf sb = STRBUF_INIT;
+                               strbuf_addf(&sb, "%.*s/%s", len - 1, path, de->d_name);
+                               report_garbage("path too long", sb.buf);
+                               strbuf_release(&sb);
+                       }
                        continue;
+               }
 
-               if (len + namelen + 1 > sizeof(path))
+               if (is_dot_or_dotdot(de->d_name))
                        continue;
 
-               /* Don't reopen a pack we already have. */
                strcpy(path + len, de->d_name);
-               for (p = packed_git; p; p = p->next) {
-                       if (!memcmp(path, p->pack_name, len + namelen - 4))
-                               break;
+
+               if (has_extension(de->d_name, ".idx")) {
+                       /* Don't reopen a pack we already have. */
+                       for (p = packed_git; p; p = p->next) {
+                               if (!memcmp(path, p->pack_name, len + namelen - 4))
+                                       break;
+                       }
+                       if (p == NULL &&
+                           /*
+                            * See if it really is a valid .idx file with
+                            * corresponding .pack file that we can map.
+                            */
+                           (p = add_packed_git(path, len + namelen, local)) != NULL)
+                               install_packed_git(p);
                }
-               if (p)
-                       continue;
-               /* See if it really is a valid .idx file with corresponding
-                * .pack file that we can map.
-                */
-               p = add_packed_git(path, len + namelen, local);
-               if (!p)
+
+               if (!report_garbage)
                        continue;
-               install_packed_git(p);
+
+               if (has_extension(de->d_name, ".idx") ||
+                   has_extension(de->d_name, ".pack") ||
+                   has_extension(de->d_name, ".keep"))
+                       string_list_append(&garbage, path);
+               else
+                       report_garbage("garbage found", path);
        }
        closedir(dir);
+       report_pack_garbage(&garbage);
+       string_list_clear(&garbage, 0);
 }
 
 static int sort_pack(const void *a_, const void *b_)
diff --git a/t/t5304-prune.sh b/t/t5304-prune.sh
index d645328609c9ec63782a0b9f80c31a73ef745802..e4bb3a14570780b41ce4ebea9e47870d4cbcb127 100755 (executable)
--- a/t/t5304-prune.sh
+++ b/t/t5304-prune.sh
@@ -195,4 +195,30 @@ test_expect_success 'gc: prune old objects after local clone' '
        )
 '
 
+test_expect_success 'garbage report in count-objects -v' '
+       : >.git/objects/pack/foo &&
+       : >.git/objects/pack/foo.bar &&
+       : >.git/objects/pack/foo.keep &&
+       : >.git/objects/pack/foo.pack &&
+       : >.git/objects/pack/fake.bar &&
+       : >.git/objects/pack/fake.keep &&
+       : >.git/objects/pack/fake.pack &&
+       : >.git/objects/pack/fake.idx &&
+       : >.git/objects/pack/fake2.keep &&
+       : >.git/objects/pack/fake3.idx &&
+       git count-objects -v 2>stderr &&
+       grep "index file .git/objects/pack/fake.idx is too small" stderr &&
+       grep "^warning:" stderr | sort >actual &&
+       cat >expected <<\EOF &&
+warning: garbage found: .git/objects/pack/fake.bar
+warning: garbage found: .git/objects/pack/foo
+warning: garbage found: .git/objects/pack/foo.bar
+warning: no corresponding .idx nor .pack: .git/objects/pack/fake2.keep
+warning: no corresponding .idx: .git/objects/pack/foo.keep
+warning: no corresponding .idx: .git/objects/pack/foo.pack
+warning: no corresponding .pack: .git/objects/pack/fake3.idx
+EOF
+       test_cmp expected actual
+'
+
 test_done