gc: add --keep-largest-pack option
author Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Sun, 15 Apr 2018 15:36:14 +0000 (17:36 +0200)
committer Junio C Hamano <gitster@pobox.com>
Mon, 16 Apr 2018 04:52:29 +0000 (13:52 +0900)
This adds a new repack mode that combines everything into a secondary
pack, leaving the largest pack alone.

This could help reduce memory pressure. On linux-2.6.git, valgrind
massif reports 1.6GB heap in "pack all" case, and 535MB in "pack
all except the base pack" case. We save roughly 1GB memory by
excluding the base pack.

This should also lower I/O because we don't have to rewrite a giant
pack every time (e.g. for linux-2.6.git that's a 1.4GB pack file).

PS. The use of string_list here seems overkill, but we'll need it in
the next patch...

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-gc.txt
builtin/gc.c
t/t6500-gc.sh
index 3126e0dd002eca7ac420932bb9d1ace63752e8dc..8f903231da3b6b5d9a28fa3fa60004251370627d 100644 (file)
@@ -9,7 +9,7 @@ git-gc - Cleanup unnecessary files and optimize the local repository
 SYNOPSIS
 --------
 [verse]
-'git gc' [--aggressive] [--auto] [--quiet] [--prune=<date> | --no-prune] [--force]
+'git gc' [--aggressive] [--auto] [--quiet] [--prune=<date> | --no-prune] [--force] [--keep-largest-pack]
 
 DESCRIPTION
 -----------
@@ -84,6 +84,10 @@ be performed as well.
        Force `git gc` to run even if there may be another `git gc`
        instance running on this repository.
 
+--keep-largest-pack::
+       All packs except the largest pack and those marked with a
+       `.keep` file are consolidated into a single pack.
+
 Configuration
 -------------
 
index 3e67124eaaed256f440eea2a08101e87678eee0e..f251662a8f90144b2dce4f873db7994ad21f7248 100644 (file)
@@ -166,6 +166,22 @@ static int too_many_loose_objects(void)
        return needed;
 }
 
+static void find_base_packs(struct string_list *packs)
+{
+       struct packed_git *p, *base = NULL;
+
+       for (p = get_packed_git(the_repository); p; p = p->next) {
+               if (!p->pack_local)
+                       continue;
+               if (!base || base->pack_size < p->pack_size) {
+                       base = p;
+               }
+       }
+
+       if (base)
+               string_list_append(packs, base->pack_name);
+}
+
 static int too_many_packs(void)
 {
        struct packed_git *p;
@@ -188,7 +204,13 @@ static int too_many_packs(void)
        return gc_auto_pack_limit < cnt;
 }
 
-static void add_repack_all_option(void)
+static int keep_one_pack(struct string_list_item *item, void *data)
+{
+       argv_array_pushf(&repack, "--keep-pack=%s", basename(item->string));
+       return 0;
+}
+
+static void add_repack_all_option(struct string_list *keep_pack)
 {
        if (prune_expire && !strcmp(prune_expire, "now"))
                argv_array_push(&repack, "-a");
@@ -197,6 +219,9 @@ static void add_repack_all_option(void)
                if (prune_expire)
                        argv_array_pushf(&repack, "--unpack-unreachable=%s", prune_expire);
        }
+
+       if (keep_pack)
+               for_each_string_list(keep_pack, keep_one_pack, NULL);
 }
 
 static void add_repack_incremental_option(void)
@@ -220,7 +245,7 @@ static int need_to_gc(void)
         * there is no need.
         */
        if (too_many_packs())
-               add_repack_all_option();
+               add_repack_all_option(NULL);
        else if (too_many_loose_objects())
                add_repack_incremental_option();
        else
@@ -354,6 +379,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
        const char *name;
        pid_t pid;
        int daemonized = 0;
+       int keep_base_pack = -1;
 
        struct option builtin_gc_options[] = {
                OPT__QUIET(&quiet, N_("suppress progress reporting")),
@@ -366,6 +392,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
                OPT_BOOL_F(0, "force", &force,
                           N_("force running gc even if there may be another gc running"),
                           PARSE_OPT_NOCOMPLETE),
+               OPT_BOOL(0, "keep-largest-pack", &keep_base_pack,
+                        N_("repack all other packs except the largest pack")),
                OPT_END()
        };
 
@@ -431,8 +459,17 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
                         */
                        daemonized = !daemonize();
                }
-       } else
-               add_repack_all_option();
+       } else {
+               struct string_list keep_pack = STRING_LIST_INIT_NODUP;
+
+               if (keep_base_pack != -1) {
+                       if (keep_base_pack)
+                               find_base_packs(&keep_pack);
+               }
+
+               add_repack_all_option(&keep_pack);
+               string_list_clear(&keep_pack, 0);
+       }
 
        name = lock_repo_for_gc(force, &pid);
        if (name) {
index d5255dd5760389fbbabf1c2ffc787500e4184758..c42f60bc5bb7ce15cc3af3a1cbf5f33de5cc4360 100755 (executable)
@@ -43,6 +43,31 @@ test_expect_success 'gc is not aborted due to a stale symref' '
        )
 '
 
+test_expect_success 'gc --keep-largest-pack' '
+       test_create_repo keep-pack &&
+       (
+               cd keep-pack &&
+               test_commit one &&
+               test_commit two &&
+               test_commit three &&
+               git gc &&
+               ( cd .git/objects/pack && ls *.pack ) >pack-list &&
+               test_line_count = 1 pack-list &&
+               BASE_PACK=.git/objects/pack/pack-*.pack &&
+               test_commit four &&
+               git repack -d &&
+               test_commit five &&
+               git repack -d &&
+               ( cd .git/objects/pack && ls *.pack ) >pack-list &&
+               test_line_count = 3 pack-list &&
+               git gc --keep-largest-pack &&
+               ( cd .git/objects/pack && ls *.pack ) >pack-list &&
+               test_line_count = 2 pack-list &&
+               test_path_is_file $BASE_PACK &&
+               git fsck
+       )
+'
+
 test_expect_success 'auto gc with too many loose objects does not attempt to create bitmaps' '
        test_config gc.auto 3 &&
        test_config gc.autodetach false &&