gc --auto: exclude base pack if not enough mem to "repack -ad"
[gitweb.git] / builtin / gc.c
index 77fa720bd0bf374db8a24338bde8686ee0d60377..3c7c93e961160733ba6efe77722ad1ce06d887c0 100644 (file)
@@ -11,6 +11,7 @@
  */
 
 #include "builtin.h"
+#include "repository.h"
 #include "config.h"
 #include "tempfile.h"
 #include "lockfile.h"
 #include "argv-array.h"
 #include "commit.h"
 #include "packfile.h"
+#include "object-store.h"
+#include "pack.h"
+#include "pack-objects.h"
+#include "blob.h"
+#include "tree.h"
 
 #define FAILED_RUN "failed to run %s"
 
@@ -39,6 +45,8 @@ static timestamp_t gc_log_expire_time;
 static const char *gc_log_expire = "1.day.ago";
 static const char *prune_expire = "2.weeks.ago";
 static const char *prune_worktrees_expire = "3.months.ago";
+static unsigned long big_pack_threshold;
+static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
 
 static struct argv_array pack_refs_cmd = ARGV_ARRAY_INIT;
 static struct argv_array reflog = ARGV_ARRAY_INIT;
@@ -126,6 +134,9 @@ static void gc_config(void)
        git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire);
        git_config_get_expiry("gc.logexpiry", &gc_log_expire);
 
+       git_config_get_ulong("gc.bigpackthreshold", &big_pack_threshold);
+       git_config_get_ulong("pack.deltacachesize", &max_delta_cache_size);
+
        git_config(git_default_config, NULL);
 }
 
@@ -164,6 +175,28 @@ static int too_many_loose_objects(void)
        return needed;
 }
 
+/*
+ * Pick the local pack(s) that a repack should leave alone and append
+ * their pack_name to "packs". Non-local packs are ignored.
+ *
+ * With a non-zero "limit", every local pack whose pack_size is at
+ * least "limit" is appended and NULL is returned.
+ *
+ * With "limit" == 0, only the single largest local pack is appended
+ * (the first one encountered wins a tie) and that pack is returned;
+ * NULL is returned, and nothing appended, if there is no local pack.
+ */
+static struct packed_git *find_base_packs(struct string_list *packs,
+                                         unsigned long limit)
+{
+       struct packed_git *largest = NULL;
+       struct packed_git *pack = get_packed_git(the_repository);
+
+       while (pack) {
+               if (pack->pack_local) {
+                       if (!limit) {
+                               /* track the biggest pack seen so far */
+                               if (!largest ||
+                                   largest->pack_size < pack->pack_size)
+                                       largest = pack;
+                       } else if (pack->pack_size >= limit) {
+                               string_list_append(packs, pack->pack_name);
+                       }
+               }
+               pack = pack->next;
+       }
+
+       if (!largest)
+               return NULL;
+
+       string_list_append(packs, largest->pack_name);
+       return largest;
+}
+
 static int too_many_packs(void)
 {
        struct packed_git *p;
@@ -172,8 +205,7 @@ static int too_many_packs(void)
        if (gc_auto_pack_limit <= 0)
                return 0;
 
-       prepare_packed_git();
-       for (cnt = 0, p = packed_git; p; p = p->next) {
+       for (cnt = 0, p = get_packed_git(the_repository); p; p = p->next) {
                if (!p->pack_local)
                        continue;
                if (p->pack_keep)
@@ -187,7 +219,86 @@ static int too_many_packs(void)
        return gc_auto_pack_limit < cnt;
 }
 
-static void add_repack_all_option(void)
+/*
+ * Return the total amount of physical memory in bytes, using whichever
+ * platform interface was selected at build time (sysinfo(), BSD
+ * sysctl() or GlobalMemoryStatusEx()).
+ *
+ * Returns 0 when no supported interface is compiled in or when the
+ * query fails; callers must treat 0 as "unknown".
+ */
+static uint64_t total_ram(void)
+{
+#if defined(HAVE_SYSINFO)
+       struct sysinfo si;
+
+       if (!sysinfo(&si)) {
+               uint64_t total = si.totalram;
+
+               /*
+                * totalram is expressed in units of mem_unit bytes,
+                * so it has to be scaled to get a byte count.
+                */
+               if (si.mem_unit > 1)
+                       total *= (uint64_t)si.mem_unit;
+               return total;
+       }
+#elif defined(HAVE_BSD_SYSCTL) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM))
+       int64_t physical_memory;
+       int mib[2];
+       size_t length;
+
+       mib[0] = CTL_HW;
+# if defined(HW_MEMSIZE)
+       mib[1] = HW_MEMSIZE;
+# else
+       mib[1] = HW_PHYSMEM;
+# endif
+       length = sizeof(int64_t);
+       if (!sysctl(mib, 2, &physical_memory, &length, NULL, 0))
+               return physical_memory;
+#elif defined(GIT_WINDOWS_NATIVE)
+       MEMORYSTATUSEX memInfo;
+
+       memInfo.dwLength = sizeof(MEMORYSTATUSEX);
+       if (GlobalMemoryStatusEx(&memInfo))
+               return memInfo.ullTotalPhys;
+#endif
+       /* unknown platform, or the query above failed */
+       return 0;
+}
+
+/*
+ * Roughly estimate, in bytes, the memory needed to repack everything
+ * when "pack" has to be read in full.
+ *
+ * The estimate is the pack and index sizes (assumed to live in the OS
+ * file cache) plus per-object bookkeeping scaled by
+ * approximate_object_count(), plus the two delta caches.
+ *
+ * Returns 0 if "pack" is NULL or the approximate object count is 0.
+ */
+static uint64_t estimate_repack_memory(struct packed_git *pack)
+{
+       unsigned long nr_objects = approximate_object_count();
+       /*
+        * Accumulate in the 64-bit return type: with a 32-bit size_t
+        * the sum could wrap and produce a misleadingly small
+        * estimate.
+        */
+       uint64_t os_cache, heap;
+
+       if (!pack || !nr_objects)
+               return 0;
+
+       /*
+        * First we have to scan through at least one pack.
+        * Assume enough room in OS file cache to keep the entire pack
+        * or we may accidentally evict data of other processes from
+        * the cache.
+        */
+       os_cache = (uint64_t)pack->pack_size + pack->index_size;
+       /* then pack-objects needs lots more for book keeping */
+       heap = (uint64_t)sizeof(struct object_entry) * nr_objects;
+       /*
+        * internal rev-list --all --objects takes up some memory too,
+        * let's say half of it is for blobs
+        */
+       heap += (uint64_t)sizeof(struct blob) * nr_objects / 2;
+       /*
+        * and the other half is for trees (commits and tags are
+        * usually insignificant)
+        */
+       heap += (uint64_t)sizeof(struct tree) * nr_objects / 2;
+       /* and then obj_hash[], underestimated in fact */
+       heap += (uint64_t)sizeof(struct object *) * nr_objects;
+       /* revindex is used also */
+       heap += (uint64_t)sizeof(struct revindex_entry) * nr_objects;
+       /*
+        * read_sha1_file() (either at delta calculation phase, or
+        * writing phase) also fills up the delta base cache
+        */
+       heap += delta_base_cache_limit;
+       /* and of course pack-objects has its own delta cache */
+       heap += max_delta_cache_size;
+
+       return os_cache + heap;
+}
+
+/*
+ * for_each_string_list() callback: push a "--keep-pack=<name>" argument
+ * onto the repack command line, where <name> is basename() of the
+ * item's string. "data" is unused. Always returns 0.
+ */
+static int keep_one_pack(struct string_list_item *item, void *data)
+{
+       const char *pack_file = basename(item->string);
+
+       argv_array_pushf(&repack, "--keep-pack=%s", pack_file);
+       return 0;
+}
+
+static void add_repack_all_option(struct string_list *keep_pack)
 {
        if (prune_expire && !strcmp(prune_expire, "now"))
                argv_array_push(&repack, "-a");
@@ -196,6 +307,9 @@ static void add_repack_all_option(void)
                if (prune_expire)
                        argv_array_pushf(&repack, "--unpack-unreachable=%s", prune_expire);
        }
+
+       if (keep_pack)
+               for_each_string_list(keep_pack, keep_one_pack, NULL);
 }
 
 static void add_repack_incremental_option(void)
@@ -218,9 +332,35 @@ static int need_to_gc(void)
         * we run "repack -A -d -l".  Otherwise we tell the caller
         * there is no need.
         */
-       if (too_many_packs())
-               add_repack_all_option();
-       else if (too_many_loose_objects())
+       if (too_many_packs()) {
+               struct string_list keep_pack = STRING_LIST_INIT_NODUP;
+
+               if (big_pack_threshold) {
+                       find_base_packs(&keep_pack, big_pack_threshold);
+                       if (keep_pack.nr >= gc_auto_pack_limit) {
+                               big_pack_threshold = 0;
+                               string_list_clear(&keep_pack, 0);
+                               find_base_packs(&keep_pack, 0);
+                       }
+               } else {
+                       struct packed_git *p = find_base_packs(&keep_pack, 0);
+                       uint64_t mem_have, mem_want;
+
+                       mem_have = total_ram();
+                       mem_want = estimate_repack_memory(p);
+
+                       /*
+                        * Only allow 1/2 of memory for pack-objects, leave
+                        * the rest for the OS and other processes in the
+                        * system.
+                        */
+                       if (!mem_have || mem_want < mem_have / 2)
+                               string_list_clear(&keep_pack, 0);
+               }
+
+               add_repack_all_option(&keep_pack);
+               string_list_clear(&keep_pack, 0);
+       } else if (too_many_loose_objects())
                add_repack_incremental_option();
        else
                return 0;
@@ -353,6 +493,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
        const char *name;
        pid_t pid;
        int daemonized = 0;
+       int keep_base_pack = -1;
 
        struct option builtin_gc_options[] = {
                OPT__QUIET(&quiet, N_("suppress progress reporting")),
@@ -360,8 +501,13 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
                        N_("prune unreferenced objects"),
                        PARSE_OPT_OPTARG, NULL, (intptr_t)prune_expire },
                OPT_BOOL(0, "aggressive", &aggressive, N_("be more thorough (increased runtime)")),
-               OPT_BOOL(0, "auto", &auto_gc, N_("enable auto-gc mode")),
-               OPT_BOOL(0, "force", &force, N_("force running gc even if there may be another gc running")),
+               OPT_BOOL_F(0, "auto", &auto_gc, N_("enable auto-gc mode"),
+                          PARSE_OPT_NOCOMPLETE),
+               OPT_BOOL_F(0, "force", &force,
+                          N_("force running gc even if there may be another gc running"),
+                          PARSE_OPT_NOCOMPLETE),
+               OPT_BOOL(0, "keep-largest-pack", &keep_base_pack,
+                        N_("repack all other packs except the largest pack")),
                OPT_END()
        };
 
@@ -427,8 +573,19 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
                         */
                        daemonized = !daemonize();
                }
-       } else
-               add_repack_all_option();
+       } else {
+               struct string_list keep_pack = STRING_LIST_INIT_NODUP;
+
+               if (keep_base_pack != -1) {
+                       if (keep_base_pack)
+                               find_base_packs(&keep_pack, 0);
+               } else if (big_pack_threshold) {
+                       find_base_packs(&keep_pack, big_pack_threshold);
+               }
+
+               add_repack_all_option(&keep_pack);
+               string_list_clear(&keep_pack, 0);
+       }
 
        name = lock_repo_for_gc(force, &pid);
        if (name) {
@@ -476,7 +633,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
                return error(FAILED_RUN, rerere.argv[0]);
 
        report_garbage = report_pack_garbage;
-       reprepare_packed_git();
+       reprepare_packed_git(the_repository);
        if (pack_garbage.nr > 0)
                clean_pack_garbage();