Merge branch 'jc/autogc' into js/rebase-i
authorJunio C Hamano <gitster@pobox.com>
Wed, 26 Sep 2007 07:42:12 +0000 (00:42 -0700)
committerJunio C Hamano <gitster@pobox.com>
Wed, 26 Sep 2007 07:42:12 +0000 (00:42 -0700)
* jc/autogc:
git-gc --auto: run "repack -A -d -l" as necessary.
git-gc --auto: restructure the way "repack" command line is built.
git-gc --auto: protect ourselves from accumulated cruft
git-gc --auto: add documentation.
git-gc --auto: move threshold check to need_to_gc() function.
repack -A -d: use --keep-unreachable when repacking
pack-objects --keep-unreachable
Export matches_pack_name() and fix its return value
Invoke "git gc --auto" from commit, merge, am and rebase.
Implement git gc --auto

Conflicts:

builtin-pack-objects.c

Documentation/config.txt
Documentation/git-gc.txt
builtin-gc.c
builtin-pack-objects.c
cache.h
git-am.sh
git-commit.sh
git-merge.sh
git-rebase--interactive.sh
git-repack.sh
sha1_file.c
index 015910f27a450cdaec80f3bfc2679243126736c0..2f04226988cb070a8fede8db757949ea7cab09c7 100644 (file)
@@ -439,6 +439,19 @@ gc.aggressiveWindow::
        algorithm used by 'git gc --aggressive'.  This defaults
        to 10.
 
+gc.auto::
+       When there are approximately more than this many loose
+       objects in the repository, `git gc --auto` will pack them.
+       Some Porcelain commands use this command to perform a
+       light-weight garbage collection from time to time.  Setting
+       this to 0 disables it.
+
+gc.autopacklimit::
+       When there are more than this many packs that are not
+       marked with a `*.keep` file in the repository, `git gc
+       --auto` consolidates them into one larger pack.  Setting
+       this to 0 disables this.
+
 gc.packrefs::
        `git gc` does not run `git pack-refs` in a bare repository by
        default so that older dumb-transport clients can still fetch
index c7742ca9630b13d1eeef16d175f8ca840ddff4b0..b9d5660eacee03bde2360b97b80f3378972fe678 100644 (file)
@@ -8,7 +8,7 @@ git-gc - Cleanup unnecessary files and optimize the local repository
 
 SYNOPSIS
 --------
-'git-gc' [--prune] [--aggressive]
+'git-gc' [--prune] [--aggressive] [--auto]
 
 DESCRIPTION
 -----------
@@ -43,6 +43,20 @@ OPTIONS
        persistent, so this option only needs to be used occasionally; every
        few hundred changesets or so.
 
+--auto::
+       With this option, `git gc` checks if there are too many
+       loose objects in the repository and runs
+       gitlink:git-repack[1] with `-d -l` options to pack them.
+       The threshold for loose objects is set with `gc.auto` configuration
+       variable, and can be disabled by setting it to 0.  Some
+       Porcelain commands use this after they perform operations
+       that could create many loose objects automatically.
+       Additionally, when too many packs are present,
+       they are consolidated into one larger pack by running
+       the `git-repack` command with `-A` option.  The
+       threshold for number of packs is set with
+       `gc.autopacklimit` configuration variable.
+
 Configuration
 -------------
 
index 939748261041049f31d62935ec08f062bdfa6e79..23ad2b6a21a81f469e813173ba04d626949e972f 100644 (file)
@@ -20,6 +20,8 @@ static const char builtin_gc_usage[] = "git-gc [--prune] [--aggressive]";
 
 static int pack_refs = 1;
 static int aggressive_window = -1;
+static int gc_auto_threshold = 6700;
+static int gc_auto_pack_limit = 20;
 
 #define MAX_ADD 10
 static const char *argv_pack_refs[] = {"pack-refs", "--all", "--prune", NULL};
@@ -41,6 +43,14 @@ static int gc_config(const char *var, const char *value)
                aggressive_window = git_config_int(var, value);
                return 0;
        }
+       if (!strcmp(var, "gc.auto")) {
+               gc_auto_threshold = git_config_int(var, value);
+               return 0;
+       }
+       if (!strcmp(var, "gc.autopacklimit")) {
+               gc_auto_pack_limit = git_config_int(var, value);
+               return 0;
+       }
        return git_default_config(var, value);
 }
 
@@ -57,10 +67,113 @@ static void append_option(const char **cmd, const char *opt, int max_length)
        cmd[i] = NULL;
 }
 
+/*
+ * Quickly estimate whether the repository holds more loose objects
+ * than gc_auto_threshold allows.  Because SHA-1 is evenly distributed,
+ * only the "17" directory under the object directory is scanned and
+ * its entry count is compared against the threshold scaled down by
+ * 256 (rounded up).
+ *
+ * Returns 1 when the sampled directory exceeds the scaled threshold,
+ * and 0 otherwise -- including when the check is disabled, the path
+ * does not fit in PATH_MAX, or the directory cannot be opened.
+ */
+static int too_many_loose_objects(void)
+{
+       static const char hex_digits[] = "0123456789abcdef";
+       const char *objdir = get_object_directory();
+       char sample[PATH_MAX];
+       struct dirent *de;
+       DIR *d;
+       int limit, len;
+       int seen = 0;
+
+       if (gc_auto_threshold <= 0)
+               return 0;
+
+       len = snprintf(sample, sizeof(sample), "%s/17", objdir);
+       if (len < 0 || sizeof(sample) <= (size_t)len) {
+               warning("insanely long object directory %.*s", 50, objdir);
+               return 0;
+       }
+
+       d = opendir(sample);
+       if (!d)
+               return 0;
+
+       /* Per-directory share of the threshold, rounded up. */
+       limit = (gc_auto_threshold + 255) / 256;
+
+       /* Stop reading as soon as the limit has been exceeded. */
+       while (seen <= limit && (de = readdir(d)) != NULL) {
+               /* Count only names made of exactly 38 hex digits. */
+               if (strspn(de->d_name, hex_digits) == 38 &&
+                   de->d_name[38] == '\0')
+                       seen++;
+       }
+       closedir(d);
+       return limit < seen;
+}
+
+/*
+ * Decide whether there are enough local packs to warrant consolidating
+ * them.
+ *
+ * Counts the packs on the packed_git list that are local and do not
+ * have a companion ".keep" file, and returns non-zero when that count
+ * reaches gc_auto_pack_limit.  A limit of zero or less disables the
+ * check, in which case 0 is returned.
+ */
+static int too_many_packs(void)
+{
+       struct packed_git *p;
+       int cnt;
+
+       if (gc_auto_pack_limit <= 0)
+               return 0;
+
+       prepare_packed_git();
+       for (cnt = 0, p = packed_git; p; p = p->next) {
+               char path[PATH_MAX];
+               size_t len;
+
+               if (!p->pack_local)
+                       continue;
+               len = strlen(p->pack_name);
+               /*
+                * The name must be long enough to carry a five-character
+                * suffix (presumably ".pack") to swap out, and the result
+                * must fit in path[].
+                */
+               if (len < 5 || PATH_MAX <= len + 1)
+                       continue; /* oops, give up */
+               /* Replace the trailing five characters with ".keep". */
+               memcpy(path, p->pack_name, len - 5);
+               memcpy(path + len - 5, ".keep", 6);
+               /*
+                * Probe the ".keep" path just built, not the pack itself,
+                * and leave the pack out of the count when that file exists.
+                */
+               if (!access(path, F_OK))
+                       continue;
+               /*
+                * Perhaps check the size of the pack and count only
+                * very small ones here?
+                */
+               cnt++;
+       }
+       return gc_auto_pack_limit <= cnt;
+}
+
+/*
+ * Work out whether an automatic gc should run and, if so, fill in
+ * argv_repack with the repack command line to use:
+ *
+ *   too many packs                        -> "repack -A -d -l"
+ *   too many loose objects, but not packs -> "repack -d -l"
+ *
+ * Returns 1 when a repack is called for, 0 when there is no need.
+ */
+static int need_to_gc(void)
+{
+       int n = 0;
+
+       /*
+        * With both gc.auto and gc.autopacklimit at 0 or negative,
+        * the automatic gc is disabled.
+        */
+       if (!(gc_auto_threshold > 0 || gc_auto_pack_limit > 0))
+               return 0;
+
+       argv_repack[n] = "repack";
+       n++;
+
+       /* The loose object count is consulted only when packs are few. */
+       if (too_many_packs()) {
+               argv_repack[n] = "-A";
+               n++;
+       } else if (!too_many_loose_objects()) {
+               return 0;
+       }
+
+       argv_repack[n] = "-d";
+       argv_repack[n + 1] = "-l";
+       argv_repack[n + 2] = NULL;
+       return 1;
+}
+
 int cmd_gc(int argc, const char **argv, const char *prefix)
 {
        int i;
        int prune = 0;
+       int auto_gc = 0;
        char buf[80];
 
        git_config(gc_config);
@@ -82,12 +195,24 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
                        }
                        continue;
                }
-               /* perhaps other parameters later... */
+               if (!strcmp(arg, "--auto")) {
+                       auto_gc = 1;
+                       continue;
+               }
                break;
        }
        if (i != argc)
                usage(builtin_gc_usage);
 
+       if (auto_gc) {
+               /*
+                * Auto-gc should be least intrusive as possible.
+                */
+               prune = 0;
+               if (!need_to_gc())
+                       return 0;
+       }
+
        if (pack_refs && run_command_v_opt(argv_pack_refs, RUN_GIT_CMD))
                return error(FAILED_RUN, argv_pack_refs[0]);
 
@@ -103,5 +228,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
        if (run_command_v_opt(argv_rerere, RUN_GIT_CMD))
                return error(FAILED_RUN, argv_rerere[0]);
 
+       if (auto_gc && too_many_loose_objects())
+               warning("There are too many unreachable loose objects; "
+                       "run 'git prune' to remove them.");
+
        return 0;
 }
index a15906bdb2021e68a014344cad4e73e9de3367ca..0be539ed7fd9bf95bb40515b560c7615ed318f37 100644 (file)
@@ -25,7 +25,7 @@ git-pack-objects [{ -q | --progress | --all-progress }] \n\
        [--window=N] [--window-memory=N] [--depth=N] \n\
        [--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\
        [--threads=N] [--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\
-       [--stdout | base-name] [<ref-list | <object-list]";
+       [--stdout | base-name] [--keep-unreachable] [<ref-list | <object-list]";
 
 struct object_entry {
        struct pack_idx_entry idx;
@@ -61,7 +61,7 @@ static struct object_entry **written_list;
 static uint32_t nr_objects, nr_alloc, nr_result, nr_written;
 
 static int non_empty;
-static int no_reuse_delta, no_reuse_object;
+static int no_reuse_delta, no_reuse_object, keep_unreachable;
 static int local;
 static int incremental;
 static int allow_ofs_delta;
@@ -1807,15 +1807,19 @@ static void read_object_list_from_stdin(void)
        }
 }
 
+#define OBJECT_ADDED (1u<<20)
+
 static void show_commit(struct commit *commit)
 {
        add_object_entry(commit->object.sha1, OBJ_COMMIT, NULL, 0);
+       commit->object.flags |= OBJECT_ADDED;
 }
 
 static void show_object(struct object_array_entry *p)
 {
        add_preferred_base_object(p->name);
        add_object_entry(p->item->sha1, p->item->type, p->name, 0);
+       p->item->flags |= OBJECT_ADDED;
 }
 
 static void show_edge(struct commit *commit)
@@ -1823,6 +1827,86 @@ static void show_edge(struct commit *commit)
        add_preferred_base(commit->object.sha1);
 }
 
+/* One object found in a pack, together with its offset in that pack. */
+struct in_pack_object {
+       off_t offset;
+       struct object *object;
+};
+
+/*
+ * Array of in_pack_object entries: "nr" entries are in use, and
+ * "alloc" is the size bookkeeping handed to ALLOC_GROW() by the caller.
+ */
+struct in_pack {
+       int alloc;
+       int nr;
+       struct in_pack_object *array;
+};
+
+/*
+ * Append "object" to in_pack, recording the offset that
+ * find_pack_entry_one() reports for it in pack "p".  The array is not
+ * grown here; the caller must already have made room for the entry.
+ */
+static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack)
+{
+       struct in_pack_object *slot = &in_pack->array[in_pack->nr];
+
+       slot->object = object;
+       slot->offset = find_pack_entry_one(object->sha1, p);
+       in_pack->nr++;
+}
+
+/*
+ * qsort() callback that orders in_pack_object entries by their pack
+ * offset, in order to emulate the "git-rev-list --objects" output
+ * that produced the pack originally.  Entries with equal offsets are
+ * ordered by hashcmp() of their object names.
+ */
+static int ofscmp(const void *a_, const void *b_)
+{
+       const struct in_pack_object *lhs = a_;
+       const struct in_pack_object *rhs = b_;
+
+       if (lhs->offset != rhs->offset)
+               return lhs->offset < rhs->offset ? -1 : 1;
+       return hashcmp(lhs->object->sha1, rhs->object->sha1);
+}
+
+/*
+ * Add to the object list every object that lives in one of the packs
+ * named in revs->ignore_packed and has not been flagged OBJECT_ADDED
+ * yet.  The objects are collected first, sorted with ofscmp(), and
+ * then handed to add_object_entry() in that order.
+ */
+static void add_objects_in_unpacked_packs(struct rev_info *revs)
+{
+       struct in_pack found;
+       struct packed_git *pack;
+       uint32_t i;
+
+       memset(&found, 0, sizeof(found));
+
+       for (pack = packed_git; pack; pack = pack->next) {
+               int listed = 0;
+
+               /* Only packs listed in revs->ignore_packed are scanned. */
+               for (i = 0; !listed && i < revs->num_ignore_packed; i++)
+                       listed = matches_pack_name(pack, revs->ignore_packed[i]);
+               if (!listed)
+                       continue;
+               if (open_pack_index(pack))
+                       die("cannot open pack index");
+
+               /* Make room for the case where every object gets added. */
+               ALLOC_GROW(found.array,
+                          found.nr + pack->num_objects,
+                          found.alloc);
+
+               for (i = 0; i < pack->num_objects; i++) {
+                       struct object *obj =
+                               lookup_unknown_object(nth_packed_object_sha1(pack, i));
+
+                       /* Already added, by the traversal or an earlier pack. */
+                       if (obj->flags & OBJECT_ADDED)
+                               continue;
+                       mark_in_pack_object(obj, pack, &found);
+                       obj->flags |= OBJECT_ADDED;
+               }
+       }
+
+       if (found.nr) {
+               qsort(found.array, found.nr, sizeof(found.array[0]), ofscmp);
+               for (i = 0; i < found.nr; i++) {
+                       struct object *obj = found.array[i].object;
+
+                       add_object_entry(obj->sha1, obj->type, "", 0);
+               }
+       }
+       free(found.array);
+}
+
 static void get_object_list(int ac, const char **av)
 {
        struct rev_info revs;
@@ -1854,6 +1938,9 @@ static void get_object_list(int ac, const char **av)
        prepare_revision_walk(&revs);
        mark_edges_uninteresting(revs.commits, &revs, show_edge);
        traverse_commit_list(&revs, show_commit, show_object);
+
+       if (keep_unreachable)
+               add_objects_in_unpacked_packs(&revs);
 }
 
 static int adjust_perm(const char *path, mode_t mode)
@@ -1983,6 +2070,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
                        use_internal_rev_list = 1;
                        continue;
                }
+               if (!strcmp("--keep-unreachable", arg)) {
+                       keep_unreachable = 1;
+                       continue;
+               }
                if (!strcmp("--unpacked", arg) ||
                    !prefixcmp(arg, "--unpacked=") ||
                    !strcmp("--reflog", arg) ||
diff --git a/cache.h b/cache.h
index 824650016677353cfa8c8a140eb3d904f56d60ee..bb86fcce04f79bcd7857e346f44d38d3b7291004 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -530,6 +530,7 @@ extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsign
 extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
 extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
 extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
+extern int matches_pack_name(struct packed_git *p, const char *name);
 
 /* Dumb servers support */
 extern int update_server_info(int);
index 32c46d7ed4b26220f4c9e7fc778bb240c85dae1c..b02ae6a76fa8265da1ea3d720df9dc29cc5d5519 100755 (executable)
--- a/git-am.sh
+++ b/git-am.sh
@@ -464,6 +464,8 @@ do
                "$GIT_DIR"/hooks/post-applypatch
        fi
 
+       git gc --auto
+
        go_next
 done
 
index cb14f0621651d2006b08d1eddf67ab3269df84d0..44ccc4418e9891dc2027c1f6bbd0c692a08f5e4c 100755 (executable)
@@ -611,6 +611,7 @@ git rerere
 
 if test "$ret" = 0
 then
+       git gc --auto
        if test -x "$GIT_DIR"/hooks/post-commit
        then
                "$GIT_DIR"/hooks/post-commit
index 6c513dcbdf44036b0207c276e765a87eceb7aa77..bf18f582da53200fb422bf35c85c1f05c5f7c88d 100755 (executable)
@@ -82,6 +82,7 @@ finish () {
                        ;;
                *)
                        git update-ref -m "$rlogm" HEAD "$1" "$head" || exit 1
+                       git gc --auto
                        ;;
                esac
                ;;
index 268a629c434c3cc1bad8a59861f3f093291ec540..8e6e9431e885660cd01ae95c4a13b8bbbf260063 100755 (executable)
@@ -326,6 +326,8 @@ do_next () {
        rm -rf "$DOTEST" &&
        warn "Successfully rebased and updated $HEADNAME."
 
+       git gc --auto
+
        exit
 }
 
index 0aae1a3ed5571a010f80438f8e8a0fc7eb0dc285..e72adc4d91efb8eb6dc96c1f431c8863c408439b 100755 (executable)
@@ -3,17 +3,19 @@
 # Copyright (c) 2005 Linus Torvalds
 #
 
-USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--window-memory=N] [--depth=N]'
+USAGE='[-a|-A] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--window-memory=N] [--depth=N]'
 SUBDIRECTORY_OK='Yes'
 . git-sh-setup
 
-no_update_info= all_into_one= remove_redundant=
+no_update_info= all_into_one= remove_redundant= keep_unreachable=
 local= quiet= no_reuse= extra=
 while test $# != 0
 do
        case "$1" in
        -n)     no_update_info=t ;;
        -a)     all_into_one=t ;;
+       -A)     all_into_one=t
+               keep_unreachable=--keep-unreachable ;;
        -d)     remove_redundant=t ;;
        -q)     quiet=-q ;;
        -f)     no_reuse=--no-reuse-object ;;
@@ -59,7 +61,13 @@ case ",$all_into_one," in
                        fi
                done
        fi
-       [ -z "$args" ] && args='--unpacked --incremental'
+       if test -z "$args"
+       then
+               args='--unpacked --incremental'
+       elif test -n "$keep_unreachable"
+       then
+               args="$args $keep_unreachable"
+       fi
        ;;
 esac
 
index 9978a58da68bbf6f3482545d9f290fbfa3f3fe34..5801c3e71b43d80f7d65b21b100775b23455f533 100644 (file)
@@ -1684,22 +1684,22 @@ off_t find_pack_entry_one(const unsigned char *sha1,
        return 0;
 }
 
-static int matches_pack_name(struct packed_git *p, const char *ig)
+int matches_pack_name(struct packed_git *p, const char *name)
 {
        const char *last_c, *c;
 
-       if (!strcmp(p->pack_name, ig))
-               return 0;
+       if (!strcmp(p->pack_name, name))
+               return 1;
 
        for (c = p->pack_name, last_c = c; *c;)
                if (*c == '/')
                        last_c = ++c;
                else
                        ++c;
-       if (!strcmp(last_c, ig))
-               return 0;
+       if (!strcmp(last_c, name))
+               return 1;
 
-       return 1;
+       return 0;
 }
 
 static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, const char **ignore_packed)
@@ -1717,7 +1717,7 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, cons
                if (ignore_packed) {
                        const char **ig;
                        for (ig = ignore_packed; *ig; ig++)
-                               if (!matches_pack_name(p, *ig))
+                               if (matches_pack_name(p, *ig))
                                        break;
                        if (*ig)
                                goto next;