update-index: new options to enable/disable split index mode
authorNguyễn Thái Ngọc Duy <pclouds@gmail.com>
Fri, 13 Jun 2014 12:19:44 +0000 (19:19 +0700)
committerJunio C Hamano <gitster@pobox.com>
Fri, 13 Jun 2014 18:49:41 +0000 (11:49 -0700)
If you have a large work tree but only make changes in a subset, then
$GIT_DIR/index's size should be stable after a while. If you change
branches that touch something else, $GIT_DIR/index's size may grow
large that it becomes as slow as the unified index. Do --split-index
again occasionally to force all changes back to the shared index and
keep $GIT_DIR/index small.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-update-index.txt
builtin/update-index.c
cache.h
read-cache.c
split-index.c
index d6de4a008ce74b9aa3f83035b4392e8d4505fae7..dfc09d93d85f5ca598447e98327518082c8e1a16 100644 (file)
@@ -161,6 +161,17 @@ may not support it yet.
        Only meaningful with `--stdin` or `--index-info`; paths are
        separated with NUL character instead of LF.
 
+--split-index::
+--no-split-index::
+       Enable or disable split index mode. If enabled, the index is
+       split into two files, $GIT_DIR/index and $GIT_DIR/sharedindex.<SHA-1>.
+       Changes are accumulated in $GIT_DIR/index while the shared
+       index file contains all index entries stays unchanged. If
+       split-index mode is already enabled and `--split-index` is
+       given again, all changes in $GIT_DIR/index are pushed back to
+       the shared index file. This mode is designed for very large
+       indexes that take a signficant amount of time to read or write.
+
 \--::
        Do not interpret any more arguments as options.
 
index f7a19c47a6094ccc4fbb8a8b4048b744bc7f12e4..b0503f4e3d80de42b3b31858303a8e0cbda2b9e4 100644 (file)
@@ -13,6 +13,7 @@
 #include "parse-options.h"
 #include "pathspec.h"
 #include "dir.h"
+#include "split-index.h"
 
 /*
  * Default to not allowing changes to the list of files. The
@@ -742,6 +743,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
        char set_executable_bit = 0;
        struct refresh_params refresh_args = {0, &has_errors};
        int lock_error = 0;
+       int split_index = -1;
        struct lock_file *lock_file;
        struct parse_opt_ctx_t ctx;
        int parseopt_state = PARSE_OPT_UNKNOWN;
@@ -824,6 +826,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
                        resolve_undo_clear_callback},
                OPT_INTEGER(0, "index-version", &preferred_index_format,
                        N_("write index in this format")),
+               OPT_BOOL(0, "split-index", &split_index,
+                       N_("enable or disable split index")),
                OPT_END()
        };
 
@@ -917,6 +921,20 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
                strbuf_release(&buf);
        }
 
+       if (split_index > 0) {
+               init_split_index(&the_index);
+               the_index.cache_changed |= SPLIT_INDEX_ORDERED;
+       } else if (!split_index && the_index.split_index) {
+               /*
+                * can't discard_split_index(&the_index); because that
+                * will destroy split_index->base->cache[], which may
+                * be shared with the_index.cache[]. So yeah we're
+                * leaking a bit here.
+                */
+               the_index.split_index = NULL;
+               the_index.cache_changed |= SOMETHING_CHANGED;
+       }
+
        if (active_cache_changed) {
                if (newfd < 0) {
                        if (refresh_args.flags & REFRESH_QUIET)
diff --git a/cache.h b/cache.h
index ddb7cd26d1d787b06f366db5b0c6621d1cee5268..7523c28ec7bdc021abb4c5dfb68c6e7aeedf178a 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -278,6 +278,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define CE_ENTRY_ADDED         (1 << 3)
 #define RESOLVE_UNDO_CHANGED   (1 << 4)
 #define CACHE_TREE_CHANGED     (1 << 5)
+#define SPLIT_INDEX_ORDERED    (1 << 6)
 
 struct split_index;
 struct index_state {
index af475dc58cce5acce258be6d8ee3835af88a15b3..8ecb9599e58d3e08e6cf84f3d5403eae59d1e997 100644 (file)
@@ -15,6 +15,7 @@
 #include "strbuf.h"
 #include "varint.h"
 #include "split-index.h"
+#include "sigchain.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
                                               unsigned int options);
@@ -39,7 +40,8 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
-                CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED)
+                CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
+                SPLIT_INDEX_ORDERED)
 
 struct index_state the_index;
 static const char *alternate_index_output;
@@ -1860,7 +1862,8 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile
                rollback_lock_file(lockfile);
 }
 
-static int do_write_index(struct index_state *istate, int newfd)
+static int do_write_index(struct index_state *istate, int newfd,
+                         int strip_extensions)
 {
        git_SHA_CTX c;
        struct cache_header hdr;
@@ -1923,7 +1926,7 @@ static int do_write_index(struct index_state *istate, int newfd)
        strbuf_release(&previous_name_buf);
 
        /* Write extension data here */
-       if (istate->split_index) {
+       if (!strip_extensions && istate->split_index) {
                struct strbuf sb = STRBUF_INIT;
 
                err = write_link_extension(&sb, istate) < 0 ||
@@ -1934,7 +1937,7 @@ static int do_write_index(struct index_state *istate, int newfd)
                if (err)
                        return -1;
        }
-       if (istate->cache_tree) {
+       if (!strip_extensions && istate->cache_tree) {
                struct strbuf sb = STRBUF_INIT;
 
                cache_tree_write(&sb, istate->cache_tree);
@@ -1944,7 +1947,7 @@ static int do_write_index(struct index_state *istate, int newfd)
                if (err)
                        return -1;
        }
-       if (istate->resolve_undo) {
+       if (!strip_extensions && istate->resolve_undo) {
                struct strbuf sb = STRBUF_INIT;
 
                resolve_undo_write(&sb, istate->resolve_undo);
@@ -1985,7 +1988,7 @@ static int commit_locked_index(struct lock_file *lk)
 static int do_write_locked_index(struct index_state *istate, struct lock_file *lock,
                                 unsigned flags)
 {
-       int ret = do_write_index(istate, lock->fd);
+       int ret = do_write_index(istate, lock->fd, 0);
        if (ret)
                return ret;
        assert((flags & (COMMIT_LOCK | CLOSE_LOCK)) !=
@@ -2009,6 +2012,52 @@ static int write_split_index(struct index_state *istate,
        return ret;
 }
 
+static char *temporary_sharedindex;
+
+static void remove_temporary_sharedindex(void)
+{
+       if (temporary_sharedindex) {
+               unlink_or_warn(temporary_sharedindex);
+               free(temporary_sharedindex);
+               temporary_sharedindex = NULL;
+       }
+}
+
+static void remove_temporary_sharedindex_on_signal(int signo)
+{
+       remove_temporary_sharedindex();
+       sigchain_pop(signo);
+       raise(signo);
+}
+
+static int write_shared_index(struct index_state *istate)
+{
+       struct split_index *si = istate->split_index;
+       static int installed_handler;
+       int fd, ret;
+
+       temporary_sharedindex = git_pathdup("sharedindex_XXXXXX");
+       fd = xmkstemp(temporary_sharedindex);
+       if (!installed_handler) {
+               atexit(remove_temporary_sharedindex);
+               sigchain_push_common(remove_temporary_sharedindex_on_signal);
+       }
+       move_cache_to_base_index(istate);
+       ret = do_write_index(si->base, fd, 1);
+       close(fd);
+       if (ret) {
+               remove_temporary_sharedindex();
+               return ret;
+       }
+       ret = rename(temporary_sharedindex,
+                    git_path("sharedindex.%s", sha1_to_hex(si->base->sha1)));
+       free(temporary_sharedindex);
+       temporary_sharedindex = NULL;
+       if (!ret)
+               hashcpy(si->base_sha1, si->base->sha1);
+       return ret;
+}
+
 int write_locked_index(struct index_state *istate, struct lock_file *lock,
                       unsigned flags)
 {
@@ -2020,6 +2069,12 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock,
                return do_write_locked_index(istate, lock, flags);
        }
 
+       if (istate->cache_changed & SPLIT_INDEX_ORDERED) {
+               int ret = write_shared_index(istate);
+               if (ret)
+                       return ret;
+       }
+
        return write_split_index(istate, lock, flags);
 }
 
index ee3246f391a6ea7524e1bc40b535f33fa662bd3c..21485e20665979458fb794ee4c3cc77c27032fa8 100644 (file)
@@ -74,6 +74,29 @@ static void mark_base_index_entries(struct index_state *base)
                base->cache[i]->index = i + 1;
 }
 
+void move_cache_to_base_index(struct index_state *istate)
+{
+       struct split_index *si = istate->split_index;
+       int i;
+
+       /*
+        * do not delete old si->base, its index entries may be shared
+        * with istate->cache[]. Accept a bit of leaking here because
+        * this code is only used by short-lived update-index.
+        */
+       si->base = xcalloc(1, sizeof(*si->base));
+       si->base->version = istate->version;
+       /* zero timestamp disables racy test in ce_write_index() */
+       si->base->timestamp = istate->timestamp;
+       ALLOC_GROW(si->base->cache, istate->cache_nr, si->base->cache_alloc);
+       si->base->cache_nr = istate->cache_nr;
+       memcpy(si->base->cache, istate->cache,
+              sizeof(*istate->cache) * istate->cache_nr);
+       mark_base_index_entries(si->base);
+       for (i = 0; i < si->base->cache_nr; i++)
+               si->base->cache[i]->ce_flags &= ~CE_UPDATE_IN_BASE;
+}
+
 static void mark_entry_for_delete(size_t pos, void *data)
 {
        struct index_state *istate = data;