The current index file for the repository.  It is
        usually not found in a bare repository.
 
+sharedindex.<SHA-1>::
+       The shared index part, to be referenced by $GIT_DIR/index and
+       other temporary index files. Only valid in split index mode.
+
 info::
        Additional information about the repository is recorded
        in this directory.
 
   (Version 4) In version 4, the padding after the pathname does not
   exist.
 
+  Interpretation of index entries in split index mode is completely
+  different. See below for details.
+
 == Extensions
 
 === Cached tree
   - At most three 160-bit object names of the entry in stages from 1 to 3
     (nothing is written for a missing stage).
 
+=== Split index
+
+  In split index mode, the majority of index entries could be stored
+  in a separate file. This extension records the changes to be made on
+  top of that to produce the final index.
+
+  The signature for this extension is { 'l', 'i', 'n', 'k' }.
+
+  The extension consists of:
+
+  - 160-bit SHA-1 of the shared index file. The shared index file path
+    is $GIT_DIR/sharedindex.<SHA-1>. If all 160 bits are zero, the
+    index does not require a shared index file.
+
+  - An ewah-encoded delete bitmap, each bit represents an entry in the
+    shared index. If a bit is set, its corresponding entry in the
+    shared index will be removed from the final index.  Note, because
+    a delete operation changes index entry positions, but we do need
+    original positions in replace phase, it's best to just mark
+    entries for removal, then do a mass deletion after replacement.
+
+  - An ewah-encoded replace bitmap, each bit represents an entry in
+    the shared index. If a bit is set, its corresponding entry in the
+    shared index will be replaced with an entry in this index
+    file. All replaced entries are stored in sorted order in this
+    index. The first "1" bit in the replace bitmap corresponds to the
+    first index entry, the second "1" bit to the second entry and so
+    on. Replaced entries may have empty path names to save space.
+
+  The remaining index entries after replaced ones will be added to the
+  final index. These added entries are also sorted by entry name then
+  stage.
 
 LIB_OBJS += shallow.o
 LIB_OBJS += sideband.o
 LIB_OBJS += sigchain.o
+LIB_OBJS += split-index.o
 LIB_OBJS += strbuf.o
 LIB_OBJS += streaming.o
 LIB_OBJS += string-list.o
 
        unsigned int ce_mode;
        unsigned int ce_flags;
        unsigned int ce_namelen;
+       unsigned int index;     /* for link extension */
        unsigned char sha1[20];
        char name[FLEX_ARRAY]; /* more */
 };
 #define RESOLVE_UNDO_CHANGED   (1 << 4)
 #define CACHE_TREE_CHANGED     (1 << 5)
 
+struct split_index;
 struct index_state {
        struct cache_entry **cache;
        unsigned int version;
        unsigned int cache_nr, cache_alloc, cache_changed;
        struct string_list *resolve_undo;
        struct cache_tree *cache_tree;
+       struct split_index *split_index;
        struct cache_time timestamp;
        unsigned name_hash_initialized : 1,
                 initialized : 1;
 
 #include "resolve-undo.h"
 #include "strbuf.h"
 #include "varint.h"
+#include "split-index.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
                                               unsigned int options);
 #define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) )
 #define CACHE_EXT_TREE 0x54524545      /* "TREE" */
 #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
+#define CACHE_EXT_LINK 0x6c696e6b        /* "link" */
+
+/* changes that can be kept in $GIT_DIR/index (basically all extensions) */
+#define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED)
 
 struct index_state the_index;
 static const char *alternate_index_output;
        copy_cache_entry(new, old);
        new->ce_flags &= ~CE_HASHED;
        new->ce_namelen = namelen;
+       new->index = 0;
        memcpy(new->name, new_name, namelen + 1);
 
        cache_tree_invalidate_path(istate, old->name);
        case CACHE_EXT_RESOLVE_UNDO:
                istate->resolve_undo = resolve_undo_read(data, sz);
                break;
+       case CACHE_EXT_LINK:
+               if (read_link_extension(istate, data, sz))
+                       return -1;
+               break;
        default:
                if (*ext < 'A' || 'Z' < *ext)
                        return error("index uses %.4s extension, which we do not understand",
        ce->ce_stat_data.sd_size  = get_be32(&ondisk->size);
        ce->ce_flags = flags & ~CE_NAMEMASK;
        ce->ce_namelen = len;
+       ce->index = 0;
        hashcpy(ce->sha1, ondisk->sha1);
        memcpy(ce->name, name, len);
        ce->name[len] = '\0';
 }
 
 /* remember to discard_cache() before reading a different cache! */
-int read_index_from(struct index_state *istate, const char *path)
+static int do_read_index(struct index_state *istate, const char *path,
+                        int must_exist)
 {
        int fd, i;
        struct stat st;
        istate->timestamp.nsec = 0;
        fd = open(path, O_RDONLY);
        if (fd < 0) {
-               if (errno == ENOENT)
+               if (!must_exist && errno == ENOENT)
                        return 0;
-               die_errno("index file open failed");
+               die_errno("%s: index file open failed", path);
        }
 
        if (fstat(fd, &st))
        die("index file corrupt");
 }
 
+/*
+ * Read the index file at "path" into istate, following the link
+ * extension to the shared index when one is recorded.
+ *
+ * If istate is already initialized, nothing is read and
+ * istate->cache_nr is returned. Otherwise the file is read with
+ * do_read_index(); when that leaves a split_index with a non-null
+ * base_sha1, $GIT_DIR/sharedindex.<SHA-1> is loaded into
+ * split_index->base and its entries are merged into istate.
+ *
+ * Returns the result of the last do_read_index() call made. Dies if
+ * the index has both entries of its own and a shared index, or if the
+ * SHA-1 of the loaded shared index differs from base_sha1.
+ */
+int read_index_from(struct index_state *istate, const char *path)
+{
+       struct split_index *split_index;
+       int ret;
+
+       /* istate->initialized covers both .git/index and .git/sharedindex.xxx */
+       if (istate->initialized)
+               return istate->cache_nr;
+
+       /* must_exist == 0: a missing index file is not an error */
+       ret = do_read_index(istate, path, 0);
+       split_index = istate->split_index;
+       /* not in split index mode */
+       if (!split_index)
+               return ret;
+
+       /* all-zero base SHA-1: no shared index file is required */
+       if (is_null_sha1(split_index->base_sha1))
+               return ret;
+       if (istate->cache_nr)
+               die("index in split-index mode must contain no entries");
+
+       /* reuse an existing base index_state after emptying it, or allocate one */
+       if (split_index->base)
+               discard_index(split_index->base);
+       else
+               split_index->base = xcalloc(1, sizeof(*split_index->base));
+       /* must_exist == 1: a missing shared index file is fatal */
+       ret = do_read_index(split_index->base,
+                           git_path("sharedindex.%s",
+                                    sha1_to_hex(split_index->base_sha1)), 1);
+       /*
+        * NOTE(review): base->sha1 is presumably filled in by
+        * do_read_index() from the file just read -- confirm.
+        */
+       if (hashcmp(split_index->base_sha1, split_index->base->sha1))
+               die("broken index, expect %s in %s, got %s",
+                   sha1_to_hex(split_index->base_sha1),
+                   git_path("sharedindex.%s",
+                                    sha1_to_hex(split_index->base_sha1)),
+                   sha1_to_hex(split_index->base->sha1));
+       merge_base_index(istate);
+       return ret;
+}
+
 int is_index_unborn(struct index_state *istate)
 {
        return (!istate->cache_nr && !istate->timestamp.sec);
 {
        int i;
 
-       for (i = 0; i < istate->cache_nr; i++)
+       for (i = 0; i < istate->cache_nr; i++) {
+               if (istate->cache[i]->index &&
+                   istate->split_index &&
+                   istate->split_index->base &&
+                   istate->cache[i]->index <= istate->split_index->base->cache_nr &&
+                   istate->cache[i] == istate->split_index->base->cache[istate->cache[i]->index - 1])
+                       continue;
                free(istate->cache[i]);
+       }
        resolve_undo_clear_index(istate);
        istate->cache_nr = 0;
        istate->cache_changed = 0;
        free(istate->cache);
        istate->cache = NULL;
        istate->cache_alloc = 0;
+       discard_split_index(istate);
        return 0;
 }
 
        strbuf_release(&previous_name_buf);
 
        /* Write extension data here */
+       if (istate->split_index) {
+               struct strbuf sb = STRBUF_INIT;
+
+               err = write_link_extension(&sb, istate) < 0 ||
+                       write_index_ext_header(&c, newfd, CACHE_EXT_LINK,
+                                              sb.len) < 0 ||
+                       ce_write(&c, newfd, sb.buf, sb.len) < 0;
+               strbuf_release(&sb);
+               if (err)
+                       return -1;
+       }
        if (istate->cache_tree) {
                struct strbuf sb = STRBUF_INIT;
 
                return ret;
 }
 
+/*
+ * Write istate through do_write_locked_index() with its entry count
+ * temporarily set to zero, then restore the count.
+ *
+ * Returns whatever do_write_locked_index() returned; the count is
+ * restored regardless of that result.
+ */
+static int write_split_index(struct index_state *istate,
+                            struct lock_file *lock,
+                            unsigned flags)
+{
+       int ret;
+       /* saves cache_nr in the split_index and zeroes it */
+       prepare_to_write_split_index(istate);
+       ret = do_write_locked_index(istate, lock, flags);
+       /* puts the saved cache_nr back */
+       finish_writing_split_index(istate);
+       return ret;
+}
+
+/*
+ * Write istate under "lock", choosing between a full index and a
+ * split index.
+ *
+ * A full index is written when istate has no split_index, or when
+ * cache_changed has any bit set outside EXTMASK. In the latter case
+ * base_sha1 is cleared first, which the index format defines as "no
+ * shared index file required". Otherwise the write goes through
+ * write_split_index().
+ */
 int write_locked_index(struct index_state *istate, struct lock_file *lock,
                        unsigned flags)
 {
-       return do_write_locked_index(istate, lock, flags);
+       struct split_index *si = istate->split_index;
+
+       if (!si || (istate->cache_changed & ~EXTMASK)) {
+               /* stop referring to the shared index before a full write */
+               if (si)
+                       hashclr(si->base_sha1);
+               return do_write_locked_index(istate, lock, flags);
+       }
+
+       return write_split_index(istate, lock, flags);
 }
 
 /*
 
--- /dev/null
+#include "cache.h"
+#include "split-index.h"
+
+/*
+ * Return istate->split_index, creating it on first use.
+ *
+ * A newly created split_index is allocated with xcalloc() and starts
+ * with a refcount of 1. An existing one is returned unchanged.
+ */
+struct split_index *init_split_index(struct index_state *istate)
+{
+       if (!istate->split_index) {
+               istate->split_index = xcalloc(1, sizeof(*istate->split_index));
+               istate->split_index->refcount = 1;
+       }
+       return istate->split_index;
+}
+
+/*
+ * Parse the payload of a link extension ("sz" bytes at "data_").
+ *
+ * The payload must be exactly 20 bytes, which are copied into the
+ * split_index's base_sha1 (the split_index is created if needed).
+ * No bitmaps are parsed here: any bytes after the SHA-1 are rejected.
+ *
+ * Returns 0 on success, or the result of error() when the payload is
+ * shorter than 20 bytes or has trailing bytes. In the trailing-bytes
+ * case the split_index has already been created and base_sha1 set.
+ */
+int read_link_extension(struct index_state *istate,
+                        const void *data_, unsigned long sz)
+{
+       const unsigned char *data = data_;
+       struct split_index *si;
+       if (sz < 20)
+               return error("corrupt link extension (too short)");
+       si = init_split_index(istate);
+       hashcpy(si->base_sha1, data);
+       /* step past the SHA-1; anything left over is unexpected */
+       data += 20;
+       sz -= 20;
+       if (sz)
+               return error("garbage at the end of link extension");
+       return 0;
+}
+
+/*
+ * Append the link extension payload for istate to "sb": the 20 bytes
+ * of split_index->base_sha1 and nothing else.
+ *
+ * istate->split_index is dereferenced without a NULL check, so the
+ * caller must only call this when it is set. Always returns 0.
+ */
+int write_link_extension(struct strbuf *sb,
+                        struct index_state *istate)
+{
+       struct split_index *si = istate->split_index;
+       strbuf_add(sb, si->base_sha1, 20);
+       return 0;
+}
+
+/* Record in every entry of "base" its own 1-based position in base->cache[]. */
+static void mark_base_index_entries(struct index_state *base)
+{
+       int i;
+       /*
+        * To keep track of the shared entries between
+        * istate->split_index->base->cache[] and istate->cache[],
+        * the base entry position is stored in each base entry. All
+        * positions start from 1 instead of 0, which is reserved to
+        * say "this is a new entry".
+        */
+       for (i = 0; i < base->cache_nr; i++)
+               base->cache[i]->index = i + 1;
+}
+
+/*
+ * Fill istate->cache[] from the base (shared) index.
+ *
+ * Every base entry is first tagged with its 1-based position, then
+ * istate->cache_nr is overwritten with the base's entry count and the
+ * base's array of entry pointers is copied over. Only the pointers are
+ * copied, so both indexes refer to the same cache_entry objects.
+ *
+ * Dereferences istate->split_index and its base without NULL checks.
+ */
+void merge_base_index(struct index_state *istate)
+{
+       struct split_index *si = istate->split_index;
+
+       mark_base_index_entries(si->base);
+       istate->cache_nr = si->base->cache_nr;
+       /* NOTE(review): ALLOC_GROW presumably ensures room for cache_nr pointers -- confirm */
+       ALLOC_GROW(istate->cache, istate->cache_nr, istate->cache_alloc);
+       memcpy(istate->cache, si->base->cache,
+              sizeof(*istate->cache) * istate->cache_nr);
+}
+
+/*
+ * Save istate->cache_nr in the split_index (created if needed) and
+ * set cache_nr to 0. The cache[] array itself is not touched.
+ * finish_writing_split_index() restores the saved count.
+ */
+void prepare_to_write_split_index(struct index_state *istate)
+{
+       struct split_index *si = init_split_index(istate);
+       /* take cache[] out temporarily */
+       si->saved_cache_nr = istate->cache_nr;
+       istate->cache_nr = 0;
+}
+
+/*
+ * Restore istate->cache_nr from the value stored in the split_index
+ * by prepare_to_write_split_index().
+ */
+void finish_writing_split_index(struct index_state *istate)
+{
+       struct split_index *si = init_split_index(istate);
+       istate->cache_nr = si->saved_cache_nr;
+}
+
+/*
+ * Drop istate's reference to its split_index, if it has one.
+ *
+ * istate->split_index is set to NULL and the refcount decremented.
+ * When the count reaches zero the base index (if any) is discarded
+ * and freed, followed by the split_index itself.
+ */
+void discard_split_index(struct index_state *istate)
+{
+       struct split_index *si = istate->split_index;
+       if (!si)
+               return;
+       istate->split_index = NULL;
+       si->refcount--;
+       /* another index_state still holds a reference */
+       if (si->refcount)
+               return;
+       if (si->base) {
+               discard_index(si->base);
+               free(si->base);
+       }
+       free(si);
+}
 
--- /dev/null
+#ifndef SPLIT_INDEX_H
+#define SPLIT_INDEX_H
+
+struct index_state;
+struct strbuf;
+
+/* Split index state attached to an index_state. */
+struct split_index {
+       /* SHA-1 of the shared index file; all zero means none is required */
+       unsigned char base_sha1[20];
+       /* the shared index once loaded, or NULL */
+       struct index_state *base;
+       /* entry count held here while cache_nr is zeroed for writing */
+       unsigned int saved_cache_nr;
+       /* number of index_state structures pointing at this split_index */
+       int refcount;
+};
+
+/* Return istate->split_index, allocating it on first use. */
+struct split_index *init_split_index(struct index_state *istate);
+/* Parse / serialize the payload of the "link" index extension. */
+int read_link_extension(struct index_state *istate,
+                       const void *data, unsigned long sz);
+int write_link_extension(struct strbuf *sb,
+                        struct index_state *istate);
+/*
+ * NOTE(review): no definition of move_cache_to_base_index() is visible
+ * alongside these declarations -- confirm it is defined elsewhere.
+ */
+void move_cache_to_base_index(struct index_state *istate);
+/* Fill istate->cache[] with the entries of the loaded base index. */
+void merge_base_index(struct index_state *istate);
+/* Zero and later restore istate->cache_nr around an index write. */
+void prepare_to_write_split_index(struct index_state *istate);
+void finish_writing_split_index(struct index_state *istate);
+/* Drop istate's reference to its split_index, freeing it at refcount 0. */
+void discard_split_index(struct index_state *istate);
+
+#endif
 
 #include "progress.h"
 #include "refs.h"
 #include "attr.h"
+#include "split-index.h"
 
 /*
  * Error messages expected by scripts out of plumbing commands such as
        o->result.timestamp.sec = o->src_index->timestamp.sec;
        o->result.timestamp.nsec = o->src_index->timestamp.nsec;
        o->result.version = o->src_index->version;
+       o->result.split_index = o->src_index->split_index;
+       if (o->result.split_index)
+               o->result.split_index->refcount++;
        hashcpy(o->result.sha1, o->src_index->sha1);
        o->merge_size = len;
        mark_all_ce_unused(o->src_index);