Merge branch 'nd/unpack-trees-with-cache-tree'
authorJunio C Hamano <gitster@pobox.com>
Mon, 17 Sep 2018 20:53:53 +0000 (13:53 -0700)
committerJunio C Hamano <gitster@pobox.com>
Mon, 17 Sep 2018 20:53:53 +0000 (13:53 -0700)
The unpack_trees() API used in checking out a branch and merging
walks one or more trees along with the index. When the cache-tree
in the index tells us that we are walking a tree whose flattened
contents is known (i.e. matches a span in the index), as linearly
scanning a span in the index is much more efficient than having to
open tree objects recursively and listing their entries, the walk
can be optimized, which is done in this topic.

* nd/unpack-trees-with-cache-tree:
Document update for nd/unpack-trees-with-cache-tree
cache-tree: verify valid cache-tree in the test suite
unpack-trees: add missing cache invalidation
unpack-trees: reuse (still valid) cache-tree from src_index
unpack-trees: reduce malloc in cache-tree walk
unpack-trees: optimize walking same trees with cache-tree
unpack-trees: add performance tracing
trace.h: support nested performance tracing

1  2 
cache-tree.c
cache-tree.h
diff-lib.c
dir.c
preload-index.c
read-cache.c
t/README
t/test-lib.sh
unpack-trees.c
diff --combined cache-tree.c
index 16ea022c46d3b281d04a3956f865d8a886a5b714,c3c206427cccbd0a468974eb59d4b41b6bd84b43..490a25adf06e4e1fcd70530943f604bc7c9f1b58
@@@ -4,6 -4,7 +4,7 @@@
  #include "tree-walk.h"
  #include "cache-tree.h"
  #include "object-store.h"
+ #include "replace-object.h"
  
  #ifndef DEBUG
  #define DEBUG 0
@@@ -433,7 -434,9 +434,9 @@@ int cache_tree_update(struct index_stat
  
        if (i)
                return i;
+       trace_performance_enter();
        i = update_one(it, cache, entries, "", 0, &skip, flags);
+       trace_performance_leave("cache_tree_update");
        if (i < 0)
                return i;
        istate->cache_changed |= CACHE_TREE_CHANGED;
@@@ -652,6 -655,11 +655,6 @@@ out
        return ret;
  }
  
 -int write_cache_as_tree(struct object_id *oid, int flags, const char *prefix)
 -{
 -      return write_index_as_tree(oid, &the_index, get_index_file(), flags, prefix);
 -}
 -
  static void prime_cache_tree_rec(struct cache_tree *it, struct tree *tree)
  {
        struct tree_desc desc;
@@@ -718,3 -726,87 +721,80 @@@ int cache_tree_matches_traversal(struc
                return it->entry_count;
        return 0;
  }
 -int update_main_cache_tree(int flags)
 -{
 -      if (!the_index.cache_tree)
 -              the_index.cache_tree = cache_tree();
 -      return cache_tree_update(&the_index, flags);
 -}
 -
+ static void verify_one(struct index_state *istate,
+                      struct cache_tree *it,
+                      struct strbuf *path)
+ {
+       int i, pos, len = path->len;
+       struct strbuf tree_buf = STRBUF_INIT;
+       struct object_id new_oid;
+       for (i = 0; i < it->subtree_nr; i++) {
+               strbuf_addf(path, "%s/", it->down[i]->name);
+               verify_one(istate, it->down[i]->cache_tree, path);
+               strbuf_setlen(path, len);
+       }
+       if (it->entry_count < 0 ||
+           /* no verification on tests (t7003) that replace trees */
+           lookup_replace_object(the_repository, &it->oid) != &it->oid)
+               return;
+       if (path->len) {
+               pos = index_name_pos(istate, path->buf, path->len);
+               pos = -pos - 1;
+       } else {
+               pos = 0;
+       }
+       i = 0;
+       while (i < it->entry_count) {
+               struct cache_entry *ce = istate->cache[pos + i];
+               const char *slash;
+               struct cache_tree_sub *sub = NULL;
+               const struct object_id *oid;
+               const char *name;
+               unsigned mode;
+               int entlen;
+               if (ce->ce_flags & (CE_STAGEMASK | CE_INTENT_TO_ADD | CE_REMOVE))
+                       BUG("%s with flags 0x%x should not be in cache-tree",
+                           ce->name, ce->ce_flags);
+               name = ce->name + path->len;
+               slash = strchr(name, '/');
+               if (slash) {
+                       entlen = slash - name;
+                       sub = find_subtree(it, ce->name + path->len, entlen, 0);
+                       if (!sub || sub->cache_tree->entry_count < 0)
+                               BUG("bad subtree '%.*s'", entlen, name);
+                       oid = &sub->cache_tree->oid;
+                       mode = S_IFDIR;
+                       i += sub->cache_tree->entry_count;
+               } else {
+                       oid = &ce->oid;
+                       mode = ce->ce_mode;
+                       entlen = ce_namelen(ce) - path->len;
+                       i++;
+               }
+               strbuf_addf(&tree_buf, "%o %.*s%c", mode, entlen, name, '\0');
+               strbuf_add(&tree_buf, oid->hash, the_hash_algo->rawsz);
+       }
+       hash_object_file(tree_buf.buf, tree_buf.len, tree_type, &new_oid);
+       if (oidcmp(&new_oid, &it->oid))
+               BUG("cache-tree for path %.*s does not match. "
+                   "Expected %s got %s", len, path->buf,
+                   oid_to_hex(&new_oid), oid_to_hex(&it->oid));
+       strbuf_setlen(path, len);
+       strbuf_release(&tree_buf);
+ }
+ void cache_tree_verify(struct index_state *istate)
+ {
+       struct strbuf path = STRBUF_INIT;
+       if (!istate->cache_tree)
+               return;
+       verify_one(istate, istate->cache_tree, &path);
+       strbuf_release(&path);
+ }
diff --combined cache-tree.h
index fc0c842e773f648d64858d1891f64d222633fc3a,c1fde531f9e3353de823c7934628a1faf74dffba..0ab6784ffea5da0581c905738aaa66215266b5f9
@@@ -32,7 -32,10 +32,8 @@@ struct cache_tree *cache_tree_read(cons
  
  int cache_tree_fully_valid(struct cache_tree *);
  int cache_tree_update(struct index_state *, int);
+ void cache_tree_verify(struct index_state *);
  
 -int update_main_cache_tree(int);
 -
  /* bitmasks to write_cache_as_tree flags */
  #define WRITE_TREE_MISSING_OK 1
  #define WRITE_TREE_IGNORE_CACHE_TREE 2
  #define WRITE_TREE_PREFIX_ERROR (-3)
  
  int write_index_as_tree(struct object_id *oid, struct index_state *index_state, const char *index_path, int flags, const char *prefix);
 -int write_cache_as_tree(struct object_id *oid, int flags, const char *prefix);
  void prime_cache_tree(struct index_state *, struct tree *);
  
  int cache_tree_matches_traversal(struct cache_tree *, struct name_entry *ent, struct traverse_info *info);
  
 +#ifndef NO_THE_INDEX_COMPATIBILITY_MACROS
 +static inline int write_cache_as_tree(struct object_id *oid, int flags, const char *prefix)
 +{
 +      return write_index_as_tree(oid, &the_index, get_index_file(), flags, prefix);
 +}
 +
 +static inline int update_main_cache_tree(int flags)
 +{
 +      if (!the_index.cache_tree)
 +              the_index.cache_tree = cache_tree();
 +      return cache_tree_update(&the_index, flags);
 +}
 +#endif
 +
  #endif
diff --combined diff-lib.c
index 88a98b1c06e8792136eced81a7da5f9aacb8542f,d5bbb7ea509f1ad15559f5af12dd0846648c2d05..70cddbf43664a42b5647ed21af279c1492abb790
@@@ -109,7 -109,7 +109,7 @@@ int run_diff_files(struct rev_info *rev
                if (diff_can_quit_early(&revs->diffopt))
                        break;
  
 -              if (!ce_path_match(ce, &revs->prune_data, NULL))
 +              if (!ce_path_match(&the_index, ce, &revs->prune_data, NULL))
                        continue;
  
                if (ce_stage(ce)) {
@@@ -474,7 -474,7 +474,7 @@@ static int oneway_diff(const struct cac
        if (tree == o->df_conflict_entry)
                tree = NULL;
  
 -      if (ce_path_match(idx ? idx : tree, &revs->prune_data, NULL)) {
 +      if (ce_path_match(&the_index, idx ? idx : tree, &revs->prune_data, NULL)) {
                do_oneway_diff(o, idx, tree);
                if (diff_can_quit_early(&revs->diffopt)) {
                        o->exiting_early = 1;
@@@ -518,11 -518,11 +518,11 @@@ static int diff_cache(struct rev_info *
  int run_diff_index(struct rev_info *revs, int cached)
  {
        struct object_array_entry *ent;
-       uint64_t start = getnanotime();
  
        if (revs->pending.nr != 1)
                BUG("run_diff_index must be passed exactly one tree");
  
+       trace_performance_enter();
        ent = revs->pending.objects;
        if (diff_cache(revs, &ent->item->oid, ent->name, cached))
                exit(128);
        diffcore_fix_diff_index(&revs->diffopt);
        diffcore_std(&revs->diffopt);
        diff_flush(&revs->diffopt);
-       trace_performance_since(start, "diff-index");
+       trace_performance_leave("diff-index");
        return 0;
  }
  
diff --combined dir.c
index aceb0d48692b7d727cfd2645ae88b0d45d660c09,18b57b94ccd5d97744b33d607fa6924dda3fd371..995b8e32616055cdb228d30e3e82e9d70d24512e
--- 1/dir.c
--- 2/dir.c
+++ b/dir.c
@@@ -276,13 -276,12 +276,13 @@@ static int do_read_blob(const struct ob
  #define DO_MATCH_DIRECTORY (1<<1)
  #define DO_MATCH_SUBMODULE (1<<2)
  
 -static int match_attrs(const char *name, int namelen,
 +static int match_attrs(const struct index_state *istate,
 +                     const char *name, int namelen,
                       const struct pathspec_item *item)
  {
        int i;
  
 -      git_check_attr(name, item->attr_check);
 +      git_check_attr(istate, name, item->attr_check);
        for (i = 0; i < item->attr_match_nr; i++) {
                const char *value;
                int matched;
   *
   * It returns 0 when there is no match.
   */
 -static int match_pathspec_item(const struct pathspec_item *item, int prefix,
 +static int match_pathspec_item(const struct index_state *istate,
 +                             const struct pathspec_item *item, int prefix,
                               const char *name, int namelen, unsigned flags)
  {
        /* name/namelen has prefix cut off by caller */
            strncmp(item->match, name - prefix, item->prefix))
                return 0;
  
 -      if (item->attr_match_nr && !match_attrs(name, namelen, item))
 +      if (item->attr_match_nr && !match_attrs(istate, name, namelen, item))
                return 0;
  
        /* If the match was just the prefix, we matched */
   * pathspec did not match any names, which could indicate that the
   * user mistyped the nth pathspec.
   */
 -static int do_match_pathspec(const struct pathspec *ps,
 +static int do_match_pathspec(const struct index_state *istate,
 +                           const struct pathspec *ps,
                             const char *name, int namelen,
                             int prefix, char *seen,
                             unsigned flags)
                 */
                if (seen && ps->items[i].magic & PATHSPEC_EXCLUDE)
                        seen[i] = MATCHED_FNMATCH;
 -              how = match_pathspec_item(ps->items+i, prefix, name,
 +              how = match_pathspec_item(istate, ps->items+i, prefix, name,
                                          namelen, flags);
                if (ps->recursive &&
                    (ps->magic & PATHSPEC_MAXDEPTH) &&
        return retval;
  }
  
 -int match_pathspec(const struct pathspec *ps,
 +int match_pathspec(const struct index_state *istate,
 +                 const struct pathspec *ps,
                   const char *name, int namelen,
                   int prefix, char *seen, int is_dir)
  {
        int positive, negative;
        unsigned flags = is_dir ? DO_MATCH_DIRECTORY : 0;
 -      positive = do_match_pathspec(ps, name, namelen,
 +      positive = do_match_pathspec(istate, ps, name, namelen,
                                     prefix, seen, flags);
        if (!(ps->magic & PATHSPEC_EXCLUDE) || !positive)
                return positive;
 -      negative = do_match_pathspec(ps, name, namelen,
 +      negative = do_match_pathspec(istate, ps, name, namelen,
                                     prefix, seen,
                                     flags | DO_MATCH_EXCLUDE);
        return negative ? 0 : positive;
  /**
   * Check if a submodule is a superset of the pathspec
   */
 -int submodule_path_match(const struct pathspec *ps,
 +int submodule_path_match(const struct index_state *istate,
 +                       const struct pathspec *ps,
                         const char *submodule_name,
                         char *seen)
  {
 -      int matched = do_match_pathspec(ps, submodule_name,
 +      int matched = do_match_pathspec(istate, ps, submodule_name,
                                        strlen(submodule_name),
                                        0, seen,
                                        DO_MATCH_DIRECTORY |
@@@ -2268,10 -2263,13 +2268,13 @@@ int read_directory(struct dir_struct *d
                   const char *path, int len, const struct pathspec *pathspec)
  {
        struct untracked_cache_dir *untracked;
-       uint64_t start = getnanotime();
  
-       if (has_symlink_leading_path(path, len))
+       trace_performance_enter();
+       if (has_symlink_leading_path(path, len)) {
+               trace_performance_leave("read directory %.*s", len, path);
                return dir->nr;
+       }
  
        untracked = validate_untracked_cache(dir, len, pathspec);
        if (!untracked)
                dir->nr = i;
        }
  
-       trace_performance_since(start, "read directory %.*s", len, path);
+       trace_performance_leave("read directory %.*s", len, path);
        if (dir->untracked) {
                static int force_untracked_cache = -1;
                static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS);
diff --combined preload-index.c
index 71cd2437a3b33b343696bf96067603e8dc9e4464,d7f7919ba2ec828fce3fe95fc58884923568ea26..f7365761f4725d6f7ddbb6d8c9c8fe92488de6ce
@@@ -58,7 -58,7 +58,7 @@@ static void *preload_thread(void *_data
                        continue;
                if (ce->ce_flags & CE_FSMONITOR_VALID)
                        continue;
 -              if (!ce_path_match(ce, &p->pathspec, NULL))
 +              if (!ce_path_match(index, ce, &p->pathspec, NULL))
                        continue;
                if (threaded_has_symlink_leading_path(&cache, ce->name, ce_namelen(ce)))
                        continue;
@@@ -78,7 -78,6 +78,6 @@@ static void preload_index(struct index_
  {
        int threads, i, work, offset;
        struct thread_data data[MAX_PARALLEL];
-       uint64_t start = getnanotime();
  
        if (!core_preload_index)
                return;
@@@ -88,6 -87,7 +87,7 @@@
                threads = 2;
        if (threads < 2)
                return;
+       trace_performance_enter();
        if (threads > MAX_PARALLEL)
                threads = MAX_PARALLEL;
        offset = 0;
                if (pthread_join(p->pthread, NULL))
                        die("unable to join threaded lstat");
        }
-       trace_performance_since(start, "preload index");
+       trace_performance_leave("preload index");
  }
  #endif
  
diff --combined read-cache.c
index 7b1354d7590a70ecbd6e508bdd95eafd4793efcc,41f313bc9e115bf51fd637dfacd116d42a5dd98d..d7b617f320939ad2e3659223d04e7537b2707658
@@@ -1476,8 -1476,8 +1476,8 @@@ int refresh_index(struct index_state *i
        const char *typechange_fmt;
        const char *added_fmt;
        const char *unmerged_fmt;
-       uint64_t start = getnanotime();
  
+       trace_performance_enter();
        modified_fmt = (in_porcelain ? "M\t%s\n" : "%s: needs update\n");
        deleted_fmt = (in_porcelain ? "D\t%s\n" : "%s: needs update\n");
        typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n");
                if (ignore_submodules && S_ISGITLINK(ce->ce_mode))
                        continue;
  
 -              if (pathspec && !ce_path_match(ce, pathspec, seen))
 +              if (pathspec && !ce_path_match(&the_index, ce, pathspec, seen))
                        filtered = 1;
  
                if (ce_stage(ce)) {
  
                replace_index_entry(istate, i, new_entry);
        }
-       trace_performance_since(start, "refresh index");
+       trace_performance_leave("refresh index");
        return has_errors;
  }
  
@@@ -2002,7 -2002,6 +2002,6 @@@ static void freshen_shared_index(const 
  int read_index_from(struct index_state *istate, const char *path,
                    const char *gitdir)
  {
-       uint64_t start = getnanotime();
        struct split_index *split_index;
        int ret;
        char *base_oid_hex;
        if (istate->initialized)
                return istate->cache_nr;
  
+       trace_performance_enter();
        ret = do_read_index(istate, path, 0);
-       trace_performance_since(start, "read cache %s", path);
+       trace_performance_leave("read cache %s", path);
  
        split_index = istate->split_index;
        if (!split_index || is_null_oid(&split_index->base_oid)) {
                return ret;
        }
  
+       trace_performance_enter();
        if (split_index->base)
                discard_index(split_index->base);
        else
        freshen_shared_index(base_path, 0);
        merge_base_index(istate);
        post_read_index_from(istate);
-       trace_performance_since(start, "read cache %s", base_path);
        free(base_path);
+       trace_performance_leave("read cache %s", base_path);
        return ret;
  }
  
@@@ -2743,6 -2744,9 +2744,9 @@@ int write_locked_index(struct index_sta
        int new_shared_index, ret;
        struct split_index *si = istate->split_index;
  
+       if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0))
+               cache_tree_verify(istate);
        if ((flags & SKIP_IF_UNCHANGED) && !istate->cache_changed) {
                if (flags & COMMIT_LOCK)
                        rollback_lock_file(lock);
@@@ -2939,6 -2943,8 +2943,8 @@@ void move_index_extensions(struct index
  {
        dst->untracked = src->untracked;
        src->untracked = NULL;
+       dst->cache_tree = src->cache_tree;
+       src->cache_tree = NULL;
  }
  
  struct cache_entry *dup_cache_entry(const struct cache_entry *ce,
diff --combined t/README
index 9028b47d923ca027a146da82060dae395d3f7999,0e7cc2373419a7297a73978e01319e8ccc23402c..204b9f4cc5cbf8ad265423f6062cd99c24dd33d3
+++ b/t/README
@@@ -315,10 -315,10 +315,14 @@@ packs on demand. This normally only hap
  over 2GB. This variable forces the code path on any object larger than
  <n> bytes.
  
 +GIT_TEST_OE_DELTA_SIZE=<n> exercises the uncomon pack-objects code
 +path where deltas larger than this limit require extra memory
 +allocation for bookkeeping.
 +
+ GIT_TEST_VALIDATE_INDEX_CACHE_ENTRIES=<boolean> checks that cache-tree
+ records are valid when the index is written out or after a merge. This
+ is mostly to catch missing invalidation. Default is true.
  Naming Tests
  ------------
  
diff --combined t/test-lib.sh
index 44288cbb598435a5dfff05e8e895cca85e53e804,5b50f6e2e67238d94a470af3de8e5f3711776d39..3f95bfda605f7ad1f9b0b0385ffdc91e5cca415e
@@@ -867,7 -867,7 +867,7 @@@ the
                # handle only executables, unless they are shell libraries that
                # need to be in the exec-path.
                test -x "$1" ||
 -              test "# " = "$(head -c 2 <"$1")" ||
 +              test "# " = "$(test_copy_bytes 2 <"$1")" ||
                return;
  
                base=$(basename "$1")
                # do not override scripts
                if test -x "$symlink_target" &&
                    test ! -d "$symlink_target" &&
 -                  test "#!" != "$(head -c 2 < "$symlink_target")"
 +                  test "#!" != "$(test_copy_bytes 2 <"$symlink_target")"
                then
                        symlink_target=../valgrind.sh
                fi
        test_set_prereq C_LOCALE_OUTPUT
  fi
  
+ if test -z "$GIT_TEST_CHECK_CACHE_TREE"
+ then
+       GIT_TEST_CHECK_CACHE_TREE=true
+       export GIT_TEST_CHECK_CACHE_TREE
+ fi
  test_lazy_prereq PIPE '
        # test whether the filesystem supports FIFOs
        test_have_prereq !MINGW,!CYGWIN &&
@@@ -1104,20 -1110,6 +1110,20 @@@ test_lazy_prereq CASE_INSENSITIVE_FS 
        test "$(cat CamelCase)" != good
  '
  
 +test_lazy_prereq FUNNYNAMES '
 +      test_have_prereq !MINGW &&
 +      touch -- \
 +              "FUNNYNAMES tab embedded" \
 +              "FUNNYNAMES \"quote embedded\"" \
 +              "FUNNYNAMES newline
 +embedded" 2>/dev/null &&
 +      rm -- \
 +              "FUNNYNAMES tab embedded" \
 +              "FUNNYNAMES \"quote embedded\"" \
 +              "FUNNYNAMES newline
 +embedded" 2>/dev/null
 +'
 +
  test_lazy_prereq UTF8_NFD_TO_NFC '
        # check whether FS converts nfd unicode to nfc
        auml=$(printf "\303\244")
diff --combined unpack-trees.c
index cfa88bb6ec8238a6a6a41d32d646610fd2699048,515c37437308b097c7f26442de19bb35d92c8da8..55f864ac577e5951db5345c358a0ec8bb8291919
@@@ -336,46 -336,6 +336,46 @@@ static struct progress *get_progress(st
        return start_delayed_progress(_("Checking out files"), total);
  }
  
 +static void setup_collided_checkout_detection(struct checkout *state,
 +                                            struct index_state *index)
 +{
 +      int i;
 +
 +      state->clone = 1;
 +      for (i = 0; i < index->cache_nr; i++)
 +              index->cache[i]->ce_flags &= ~CE_MATCHED;
 +}
 +
 +static void report_collided_checkout(struct index_state *index)
 +{
 +      struct string_list list = STRING_LIST_INIT_NODUP;
 +      int i;
 +
 +      for (i = 0; i < index->cache_nr; i++) {
 +              struct cache_entry *ce = index->cache[i];
 +
 +              if (!(ce->ce_flags & CE_MATCHED))
 +                      continue;
 +
 +              string_list_append(&list, ce->name);
 +              ce->ce_flags &= ~CE_MATCHED;
 +      }
 +
 +      list.cmp = fspathcmp;
 +      string_list_sort(&list);
 +
 +      if (list.nr) {
 +              warning(_("the following paths have collided (e.g. case-sensitive paths\n"
 +                        "on a case-insensitive filesystem) and only one from the same\n"
 +                        "colliding group is in the working tree:\n"));
 +
 +              for (i = 0; i < list.nr; i++)
 +                      fprintf(stderr, "  '%s'\n", list.items[i].string);
 +      }
 +
 +      string_list_clear(&list, 0);
 +}
 +
  static int check_updates(struct unpack_trees_options *o)
  {
        unsigned cnt = 0;
        struct checkout state = CHECKOUT_INIT;
        int i;
  
+       trace_performance_enter();
        state.force = 1;
        state.quiet = 1;
        state.refresh_cache = 1;
        state.istate = index;
  
 +      if (o->clone)
 +              setup_collided_checkout_detection(&state, index);
 +
        progress = get_progress(o);
  
        if (o->update)
 -              git_attr_set_direction(GIT_ATTR_CHECKOUT, index);
 +              git_attr_set_direction(GIT_ATTR_CHECKOUT);
  
        if (should_update_submodules() && o->update && !o->dry_run)
                load_gitmodules_file(index, NULL);
        stop_progress(&progress);
        errs |= finish_delayed_checkout(&state);
        if (o->update)
 -              git_attr_set_direction(GIT_ATTR_CHECKIN, NULL);
 +              git_attr_set_direction(GIT_ATTR_CHECKIN);
 +
 +      if (o->clone)
 +              report_collided_checkout(index);
 +
+       trace_performance_leave("check_updates");
        return errs != 0;
  }
  
@@@ -680,6 -635,113 +682,113 @@@ static inline int are_same_oid(struct n
        return name_j->oid && name_k->oid && !oidcmp(name_j->oid, name_k->oid);
  }
  
+ static int all_trees_same_as_cache_tree(int n, unsigned long dirmask,
+                                       struct name_entry *names,
+                                       struct traverse_info *info)
+ {
+       struct unpack_trees_options *o = info->data;
+       int i;
+       if (!o->merge || dirmask != ((1 << n) - 1))
+               return 0;
+       for (i = 1; i < n; i++)
+               if (!are_same_oid(names, names + i))
+                       return 0;
+       return cache_tree_matches_traversal(o->src_index->cache_tree, names, info);
+ }
+ static int index_pos_by_traverse_info(struct name_entry *names,
+                                     struct traverse_info *info)
+ {
+       struct unpack_trees_options *o = info->data;
+       int len = traverse_path_len(info, names);
+       char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */);
+       int pos;
+       make_traverse_path(name, info, names);
+       name[len++] = '/';
+       name[len] = '\0';
+       pos = index_name_pos(o->src_index, name, len);
+       if (pos >= 0)
+               BUG("This is a directory and should not exist in index");
+       pos = -pos - 1;
+       if (!starts_with(o->src_index->cache[pos]->name, name) ||
+           (pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name)))
+               BUG("pos must point at the first entry in this directory");
+       free(name);
+       return pos;
+ }
+ /*
+  * Fast path if we detect that all trees are the same as cache-tree at this
+  * path. We'll walk these trees in an iterative loop using cache-tree/index
+  * instead of ODB since we already know what these trees contain.
+  */
+ static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names,
+                                 struct name_entry *names,
+                                 struct traverse_info *info)
+ {
+       struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
+       struct unpack_trees_options *o = info->data;
+       struct cache_entry *tree_ce = NULL;
+       int ce_len = 0;
+       int i, d;
+       if (!o->merge)
+               BUG("We need cache-tree to do this optimization");
+       /*
+        * Do what unpack_callback() and unpack_nondirectories() normally
+        * do. But we walk all paths in an iterative loop instead.
+        *
+        * D/F conflicts and higher stage entries are not a concern
+        * because cache-tree would be invalidated and we would never
+        * get here in the first place.
+        */
+       for (i = 0; i < nr_entries; i++) {
+               int new_ce_len, len, rc;
+               src[0] = o->src_index->cache[pos + i];
+               len = ce_namelen(src[0]);
+               new_ce_len = cache_entry_size(len);
+               if (new_ce_len > ce_len) {
+                       new_ce_len <<= 1;
+                       tree_ce = xrealloc(tree_ce, new_ce_len);
+                       memset(tree_ce, 0, new_ce_len);
+                       ce_len = new_ce_len;
+                       tree_ce->ce_flags = create_ce_flags(0);
+                       for (d = 1; d <= nr_names; d++)
+                               src[d] = tree_ce;
+               }
+               tree_ce->ce_mode = src[0]->ce_mode;
+               tree_ce->ce_namelen = len;
+               oidcpy(&tree_ce->oid, &src[0]->oid);
+               memcpy(tree_ce->name, src[0]->name, len + 1);
+               rc = call_unpack_fn((const struct cache_entry * const *)src, o);
+               if (rc < 0) {
+                       free(tree_ce);
+                       return rc;
+               }
+               mark_ce_used(src[0], o);
+       }
+       free(tree_ce);
+       if (o->debug_unpack)
+               printf("Unpacked %d entries from %s to %s using cache-tree\n",
+                      nr_entries,
+                      o->src_index->cache[pos]->name,
+                      o->src_index->cache[pos + nr_entries - 1]->name);
+       return 0;
+ }
  static int traverse_trees_recursive(int n, unsigned long dirmask,
                                    unsigned long df_conflicts,
                                    struct name_entry *names,
        void *buf[MAX_UNPACK_TREES];
        struct traverse_info newinfo;
        struct name_entry *p;
+       int nr_entries;
+       nr_entries = all_trees_same_as_cache_tree(n, dirmask, names, info);
+       if (nr_entries > 0) {
+               struct unpack_trees_options *o = info->data;
+               int pos = index_pos_by_traverse_info(names, info);
+               if (!o->merge || df_conflicts)
+                       BUG("Wrong condition to get here buddy");
+               /*
+                * All entries up to 'pos' must have been processed
+                * (i.e. marked CE_UNPACKED) at this point. But to be safe,
+                * save and restore cache_bottom anyway to not miss
+                * unprocessed entries before 'pos'.
+                */
+               bottom = o->cache_bottom;
+               ret = traverse_by_cache_tree(pos, nr_entries, n, names, info);
+               o->cache_bottom = bottom;
+               return ret;
+       }
  
        p = names;
        while (!p->mode)
@@@ -857,6 -940,11 +987,11 @@@ static struct cache_entry *create_ce_en
        return ce;
  }
  
+ /*
+  * Note that traverse_by_cache_tree() duplicates some logic in this function
+  * without actually calling it. If you change the logic here you may need to
+  * check and change there as well.
+  */
  static int unpack_nondirectories(int n, unsigned long mask,
                                 unsigned long dirmask,
                                 struct cache_entry **src,
@@@ -1049,6 -1137,11 +1184,11 @@@ static void debug_unpack_callback(int n
                debug_name_entry(i, names + i);
  }
  
+ /*
+  * Note that traverse_by_cache_tree() duplicates some logic in this function
+  * without actually calling it. If you change the logic here you may need to
+  * check and change there as well.
+  */
  static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info)
  {
        struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
        return mask;
  }
  
 -static int clear_ce_flags_1(struct cache_entry **cache, int nr,
 +static int clear_ce_flags_1(struct index_state *istate,
 +                          struct cache_entry **cache, int nr,
                            struct strbuf *prefix,
                            int select_mask, int clear_mask,
                            struct exclude_list *el, int defval);
  
  /* Whole directory matching */
 -static int clear_ce_flags_dir(struct cache_entry **cache, int nr,
 +static int clear_ce_flags_dir(struct index_state *istate,
 +                            struct cache_entry **cache, int nr,
                              struct strbuf *prefix,
                              char *basename,
                              int select_mask, int clear_mask,
        struct cache_entry **cache_end;
        int dtype = DT_DIR;
        int ret = is_excluded_from_list(prefix->buf, prefix->len,
 -                                      basename, &dtype, el, &the_index);
 +                                      basename, &dtype, el, istate);
        int rc;
  
        strbuf_addch(prefix, '/');
         * calling clear_ce_flags_1(). That function will call
         * the expensive is_excluded_from_list() on every entry.
         */
 -      rc = clear_ce_flags_1(cache, cache_end - cache,
 +      rc = clear_ce_flags_1(istate, cache, cache_end - cache,
                              prefix,
                              select_mask, clear_mask,
                              el, ret);
   *   cache[0]->name[0..(prefix_len-1)]
   * Top level path has prefix_len zero.
   */
 -static int clear_ce_flags_1(struct cache_entry **cache, int nr,
 +static int clear_ce_flags_1(struct index_state *istate,
 +                          struct cache_entry **cache, int nr,
                            struct strbuf *prefix,
                            int select_mask, int clear_mask,
                            struct exclude_list *el, int defval)
                        len = slash - name;
                        strbuf_add(prefix, name, len);
  
 -                      processed = clear_ce_flags_dir(cache, cache_end - cache,
 +                      processed = clear_ce_flags_dir(istate, cache, cache_end - cache,
                                                       prefix,
                                                       prefix->buf + prefix->len - len,
                                                       select_mask, clear_mask,
                        }
  
                        strbuf_addch(prefix, '/');
 -                      cache += clear_ce_flags_1(cache, cache_end - cache,
 +                      cache += clear_ce_flags_1(istate, cache, cache_end - cache,
                                                  prefix,
                                                  select_mask, clear_mask, el, defval);
                        strbuf_setlen(prefix, prefix->len - len - 1);
                /* Non-directory */
                dtype = ce_to_dtype(ce);
                ret = is_excluded_from_list(ce->name, ce_namelen(ce),
 -                                          name, &dtype, el, &the_index);
 +                                          name, &dtype, el, istate);
                if (ret < 0)
                        ret = defval;
                if (ret > 0)
        return nr - (cache_end - cache);
  }
  
 -static int clear_ce_flags(struct cache_entry **cache, int nr,
 -                          int select_mask, int clear_mask,
 -                          struct exclude_list *el)
 +static int clear_ce_flags(struct index_state *istate,
 +                        int select_mask, int clear_mask,
 +                        struct exclude_list *el)
  {
        static struct strbuf prefix = STRBUF_INIT;
  
        strbuf_reset(&prefix);
  
 -      return clear_ce_flags_1(cache, nr,
 +      return clear_ce_flags_1(istate,
 +                              istate->cache,
 +                              istate->cache_nr,
                                &prefix,
                                select_mask, clear_mask,
                                el, 0);
   * Set/Clear CE_NEW_SKIP_WORKTREE according to $GIT_DIR/info/sparse-checkout
   */
  static void mark_new_skip_worktree(struct exclude_list *el,
 -                                 struct index_state *the_index,
 +                                 struct index_state *istate,
                                   int select_flag, int skip_wt_flag)
  {
        int i;
         * 1. Pretend the narrowest worktree: only unmerged entries
         * are checked out
         */
 -      for (i = 0; i < the_index->cache_nr; i++) {
 -              struct cache_entry *ce = the_index->cache[i];
 +      for (i = 0; i < istate->cache_nr; i++) {
 +              struct cache_entry *ce = istate->cache[i];
  
                if (select_flag && !(ce->ce_flags & select_flag))
                        continue;
         * 2. Widen worktree according to sparse-checkout file.
         * Matched entries will have skip_wt_flag cleared (i.e. "in")
         */
 -      clear_ce_flags(the_index->cache, the_index->cache_nr,
 -                     select_flag, skip_wt_flag, el);
 +      clear_ce_flags(istate, select_flag, skip_wt_flag, el);
  }
  
  static int verify_absent(const struct cache_entry *,
@@@ -1336,6 -1425,7 +1476,7 @@@ int unpack_trees(unsigned len, struct t
        if (len > MAX_UNPACK_TREES)
                die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES);
  
+       trace_performance_enter();
        memset(&el, 0, sizeof(el));
        if (!core_apply_sparse_checkout || !o->update)
                o->skip_sparse_checkout = 1;
                        }
                }
  
-               if (traverse_trees(len, t, &info) < 0)
+               trace_performance_enter();
+               ret = traverse_trees(len, t, &info);
+               trace_performance_leave("traverse_trees");
+               if (ret < 0)
                        goto return_failed;
        }
  
  
        ret = check_updates(o) ? (-2) : 0;
        if (o->dst_index) {
+               move_index_extensions(&o->result, o->src_index);
                if (!ret) {
+                       if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0))
+                               cache_tree_verify(&o->result);
                        if (!o->result.cache_tree)
                                o->result.cache_tree = cache_tree();
                        if (!cache_tree_fully_valid(o->result.cache_tree))
                                                  WRITE_TREE_SILENT |
                                                  WRITE_TREE_REPAIR);
                }
-               move_index_extensions(&o->result, o->src_index);
                discard_index(o->dst_index);
                *o->dst_index = o->result;
        } else {
        o->src_index = NULL;
  
  done:
+       trace_performance_leave("unpack_trees");
        clear_exclude_list(&el);
        return ret;
  
@@@ -1603,17 -1699,6 +1750,17 @@@ static int verify_uptodate_sparse(cons
        return verify_uptodate_1(ce, o, ERROR_SPARSE_NOT_UPTODATE_FILE);
  }
  
 +/*
 + * TODO: We should actually invalidate o->result, not src_index [1].
 + * But since cache tree and untracked cache both are not copied to
 + * o->result until unpacking is complete, we invalidate them on
 + * src_index instead with the assumption that they will be copied to
 + * dst_index at the end.
 + *
 + * [1] src_index->cache_tree is also used in unpack_callback() so if
 + * we invalidate o->result, we need to update it to use
 + * o->result.cache_tree as well.
 + */
  static void invalidate_ce_path(const struct cache_entry *ce,
                               struct unpack_trees_options *o)
  {
@@@ -1691,6 -1776,7 +1838,7 @@@ static int verify_clean_subdirectory(co
                        if (verify_uptodate(ce2, o))
                                return -1;
                        add_entry(o, ce2, CE_REMOVE, 0);
+                       invalidate_ce_path(ce, o);
                        mark_ce_used(ce2, o);
                }
                cnt++;
        memset(&d, 0, sizeof(d));
        if (o->dir)
                d.exclude_per_dir = o->dir->exclude_per_dir;
 -      i = read_directory(&d, &the_index, pathbuf, namelen+1, NULL);
 +      i = read_directory(&d, o->src_index, pathbuf, namelen+1, NULL);
        if (i)
                return o->gently ? -1 :
                        add_rejected_path(o, ERROR_NOT_UPTODATE_DIR, ce->name);
@@@ -1747,7 -1833,7 +1895,7 @@@ static int check_ok_to_remove(const cha
                return 0;
  
        if (o->dir &&
 -          is_excluded(o->dir, &the_index, name, &dtype))
 +          is_excluded(o->dir, o->src_index, name, &dtype))
                /*
                 * ce->name is explicitly excluded, so it is Ok to
                 * overwrite it.
@@@ -1950,6 -2036,8 +2098,8 @@@ static int keep_entry(const struct cach
                      struct unpack_trees_options *o)
  {
        add_entry(o, ce, 0, 0);
+       if (ce_stage(ce))
+               invalidate_ce_path(ce, o);
        return 1;
  }