Merge branch 'nd/unpack-trees-with-cache-tree'
author Junio C Hamano <gitster@pobox.com>
Mon, 17 Sep 2018 20:53:53 +0000 (13:53 -0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 17 Sep 2018 20:53:53 +0000 (13:53 -0700)
The unpack_trees() API used in checking out a branch and merging
walks one or more trees along with the index. When the cache-tree
in the index tells us that we are walking a tree whose flattened
contents are known (i.e. they match a span in the index), the walk
can be optimized: linearly scanning that span in the index is much
more efficient than recursively opening tree objects and listing
their entries. This topic implements that optimization.
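
To illustrate the idea outside of git's own data structures: the
index is a flat, sorted list of paths, so all entries under a
directory occupy one contiguous span, and a valid cache-tree node
records how many index entries its subtree covers. "Walking" such a
subtree then degenerates into a linear scan. A minimal,
self-contained sketch of that shape (the types below are
hypothetical stand-ins for git's index_state and cache_tree, not the
real structures):

#include <stdio.h>

/* Hypothetical stand-in for one index entry. */
struct entry { const char *path; };

/*
 * Hypothetical stand-in for a valid cache-tree node: it knows where
 * its directory's span starts in the index and how many consecutive
 * entries that span holds (entry_count in the real structure).
 */
struct ct_node { const char *dir; int first_pos; int entry_count; };

static void walk_span(const struct entry *entries, const struct ct_node *ct)
{
	/* No recursive tree-object reads: just scan the known span. */
	for (int i = 0; i < ct->entry_count; i++)
		printf("visit %s\n", entries[ct->first_pos + i].path);
}

int main(void)
{
	struct entry entries[] = {
		{ "Makefile" },
		{ "t/README" },
		{ "t/test-lib.sh" },
		{ "trace.c" },
	};
	struct ct_node t_dir = { "t/", 1, 2 }; /* the span for "t/" */

	walk_span(entries, &t_dir);
	return 0;
}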

* nd/unpack-trees-with-cache-tree:
Document update for nd/unpack-trees-with-cache-tree
cache-tree: verify valid cache-tree in the test suite
unpack-trees: add missing cache invalidation
unpack-trees: reuse (still valid) cache-tree from src_index
unpack-trees: reduce malloc in cache-tree walk
unpack-trees: optimize walking same trees with cache-tree
unpack-trees: add performance tracing
trace.h: support nested performance tracing

12 files changed:
cache-tree.c
cache-tree.h
diff-lib.c
dir.c
name-hash.c
preload-index.c
read-cache.c
t/README
t/test-lib.sh
trace.c
trace.h
unpack-trees.c
diff --git a/cache-tree.c b/cache-tree.c
index 16ea022c46d3b281d04a3956f865d8a886a5b714..490a25adf06e4e1fcd70530943f604bc7c9f1b58 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -4,6 +4,7 @@
 #include "tree-walk.h"
 #include "cache-tree.h"
 #include "object-store.h"
+#include "replace-object.h"
 
 #ifndef DEBUG
 #define DEBUG 0
@@ -433,7 +434,9 @@ int cache_tree_update(struct index_state *istate, int flags)
 
        if (i)
                return i;
+       trace_performance_enter();
        i = update_one(it, cache, entries, "", 0, &skip, flags);
+       trace_performance_leave("cache_tree_update");
        if (i < 0)
                return i;
        istate->cache_changed |= CACHE_TREE_CHANGED;
@@ -718,3 +721,80 @@ int cache_tree_matches_traversal(struct cache_tree *root,
                return it->entry_count;
        return 0;
 }
+
+static void verify_one(struct index_state *istate,
+                      struct cache_tree *it,
+                      struct strbuf *path)
+{
+       int i, pos, len = path->len;
+       struct strbuf tree_buf = STRBUF_INIT;
+       struct object_id new_oid;
+
+       for (i = 0; i < it->subtree_nr; i++) {
+               strbuf_addf(path, "%s/", it->down[i]->name);
+               verify_one(istate, it->down[i]->cache_tree, path);
+               strbuf_setlen(path, len);
+       }
+
+       if (it->entry_count < 0 ||
+           /* no verification on tests (t7003) that replace trees */
+           lookup_replace_object(the_repository, &it->oid) != &it->oid)
+               return;
+
+       if (path->len) {
+               pos = index_name_pos(istate, path->buf, path->len);
+               pos = -pos - 1;
+       } else {
+               pos = 0;
+       }
+
+       i = 0;
+       while (i < it->entry_count) {
+               struct cache_entry *ce = istate->cache[pos + i];
+               const char *slash;
+               struct cache_tree_sub *sub = NULL;
+               const struct object_id *oid;
+               const char *name;
+               unsigned mode;
+               int entlen;
+
+               if (ce->ce_flags & (CE_STAGEMASK | CE_INTENT_TO_ADD | CE_REMOVE))
+                       BUG("%s with flags 0x%x should not be in cache-tree",
+                           ce->name, ce->ce_flags);
+               name = ce->name + path->len;
+               slash = strchr(name, '/');
+               if (slash) {
+                       entlen = slash - name;
+                       sub = find_subtree(it, ce->name + path->len, entlen, 0);
+                       if (!sub || sub->cache_tree->entry_count < 0)
+                               BUG("bad subtree '%.*s'", entlen, name);
+                       oid = &sub->cache_tree->oid;
+                       mode = S_IFDIR;
+                       i += sub->cache_tree->entry_count;
+               } else {
+                       oid = &ce->oid;
+                       mode = ce->ce_mode;
+                       entlen = ce_namelen(ce) - path->len;
+                       i++;
+               }
+               strbuf_addf(&tree_buf, "%o %.*s%c", mode, entlen, name, '\0');
+               strbuf_add(&tree_buf, oid->hash, the_hash_algo->rawsz);
+       }
+       hash_object_file(tree_buf.buf, tree_buf.len, tree_type, &new_oid);
+       if (oidcmp(&new_oid, &it->oid))
+               BUG("cache-tree for path %.*s does not match. "
+                   "Expected %s got %s", len, path->buf,
+                   oid_to_hex(&new_oid), oid_to_hex(&it->oid));
+       strbuf_setlen(path, len);
+       strbuf_release(&tree_buf);
+}
+
+void cache_tree_verify(struct index_state *istate)
+{
+       struct strbuf path = STRBUF_INIT;
+
+       if (!istate->cache_tree)
+               return;
+       verify_one(istate, istate->cache_tree, &path);
+       strbuf_release(&path);
+}
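
For reference, verify_one() above recomputes what the tree object
for a span of the index would hash to: each entry is serialized in
git's canonical tree format, "<octal mode> <name>" terminated by a
NUL byte and followed by the raw object id, and the resulting buffer
is hashed and compared against the oid recorded in the cache-tree.
A minimal sketch of just the serialization step (hashing omitted;
the hardcoded SHA-1 width is an assumption of this example, where
the real code uses the_hash_algo->rawsz):

#include <stdio.h>
#include <string.h>

#define RAWSZ 20 /* SHA-1 raw hash width, assumed for this sketch */

/*
 * Append one entry in canonical tree-object form:
 * "<octal mode> <name>\0" followed by the raw hash bytes.
 */
static size_t add_tree_entry(char *buf, size_t len, unsigned mode,
			     const char *name,
			     const unsigned char raw_oid[RAWSZ])
{
	len += sprintf(buf + len, "%o %s", mode, name);
	buf[len++] = '\0';
	memcpy(buf + len, raw_oid, RAWSZ);
	return len + RAWSZ;
}

int main(void)
{
	char buf[256];
	unsigned char oid[RAWSZ] = { 0 };
	size_t len = 0;

	len = add_tree_entry(buf, len, 0100644, "Makefile", oid);
	len = add_tree_entry(buf, len, 040000, "t", oid); /* subtree */
	printf("tree payload is %zu bytes\n", len);
	return 0;
}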
diff --git a/cache-tree.h b/cache-tree.h
index fc0c842e773f648d64858d1891f64d222633fc3a..0ab6784ffea5da0581c905738aaa66215266b5f9 100644
--- a/cache-tree.h
+++ b/cache-tree.h
@@ -32,6 +32,7 @@ struct cache_tree *cache_tree_read(const char *buffer, unsigned long size);
 
 int cache_tree_fully_valid(struct cache_tree *);
 int cache_tree_update(struct index_state *, int);
+void cache_tree_verify(struct index_state *);
 
 /* bitmasks to write_cache_as_tree flags */
 #define WRITE_TREE_MISSING_OK 1
diff --git a/diff-lib.c b/diff-lib.c
index 88a98b1c06e8792136eced81a7da5f9aacb8542f..70cddbf43664a42b5647ed21af279c1492abb790 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -518,11 +518,11 @@ static int diff_cache(struct rev_info *revs,
 int run_diff_index(struct rev_info *revs, int cached)
 {
        struct object_array_entry *ent;
-       uint64_t start = getnanotime();
 
        if (revs->pending.nr != 1)
                BUG("run_diff_index must be passed exactly one tree");
 
+       trace_performance_enter();
        ent = revs->pending.objects;
        if (diff_cache(revs, &ent->item->oid, ent->name, cached))
                exit(128);
@@ -531,7 +531,7 @@ int run_diff_index(struct rev_info *revs, int cached)
        diffcore_fix_diff_index(&revs->diffopt);
        diffcore_std(&revs->diffopt);
        diff_flush(&revs->diffopt);
-       trace_performance_since(start, "diff-index");
+       trace_performance_leave("diff-index");
        return 0;
 }
 
diff --git a/dir.c b/dir.c
index aceb0d48692b7d727cfd2645ae88b0d45d660c09..995b8e32616055cdb228d30e3e82e9d70d24512e 100644
--- a/dir.c
+++ b/dir.c
@@ -2268,10 +2268,13 @@ int read_directory(struct dir_struct *dir, struct index_state *istate,
                   const char *path, int len, const struct pathspec *pathspec)
 {
        struct untracked_cache_dir *untracked;
-       uint64_t start = getnanotime();
 
-       if (has_symlink_leading_path(path, len))
+       trace_performance_enter();
+
+       if (has_symlink_leading_path(path, len)) {
+               trace_performance_leave("read directory %.*s", len, path);
                return dir->nr;
+       }
 
        untracked = validate_untracked_cache(dir, len, pathspec);
        if (!untracked)
@@ -2307,7 +2310,7 @@ int read_directory(struct dir_struct *dir, struct index_state *istate,
                dir->nr = i;
        }
 
-       trace_performance_since(start, "read directory %.*s", len, path);
+       trace_performance_leave("read directory %.*s", len, path);
        if (dir->untracked) {
                static int force_untracked_cache = -1;
                static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS);
diff --git a/name-hash.c b/name-hash.c
index 163849831c9f11316ce97c649b77c32cf2eed276..1fcda73cb396e305a75aa8dc5869aaf85011761e 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -578,10 +578,10 @@ static void threaded_lazy_init_name_hash(
 
 static void lazy_init_name_hash(struct index_state *istate)
 {
-       uint64_t start = getnanotime();
 
        if (istate->name_hash_initialized)
                return;
+       trace_performance_enter();
        hashmap_init(&istate->name_hash, cache_entry_cmp, NULL, istate->cache_nr);
        hashmap_init(&istate->dir_hash, dir_entry_cmp, NULL, istate->cache_nr);
 
@@ -602,7 +602,7 @@ static void lazy_init_name_hash(struct index_state *istate)
        }
 
        istate->name_hash_initialized = 1;
-       trace_performance_since(start, "initialize name hash");
+       trace_performance_leave("initialize name hash");
 }
 
 /*
diff --git a/preload-index.c b/preload-index.c
index 71cd2437a3b33b343696bf96067603e8dc9e4464..f7365761f4725d6f7ddbb6d8c9c8fe92488de6ce 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -78,7 +78,6 @@ static void preload_index(struct index_state *index,
 {
        int threads, i, work, offset;
        struct thread_data data[MAX_PARALLEL];
-       uint64_t start = getnanotime();
 
        if (!core_preload_index)
                return;
@@ -88,6 +87,7 @@ static void preload_index(struct index_state *index,
                threads = 2;
        if (threads < 2)
                return;
+       trace_performance_enter();
        if (threads > MAX_PARALLEL)
                threads = MAX_PARALLEL;
        offset = 0;
@@ -109,7 +109,7 @@ static void preload_index(struct index_state *index,
                if (pthread_join(p->pthread, NULL))
                        die("unable to join threaded lstat");
        }
-       trace_performance_since(start, "preload index");
+       trace_performance_leave("preload index");
 }
 #endif
 
diff --git a/read-cache.c b/read-cache.c
index 7b1354d7590a70ecbd6e508bdd95eafd4793efcc..d7b617f320939ad2e3659223d04e7537b2707658 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1476,8 +1476,8 @@ int refresh_index(struct index_state *istate, unsigned int flags,
        const char *typechange_fmt;
        const char *added_fmt;
        const char *unmerged_fmt;
-       uint64_t start = getnanotime();
 
+       trace_performance_enter();
        modified_fmt = (in_porcelain ? "M\t%s\n" : "%s: needs update\n");
        deleted_fmt = (in_porcelain ? "D\t%s\n" : "%s: needs update\n");
        typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n");
@@ -1547,7 +1547,7 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 
                replace_index_entry(istate, i, new_entry);
        }
-       trace_performance_since(start, "refresh index");
+       trace_performance_leave("refresh index");
        return has_errors;
 }
 
@@ -2002,7 +2002,6 @@ static void freshen_shared_index(const char *shared_index, int warn)
 int read_index_from(struct index_state *istate, const char *path,
                    const char *gitdir)
 {
-       uint64_t start = getnanotime();
        struct split_index *split_index;
        int ret;
        char *base_oid_hex;
@@ -2012,8 +2011,9 @@ int read_index_from(struct index_state *istate, const char *path,
        if (istate->initialized)
                return istate->cache_nr;
 
+       trace_performance_enter();
        ret = do_read_index(istate, path, 0);
-       trace_performance_since(start, "read cache %s", path);
+       trace_performance_leave("read cache %s", path);
 
        split_index = istate->split_index;
        if (!split_index || is_null_oid(&split_index->base_oid)) {
@@ -2021,6 +2021,7 @@ int read_index_from(struct index_state *istate, const char *path,
                return ret;
        }
 
+       trace_performance_enter();
        if (split_index->base)
                discard_index(split_index->base);
        else
@@ -2037,8 +2038,8 @@ int read_index_from(struct index_state *istate, const char *path,
        freshen_shared_index(base_path, 0);
        merge_base_index(istate);
        post_read_index_from(istate);
-       trace_performance_since(start, "read cache %s", base_path);
+       trace_performance_leave("read cache %s", base_path);
        free(base_path);
        return ret;
 }
 
@@ -2743,6 +2744,9 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock,
        int new_shared_index, ret;
        struct split_index *si = istate->split_index;
 
+       if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0))
+               cache_tree_verify(istate);
+
        if ((flags & SKIP_IF_UNCHANGED) && !istate->cache_changed) {
                if (flags & COMMIT_LOCK)
                        rollback_lock_file(lock);
@@ -2939,6 +2943,8 @@ void move_index_extensions(struct index_state *dst, struct index_state *src)
 {
        dst->untracked = src->untracked;
        src->untracked = NULL;
+       dst->cache_tree = src->cache_tree;
+       src->cache_tree = NULL;
 }
 
 struct cache_entry *dup_cache_entry(const struct cache_entry *ce,
diff --git a/t/README b/t/README
index 9028b47d923ca027a146da82060dae395d3f7999..204b9f4cc5cbf8ad265423f6062cd99c24dd33d3 100644
--- a/t/README
+++ b/t/README
@@ -319,6 +319,10 @@ GIT_TEST_OE_DELTA_SIZE=<n> exercises the uncommon pack-objects code
 path where deltas larger than this limit require extra memory
 allocation for bookkeeping.
 
+GIT_TEST_CHECK_CACHE_TREE=<boolean> checks that cache-tree records
+are valid when the index is written out or after a merge. This is
+mostly to catch missing invalidation. Default is true.
+
 Naming Tests
 ------------
 
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 44288cbb598435a5dfff05e8e895cca85e53e804..3f95bfda605f7ad1f9b0b0385ffdc91e5cca415e 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1083,6 +1083,12 @@ else
        test_set_prereq C_LOCALE_OUTPUT
 fi
 
+if test -z "$GIT_TEST_CHECK_CACHE_TREE"
+then
+       GIT_TEST_CHECK_CACHE_TREE=true
+       export GIT_TEST_CHECK_CACHE_TREE
+fi
+
 test_lazy_prereq PIPE '
        # test whether the filesystem supports FIFOs
        test_have_prereq !MINGW,!CYGWIN &&
diff --git a/trace.c b/trace.c
index fc623e91fdd7ed8268922ae0460cfbd6903f3800..fa4a2e7120e405f3cf2d12422802b785f9e37fad 100644
--- a/trace.c
+++ b/trace.c
@@ -176,10 +176,30 @@ void trace_strbuf_fl(const char *file, int line, struct trace_key *key,
        strbuf_release(&buf);
 }
 
+static uint64_t perf_start_times[10];
+static int perf_indent;
+
+uint64_t trace_performance_enter(void)
+{
+       uint64_t now;
+
+       if (!trace_want(&trace_perf_key))
+               return 0;
+
+       now = getnanotime();
+       perf_start_times[perf_indent] = now;
+       if (perf_indent + 1 < ARRAY_SIZE(perf_start_times))
+               perf_indent++;
+       else
+               BUG("Too deep indentation");
+       return now;
+}
+
 static void trace_performance_vprintf_fl(const char *file, int line,
                                         uint64_t nanos, const char *format,
                                         va_list ap)
 {
+       static const char space[] = "          ";
        struct strbuf buf = STRBUF_INIT;
 
        if (!prepare_trace_line(file, line, &trace_perf_key, &buf))
@@ -188,7 +208,10 @@ static void trace_performance_vprintf_fl(const char *file, int line,
        strbuf_addf(&buf, "performance: %.9f s", (double) nanos / 1000000000);
 
        if (format && *format) {
-               strbuf_addstr(&buf, ": ");
+               if (perf_indent >= strlen(space))
+                       BUG("Too deep indentation");
+
+               strbuf_addf(&buf, ":%.*s ", perf_indent, space);
                strbuf_vaddf(&buf, format, ap);
        }
 
@@ -244,6 +267,24 @@ void trace_performance_since(uint64_t start, const char *format, ...)
        va_end(ap);
 }
 
+void trace_performance_leave(const char *format, ...)
+{
+       va_list ap;
+       uint64_t since;
+
+       if (perf_indent)
+               perf_indent--;
+
+       if (!format) /* Allow callers to leave without tracing anything */
+               return;
+
+       since = perf_start_times[perf_indent];
+       va_start(ap, format);
+       trace_performance_vprintf_fl(NULL, 0, getnanotime() - since,
+                                    format, ap);
+       va_end(ap);
+}
+
 #else
 
 void trace_printf_key_fl(const char *file, int line, struct trace_key *key,
@@ -273,6 +314,24 @@ void trace_performance_fl(const char *file, int line, uint64_t nanos,
        va_end(ap);
 }
 
+void trace_performance_leave_fl(const char *file, int line,
+                               uint64_t nanos, const char *format, ...)
+{
+       va_list ap;
+       uint64_t since;
+
+       if (perf_indent)
+               perf_indent--;
+
+       if (!format) /* Allow callers to leave without tracing anything */
+               return;
+
+       since = perf_start_times[perf_indent];
+       va_start(ap, format);
+       trace_performance_vprintf_fl(file, line, nanos - since, format, ap);
+       va_end(ap);
+}
+
 #endif /* HAVE_VARIADIC_MACROS */
 
 
@@ -411,13 +470,11 @@ uint64_t getnanotime(void)
        }
 }
 
-static uint64_t command_start_time;
 static struct strbuf command_line = STRBUF_INIT;
 
 static void print_command_performance_atexit(void)
 {
-       trace_performance_since(command_start_time, "git command:%s",
-                               command_line.buf);
+       trace_performance_leave("git command:%s", command_line.buf);
 }
 
 void trace_command_performance(const char **argv)
@@ -425,10 +482,10 @@ void trace_command_performance(const char **argv)
        if (!trace_want(&trace_perf_key))
                return;
 
-       if (!command_start_time)
+       if (!command_line.len)
                atexit(print_command_performance_atexit);
 
        strbuf_reset(&command_line);
        sq_quote_argv_pretty(&command_line, argv);
-       command_start_time = getnanotime();
+       trace_performance_enter();
 }
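
The nesting scheme added above is just a small stack of start times
plus an indent level: trace_performance_enter() pushes
getnanotime(), and trace_performance_leave() pops and prints the
elapsed time indented by the remaining depth. A self-contained
sketch of the same mechanism (clock_gettime() standing in for git's
getnanotime(), and without the trace-key gating of the real code):

#include <stdio.h>
#include <stdint.h>
#include <time.h>

static uint64_t start_times[10];
static int depth;

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000u + ts.tv_nsec;
}

static void perf_enter(void)
{
	/* The real code BUGs when the stack would overflow. */
	start_times[depth++] = now_ns();
}

static void perf_leave(const char *label)
{
	uint64_t elapsed = now_ns() - start_times[--depth];

	/* Indent by nesting depth, like the ":%.*s " format above. */
	printf("performance: %.9f s:%*s %s\n",
	       (double)elapsed / 1000000000.0, depth, "", label);
}

int main(void)
{
	perf_enter();		/* outer span */
	perf_enter();		/* nested span */
	perf_leave("inner work");
	perf_leave("whole command");
	return 0;
}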
diff --git a/trace.h b/trace.h
index 2b6a1bc17c2cc1a8642d8c7bd460808638f28d77..171b256d261c771927541cf4d431b926b3ba102b 100644
--- a/trace.h
+++ b/trace.h
@@ -23,6 +23,7 @@ extern void trace_disable(struct trace_key *key);
 extern uint64_t getnanotime(void);
 extern void trace_command_performance(const char **argv);
 extern void trace_verbatim(struct trace_key *key, const void *buf, unsigned len);
+uint64_t trace_performance_enter(void);
 
 #ifndef HAVE_VARIADIC_MACROS
 
@@ -45,6 +46,9 @@ extern void trace_performance(uint64_t nanos, const char *format, ...);
 __attribute__((format (printf, 2, 3)))
 extern void trace_performance_since(uint64_t start, const char *format, ...);
 
+__attribute__((format (printf, 1, 2)))
+void trace_performance_leave(const char *format, ...);
+
 #else
 
 /*
@@ -118,6 +122,14 @@ extern void trace_performance_since(uint64_t start, const char *format, ...);
                                             __VA_ARGS__);                  \
        } while (0)
 
+#define trace_performance_leave(...)                                       \
+       do {                                                                \
+               if (trace_pass_fl(&trace_perf_key))                         \
+                       trace_performance_leave_fl(TRACE_CONTEXT, __LINE__, \
+                                                  getnanotime(),           \
+                                                  __VA_ARGS__);            \
+       } while (0)
+
 /* backend functions, use non-*fl macros instead */
 __attribute__((format (printf, 4, 5)))
 extern void trace_printf_key_fl(const char *file, int line, struct trace_key *key,
@@ -130,6 +142,9 @@ extern void trace_strbuf_fl(const char *file, int line, struct trace_key *key,
 __attribute__((format (printf, 4, 5)))
 extern void trace_performance_fl(const char *file, int line,
                                 uint64_t nanos, const char *fmt, ...);
+__attribute__((format (printf, 4, 5)))
+extern void trace_performance_leave_fl(const char *file, int line,
+                                      uint64_t nanos, const char *fmt, ...);
 static inline int trace_pass_fl(struct trace_key *key)
 {
        return key->fd || !key->initialized;
diff --git a/unpack-trees.c b/unpack-trees.c
index cfa88bb6ec8238a6a6a41d32d646610fd2699048..55f864ac577e5951db5345c358a0ec8bb8291919 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -385,6 +385,7 @@ static int check_updates(struct unpack_trees_options *o)
        struct checkout state = CHECKOUT_INIT;
        int i;
 
+       trace_performance_enter();
        state.force = 1;
        state.quiet = 1;
        state.refresh_cache = 1;
@@ -461,6 +462,7 @@ static int check_updates(struct unpack_trees_options *o)
        if (o->clone)
                report_collided_checkout(index);
 
+       trace_performance_leave("check_updates");
        return errs != 0;
 }
 
@@ -680,6 +682,113 @@ static inline int are_same_oid(struct name_entry *name_j, struct name_entry *nam
        return name_j->oid && name_k->oid && !oidcmp(name_j->oid, name_k->oid);
 }
 
+static int all_trees_same_as_cache_tree(int n, unsigned long dirmask,
+                                       struct name_entry *names,
+                                       struct traverse_info *info)
+{
+       struct unpack_trees_options *o = info->data;
+       int i;
+
+       if (!o->merge || dirmask != ((1 << n) - 1))
+               return 0;
+
+       for (i = 1; i < n; i++)
+               if (!are_same_oid(names, names + i))
+                       return 0;
+
+       return cache_tree_matches_traversal(o->src_index->cache_tree, names, info);
+}
+
+static int index_pos_by_traverse_info(struct name_entry *names,
+                                     struct traverse_info *info)
+{
+       struct unpack_trees_options *o = info->data;
+       int len = traverse_path_len(info, names);
+       char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */);
+       int pos;
+
+       make_traverse_path(name, info, names);
+       name[len++] = '/';
+       name[len] = '\0';
+       pos = index_name_pos(o->src_index, name, len);
+       if (pos >= 0)
+               BUG("This is a directory and should not exist in index");
+       pos = -pos - 1;
+       if (!starts_with(o->src_index->cache[pos]->name, name) ||
+           (pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name)))
+               BUG("pos must point at the first entry in this directory");
+       free(name);
+       return pos;
+}
+
+/*
+ * Fast path if we detect that all trees are the same as cache-tree at this
+ * path. We'll walk these trees in an iterative loop using cache-tree/index
+ * instead of ODB since we already know what these trees contain.
+ */
+static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names,
+                                 struct name_entry *names,
+                                 struct traverse_info *info)
+{
+       struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
+       struct unpack_trees_options *o = info->data;
+       struct cache_entry *tree_ce = NULL;
+       int ce_len = 0;
+       int i, d;
+
+       if (!o->merge)
+               BUG("We need cache-tree to do this optimization");
+
+       /*
+        * Do what unpack_callback() and unpack_nondirectories() normally
+        * do. But we walk all paths in an iterative loop instead.
+        *
+        * D/F conflicts and higher stage entries are not a concern
+        * because cache-tree would be invalidated and we would never
+        * get here in the first place.
+        */
+       for (i = 0; i < nr_entries; i++) {
+               int new_ce_len, len, rc;
+
+               src[0] = o->src_index->cache[pos + i];
+
+               len = ce_namelen(src[0]);
+               new_ce_len = cache_entry_size(len);
+
+               if (new_ce_len > ce_len) {
+                       new_ce_len <<= 1;
+                       tree_ce = xrealloc(tree_ce, new_ce_len);
+                       memset(tree_ce, 0, new_ce_len);
+                       ce_len = new_ce_len;
+
+                       tree_ce->ce_flags = create_ce_flags(0);
+
+                       for (d = 1; d <= nr_names; d++)
+                               src[d] = tree_ce;
+               }
+
+               tree_ce->ce_mode = src[0]->ce_mode;
+               tree_ce->ce_namelen = len;
+               oidcpy(&tree_ce->oid, &src[0]->oid);
+               memcpy(tree_ce->name, src[0]->name, len + 1);
+
+               rc = call_unpack_fn((const struct cache_entry * const *)src, o);
+               if (rc < 0) {
+                       free(tree_ce);
+                       return rc;
+               }
+
+               mark_ce_used(src[0], o);
+       }
+       free(tree_ce);
+       if (o->debug_unpack)
+               printf("Unpacked %d entries from %s to %s using cache-tree\n",
+                      nr_entries,
+                      o->src_index->cache[pos]->name,
+                      o->src_index->cache[pos + nr_entries - 1]->name);
+       return 0;
+}
+
 static int traverse_trees_recursive(int n, unsigned long dirmask,
                                    unsigned long df_conflicts,
                                    struct name_entry *names,
@@ -691,6 +800,27 @@ static int traverse_trees_recursive(int n, unsigned long dirmask,
        void *buf[MAX_UNPACK_TREES];
        struct traverse_info newinfo;
        struct name_entry *p;
+       int nr_entries;
+
+       nr_entries = all_trees_same_as_cache_tree(n, dirmask, names, info);
+       if (nr_entries > 0) {
+               struct unpack_trees_options *o = info->data;
+               int pos = index_pos_by_traverse_info(names, info);
+
+               if (!o->merge || df_conflicts)
+                       BUG("Wrong condition to get here buddy");
+
+               /*
+                * All entries up to 'pos' must have been processed
+                * (i.e. marked CE_UNPACKED) at this point. But to be safe,
+                * save and restore cache_bottom anyway to not miss
+                * unprocessed entries before 'pos'.
+                */
+               bottom = o->cache_bottom;
+               ret = traverse_by_cache_tree(pos, nr_entries, n, names, info);
+               o->cache_bottom = bottom;
+               return ret;
+       }
 
        p = names;
        while (!p->mode)
@@ -857,6 +987,11 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info,
        return ce;
 }
 
+/*
+ * Note that traverse_by_cache_tree() duplicates some logic in this function
+ * without actually calling it. If you change the logic here you may need to
+ * check and change there as well.
+ */
 static int unpack_nondirectories(int n, unsigned long mask,
                                 unsigned long dirmask,
                                 struct cache_entry **src,
@@ -1049,6 +1184,11 @@ static void debug_unpack_callback(int n,
                debug_name_entry(i, names + i);
 }
 
+/*
+ * Note that traverse_by_cache_tree() duplicates some logic in this function
+ * without actually calling it. If you change the logic here you may need to
+ * check and change there as well.
+ */
 static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info)
 {
        struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
@@ -1336,6 +1476,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
        if (len > MAX_UNPACK_TREES)
                die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES);
 
+       trace_performance_enter();
        memset(&el, 0, sizeof(el));
        if (!core_apply_sparse_checkout || !o->update)
                o->skip_sparse_checkout = 1;
@@ -1408,7 +1549,10 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
                        }
                }
 
-               if (traverse_trees(len, t, &info) < 0)
+               trace_performance_enter();
+               ret = traverse_trees(len, t, &info);
+               trace_performance_leave("traverse_trees");
+               if (ret < 0)
                        goto return_failed;
        }
 
@@ -1483,7 +1627,10 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
 
        ret = check_updates(o) ? (-2) : 0;
        if (o->dst_index) {
+               move_index_extensions(&o->result, o->src_index);
                if (!ret) {
+                       if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0))
+                               cache_tree_verify(&o->result);
                        if (!o->result.cache_tree)
                                o->result.cache_tree = cache_tree();
                        if (!cache_tree_fully_valid(o->result.cache_tree))
@@ -1491,7 +1638,6 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
                                                  WRITE_TREE_SILENT |
                                                  WRITE_TREE_REPAIR);
                }
-               move_index_extensions(&o->result, o->src_index);
                discard_index(o->dst_index);
                *o->dst_index = o->result;
        } else {
@@ -1500,6 +1646,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
        o->src_index = NULL;
 
 done:
+       trace_performance_leave("unpack_trees");
        clear_exclude_list(&el);
        return ret;
 
@@ -1691,6 +1838,7 @@ static int verify_clean_subdirectory(const struct cache_entry *ce,
                        if (verify_uptodate(ce2, o))
                                return -1;
                        add_entry(o, ce2, CE_REMOVE, 0);
+                       invalidate_ce_path(ce, o);
                        mark_ce_used(ce2, o);
                }
                cnt++;
@@ -1950,6 +2098,8 @@ static int keep_entry(const struct cache_entry *ce,
                      struct unpack_trees_options *o)
 {
        add_entry(o, ce, 0, 0);
+       if (ce_stage(ce))
+               invalidate_ce_path(ce, o);
        return 1;
 }
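
A detail worth calling out in index_pos_by_traverse_info() above:
index_name_pos() follows the usual binary-search convention of
returning the position of an exact match, or -insertpos - 1 when the
name is absent. A directory name with a trailing '/' never appears
in the index itself, so -pos - 1 recovers the position of the first
entry inside that directory, which is exactly where the cache-tree
span begins. A self-contained sketch of that convention (the helper
name here is illustrative, not git's):

#include <stdio.h>
#include <string.h>

/*
 * Binary search over sorted paths; returns the position of an exact
 * match, or -(insertion point) - 1 when the key is absent, the same
 * convention index_name_pos() uses.
 */
static int name_pos(const char **paths, int nr, const char *key)
{
	int lo = 0, hi = nr;

	while (lo < hi) {
		int mid = lo + (hi - lo) / 2;
		int cmp = strcmp(paths[mid], key);

		if (!cmp)
			return mid;
		if (cmp < 0)
			lo = mid + 1;
		else
			hi = mid;
	}
	return -lo - 1;
}

int main(void)
{
	const char *paths[] = {
		"Makefile", "t/README", "t/test-lib.sh", "trace.c"
	};
	int pos = name_pos(paths, 4, "t/"); /* never an index entry */

	pos = -pos - 1; /* first entry under "t/" */
	printf("span for t/ starts at %d (%s)\n", pos, paths[pos]);
	return 0;
}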