Document update for nd/unpack-trees-with-cache-tree
[gitweb.git] / unpack-trees.c
index cd0680f11e3580ef24e9140dd00a1e38f02557d8..515c37437308b097c7f26442de19bb35d92c8da8 100644 (file)
@@ -204,20 +204,11 @@ static int do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
                               ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
 }
 
-static struct cache_entry *dup_entry(const struct cache_entry *ce)
-{
-       unsigned int size = ce_size(ce);
-       struct cache_entry *new_entry = xmalloc(size);
-
-       memcpy(new_entry, ce, size);
-       return new_entry;
-}
-
 static void add_entry(struct unpack_trees_options *o,
                      const struct cache_entry *ce,
                      unsigned int set, unsigned int clear)
 {
-       do_add_entry(o, dup_entry(ce), set, clear);
+       do_add_entry(o, dup_cache_entry(ce, &o->result), set, clear);
 }
 
 /*
@@ -354,6 +345,7 @@ static int check_updates(struct unpack_trees_options *o)
        struct checkout state = CHECKOUT_INIT;
        int i;
 
+       trace_performance_enter();
        state.force = 1;
        state.quiet = 1;
        state.refresh_cache = 1;
@@ -423,6 +415,7 @@ static int check_updates(struct unpack_trees_options *o)
        errs |= finish_delayed_checkout(&state);
        if (o->update)
                git_attr_set_direction(GIT_ATTR_CHECKIN, NULL);
+       trace_performance_leave("check_updates");
        return errs != 0;
 }
 
@@ -642,6 +635,113 @@ static inline int are_same_oid(struct name_entry *name_j, struct name_entry *nam
        return name_j->oid && name_k->oid && !oidcmp(name_j->oid, name_k->oid);
 }
 
+/*
+ * Decide whether the cache-tree fast path may be used for names[0..n-1].
+ *
+ * Yields 0 unless o->merge is set, dirmask has exactly its n low bits set,
+ * and names[1..n-1] all carry the same object ID as names[0]. When those
+ * checks pass, the answer comes from cache_tree_matches_traversal(); the
+ * caller treats a positive result as the number of index entries to walk.
+ */
+static int all_trees_same_as_cache_tree(int n, unsigned long dirmask,
+                                       struct name_entry *names,
+                                       struct traverse_info *info)
+{
+       struct unpack_trees_options *opts = info->data;
+       int k;
+
+       if (!opts->merge)
+               return 0;
+       if (dirmask != ((1 << n) - 1))
+               return 0;
+
+       /* Every other tree must agree with the first one. */
+       for (k = 1; k < n; k++) {
+               if (!are_same_oid(names, names + k))
+                       return 0;
+       }
+
+       return cache_tree_matches_traversal(opts->src_index->cache_tree, names, info);
+}
+
+/*
+ * Locate, in o->src_index, the first entry underneath the directory named
+ * by (info, names).
+ *
+ * The directory path is built with a trailing '/' and looked up with
+ * index_name_pos(). A non-negative result triggers BUG(); a negative
+ * result is converted with "-pos - 1" and must then point at the first
+ * entry whose name starts with "<dir>/". Any violation of that, including
+ * a position at or past the end of the cache array, is reported through
+ * BUG() instead of reading out of bounds.
+ *
+ * Returns the index position of that first entry.
+ */
+static int index_pos_by_traverse_info(struct name_entry *names,
+                                     struct traverse_info *info)
+{
+       struct unpack_trees_options *o = info->data;
+       int len = traverse_path_len(info, names);
+       char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */);
+       int pos;
+
+       make_traverse_path(name, info, names);
+       name[len++] = '/';
+       name[len] = '\0';
+       pos = index_name_pos(o->src_index, name, len);
+       if (pos >= 0)
+               BUG("This is a directory and should not exist in index");
+       pos = -pos - 1;
+       /*
+        * The converted position equals cache_nr when the name sorts
+        * after every existing entry, so check the bound before
+        * dereferencing cache[pos].
+        */
+       if (pos >= o->src_index->cache_nr ||
+           !starts_with(o->src_index->cache[pos]->name, name) ||
+           (pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name)))
+               BUG("pos must point at the first entry in this directory");
+       free(name);
+       return pos;
+}
+
+/*
+ * Fast path if we detect that all trees are the same as cache-tree at this
+ * path. We'll walk these trees in an iterative loop using cache-tree/index
+ * instead of ODB since we already know what these trees contain.
+ *
+ * 'pos' is the position in o->src_index->cache of the first entry to walk
+ * and 'nr_entries' is how many consecutive entries to walk. For each one,
+ * src[0] is the index entry itself and src[1..nr_names] all point at a
+ * single scratch copy of it (same mode, oid and name, flags created for
+ * stage 0), which is handed to call_unpack_fn(). 'names' is not used here.
+ *
+ * Returns 0 on success, or the negative value from call_unpack_fn() as
+ * soon as one call fails.
+ */
+static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names,
+                                 struct name_entry *names,
+                                 struct traverse_info *info)
+{
+       struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
+       struct unpack_trees_options *o = info->data;
+       struct cache_entry *tree_ce = NULL; /* scratch entry, reused across iterations */
+       int ce_len = 0; /* current allocation size of tree_ce, in bytes */
+       int i, d;
+
+       if (!o->merge)
+               BUG("We need cache-tree to do this optimization");
+
+       /*
+        * Do what unpack_callback() and unpack_nondirectories() normally
+        * do. But we walk all paths in an iterative loop instead.
+        *
+        * D/F conflicts and higher stage entries are not a concern
+        * because cache-tree would be invalidated and we would never
+        * get here in the first place.
+        */
+       for (i = 0; i < nr_entries; i++) {
+               int new_ce_len, len, rc;
+
+               src[0] = o->src_index->cache[pos + i];
+
+               len = ce_namelen(src[0]);
+               new_ce_len = cache_entry_size(len);
+
+               if (new_ce_len > ce_len) { /* scratch entry too small for this name */
+                       new_ce_len <<= 1; /* double it so somewhat longer names still fit */
+                       tree_ce = xrealloc(tree_ce, new_ce_len);
+                       memset(tree_ce, 0, new_ce_len);
+                       ce_len = new_ce_len;
+
+                       tree_ce->ce_flags = create_ce_flags(0);
+
+                       for (d = 1; d <= nr_names; d++)
+                               src[d] = tree_ce; /* refresh: xrealloc() may have moved the buffer */
+               }
+
+               tree_ce->ce_mode = src[0]->ce_mode;
+               tree_ce->ce_namelen = len;
+               oidcpy(&tree_ce->oid, &src[0]->oid);
+               memcpy(tree_ce->name, src[0]->name, len + 1); /* + 1 copies the terminating NUL */
+
+               rc = call_unpack_fn((const struct cache_entry * const *)src, o);
+               if (rc < 0) {
+                       free(tree_ce);
+                       return rc;
+               }
+
+               mark_ce_used(src[0], o);
+       }
+       free(tree_ce);
+       if (o->debug_unpack)
+               printf("Unpacked %d entries from %s to %s using cache-tree\n",
+                      nr_entries,
+                      o->src_index->cache[pos]->name,
+                      o->src_index->cache[pos + nr_entries - 1]->name);
+       return 0;
+}
+
 static int traverse_trees_recursive(int n, unsigned long dirmask,
                                    unsigned long df_conflicts,
                                    struct name_entry *names,
@@ -653,6 +753,27 @@ static int traverse_trees_recursive(int n, unsigned long dirmask,
        void *buf[MAX_UNPACK_TREES];
        struct traverse_info newinfo;
        struct name_entry *p;
+       int nr_entries;
+
+       nr_entries = all_trees_same_as_cache_tree(n, dirmask, names, info);
+       if (nr_entries > 0) {
+               struct unpack_trees_options *o = info->data;
+               int pos = index_pos_by_traverse_info(names, info);
+
+               if (!o->merge || df_conflicts)
+                       BUG("Wrong condition to get here buddy");
+
+               /*
+                * All entries up to 'pos' must have been processed
+                * (i.e. marked CE_UNPACKED) at this point. But to be safe,
+                * save and restore cache_bottom anyway to not miss
+                * unprocessed entries before 'pos'.
+                */
+               bottom = o->cache_bottom;
+               ret = traverse_by_cache_tree(pos, nr_entries, n, names, info);
+               o->cache_bottom = bottom;
+               return ret;
+       }
 
        p = names;
        while (!p->mode)
@@ -798,10 +919,17 @@ static int ce_in_traverse_path(const struct cache_entry *ce,
        return (info->pathlen < ce_namelen(ce));
 }
 
-static struct cache_entry *create_ce_entry(const struct traverse_info *info, const struct name_entry *n, int stage)
+static struct cache_entry *create_ce_entry(const struct traverse_info *info,
+       const struct name_entry *n,
+       int stage,
+       struct index_state *istate,
+       int is_transient)
 {
        int len = traverse_path_len(info, n);
-       struct cache_entry *ce = xcalloc(1, cache_entry_size(len));
+       struct cache_entry *ce =
+               is_transient ?
+               make_empty_transient_cache_entry(len) :
+               make_empty_cache_entry(istate, len);
 
        ce->ce_mode = create_ce_mode(n->mode);
        ce->ce_flags = create_ce_flags(stage);
@@ -812,6 +940,11 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, con
        return ce;
 }
 
+/*
+ * Note that traverse_by_cache_tree() duplicates some logic in this function
+ * without actually calling it. If you change the logic here you may need to
+ * check and change there as well.
+ */
 static int unpack_nondirectories(int n, unsigned long mask,
                                 unsigned long dirmask,
                                 struct cache_entry **src,
@@ -847,7 +980,15 @@ static int unpack_nondirectories(int n, unsigned long mask,
                        stage = 3;
                else
                        stage = 2;
-               src[i + o->merge] = create_ce_entry(info, names + i, stage);
+
+               /*
+                * If the merge bit is set, then the cache entries are
+                * discarded in the following block.  In this case,
+                * construct "transient" cache_entries, as they are
+                * not stored in the index.  Otherwise, construct the
+                * cache entry using the index-aware logic.
+                */
+               src[i + o->merge] = create_ce_entry(info, names + i, stage, &o->result, o->merge);
        }
 
        if (o->merge) {
@@ -856,7 +997,7 @@ static int unpack_nondirectories(int n, unsigned long mask,
                for (i = 0; i < n; i++) {
                        struct cache_entry *ce = src[i + o->merge];
                        if (ce != o->df_conflict_entry)
-                               free(ce);
+                               discard_cache_entry(ce);
                }
                return rc;
        }
@@ -996,6 +1137,11 @@ static void debug_unpack_callback(int n,
                debug_name_entry(i, names + i);
 }
 
+/*
+ * Note that traverse_by_cache_tree() duplicates some logic in this function
+ * without actually calling it. If you change the logic here you may need to
+ * check and change there as well.
+ */
 static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info)
 {
        struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
@@ -1279,6 +1425,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
        if (len > MAX_UNPACK_TREES)
                die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES);
 
+       trace_performance_enter();
        memset(&el, 0, sizeof(el));
        if (!core_apply_sparse_checkout || !o->update)
                o->skip_sparse_checkout = 1;
@@ -1351,7 +1498,10 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
                        }
                }
 
-               if (traverse_trees(len, t, &info) < 0)
+               trace_performance_enter();
+               ret = traverse_trees(len, t, &info);
+               trace_performance_leave("traverse_trees");
+               if (ret < 0)
                        goto return_failed;
        }
 
@@ -1426,7 +1576,10 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
 
        ret = check_updates(o) ? (-2) : 0;
        if (o->dst_index) {
+               move_index_extensions(&o->result, o->src_index);
                if (!ret) {
+                       if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0))
+                               cache_tree_verify(&o->result);
                        if (!o->result.cache_tree)
                                o->result.cache_tree = cache_tree();
                        if (!cache_tree_fully_valid(o->result.cache_tree))
@@ -1434,7 +1587,6 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
                                                  WRITE_TREE_SILENT |
                                                  WRITE_TREE_REPAIR);
                }
-               move_index_extensions(&o->result, o->src_index);
                discard_index(o->dst_index);
                *o->dst_index = o->result;
        } else {
@@ -1443,6 +1595,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
        o->src_index = NULL;
 
 done:
+       trace_performance_leave("unpack_trees");
        clear_exclude_list(&el);
        return ret;
 
@@ -1623,6 +1776,7 @@ static int verify_clean_subdirectory(const struct cache_entry *ce,
                        if (verify_uptodate(ce2, o))
                                return -1;
                        add_entry(o, ce2, CE_REMOVE, 0);
+                       invalidate_ce_path(ce, o);
                        mark_ce_used(ce2, o);
                }
                cnt++;
@@ -1788,7 +1942,7 @@ static int merged_entry(const struct cache_entry *ce,
                        struct unpack_trees_options *o)
 {
        int update = CE_UPDATE;
-       struct cache_entry *merge = dup_entry(ce);
+       struct cache_entry *merge = dup_cache_entry(ce, &o->result);
 
        if (!old) {
                /*
@@ -1808,7 +1962,7 @@ static int merged_entry(const struct cache_entry *ce,
 
                if (verify_absent(merge,
                                  ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN, o)) {
-                       free(merge);
+                       discard_cache_entry(merge);
                        return -1;
                }
                invalidate_ce_path(merge, o);
@@ -1834,7 +1988,7 @@ static int merged_entry(const struct cache_entry *ce,
                        update = 0;
                } else {
                        if (verify_uptodate(old, o)) {
-                               free(merge);
+                               discard_cache_entry(merge);
                                return -1;
                        }
                        /* Migrate old flags over */
@@ -1882,6 +2036,8 @@ static int keep_entry(const struct cache_entry *ce,
                      struct unpack_trees_options *o)
 {
        add_entry(o, ce, 0, 0);
+       if (ce_stage(ce))
+               invalidate_ce_path(ce, o);
        return 1;
 }