Merge branch 'dt/unpack-compare-entry-optim'
authorJunio C Hamano <gitster@pobox.com>
Wed, 3 Feb 2016 22:16:06 +0000 (14:16 -0800)
committerJunio C Hamano <gitster@pobox.com>
Wed, 3 Feb 2016 22:16:06 +0000 (14:16 -0800)
"git checkout $branch" (and other operations that share the same
underlying machinery) has been optimized.

* dt/unpack-compare-entry-optim:
unpack-trees: fix accidentally quadratic behavior
do_compare_entry: use already-computed path

tree-walk.c
tree-walk.h
unpack-trees.c
index 6dccd2d5dd78e0ee71c1ebc771a99010a56db157..cd4bb2c38bdf40f497ece781ce3a9e61de6b4399 100644 (file)
@@ -320,6 +320,7 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
        struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
        struct strbuf base = STRBUF_INIT;
        int interesting = 1;
+       char *traverse_path;
 
        for (i = 0; i < n; i++)
                tx[i].d = t[i];
@@ -329,7 +330,11 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
                make_traverse_path(base.buf, info->prev, &info->name);
                base.buf[info->pathlen-1] = '/';
                strbuf_setlen(&base, info->pathlen);
+               traverse_path = xstrndup(base.buf, info->pathlen);
+       } else {
+               traverse_path = xstrndup(info->name.path, info->pathlen);
        }
+       info->traverse_path = traverse_path;
        for (;;) {
                int trees_used;
                unsigned long mask, dirmask;
@@ -411,6 +416,8 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
        for (i = 0; i < n; i++)
                free_extended_entry(tx + i);
        free(tx);
+       free(traverse_path);
+       info->traverse_path = NULL;
        strbuf_release(&base);
        return error;
 }
index 3b2f7bf17d37de5b475c415b41c6abb4281d5469..174eb617dfb9d6a85bcf1e309227c8b724089765 100644 (file)
@@ -59,6 +59,7 @@ enum follow_symlinks_result {
 enum follow_symlinks_result get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, struct strbuf *result_path, unsigned *mode);
 
 struct traverse_info {
+       const char *traverse_path;
        struct traverse_info *prev;
        struct name_entry name;
        int pathlen;
index 8e2032f4e592910d6f3336f95019647992113877..9f55cc28b9dd41231644053b49875671f5a505fa 100644 (file)
@@ -498,13 +498,14 @@ static int traverse_trees_recursive(int n, unsigned long dirmask,
  * itself - the caller needs to do the final check for the cache
  * entry having more data at the end!
  */
-static int do_compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
+static int do_compare_entry_piecewise(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
 {
        int len, pathlen, ce_len;
        const char *ce_name;
 
        if (info->prev) {
-               int cmp = do_compare_entry(ce, info->prev, &info->name);
+               int cmp = do_compare_entry_piecewise(ce, info->prev,
+                                                    &info->name);
                if (cmp)
                        return cmp;
        }
@@ -522,6 +523,39 @@ static int do_compare_entry(const struct cache_entry *ce, const struct traverse_
        return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
 }
 
+static int do_compare_entry(const struct cache_entry *ce,
+                           const struct traverse_info *info,
+                           const struct name_entry *n)
+{
+       int len, pathlen, ce_len;
+       const char *ce_name;
+       int cmp;
+
+       /*
+        * If we have not precomputed the traverse path, it is quicker
+        * to avoid doing so.  But if we have precomputed it,
+        * it is quicker to use the precomputed version.
+        */
+       if (!info->traverse_path)
+               return do_compare_entry_piecewise(ce, info, n);
+
+       cmp = strncmp(ce->name, info->traverse_path, info->pathlen);
+       if (cmp)
+               return cmp;
+
+       pathlen = info->pathlen;
+       ce_len = ce_namelen(ce);
+
+       if (ce_len < pathlen)
+               return -1;
+
+       ce_len -= pathlen;
+       ce_name = ce->name + pathlen;
+
+       len = tree_entry_len(n);
+       return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
+}
+
 static int compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
 {
        int cmp = do_compare_entry(ce, info, n);
@@ -661,8 +695,19 @@ static int find_cache_pos(struct traverse_info *info,
                                ++o->cache_bottom;
                        continue;
                }
-               if (!ce_in_traverse_path(ce, info))
+               if (!ce_in_traverse_path(ce, info)) {
+                       /*
+                        * Check if we can skip future cache checks
+                        * (because we're already past all possible
+                        * entries in the traverse path).
+                        */
+                       if (info->traverse_path) {
+                               if (strncmp(ce->name, info->traverse_path,
+                                           info->pathlen) > 0)
+                                       break;
+                       }
                        continue;
+               }
                ce_name = ce->name + pfxlen;
                ce_slash = strchr(ce_name, '/');
                if (ce_slash)