checkout: avoid unnecessary match_pathspec calls
author Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Wed, 27 Mar 2013 05:58:21 +0000 (12:58 +0700)
committer Junio C Hamano <gitster@pobox.com>
Wed, 27 Mar 2013 15:53:15 +0000 (08:53 -0700)
In checkout_paths() we do this

- for all updated items, call match_pathspec
- for all items, call match_pathspec (inside unmerge_cache)
- for all items, call match_pathspec (for showing "path .. is unmerged")
- for updated items, call match_pathspec and update paths

That's a lot of duplicate match_pathspec(s) and the function is not
exactly cheap to be called so many times, especially on large indexes.
This patch makes it call match_pathspec once per updated index entry,
save the result in ce_flags and reuse the results in the following
loops.

The changes in 0a1283b (checkout $tree $path: do not clobber local
changes in $path not in $tree - 2011-09-30) limit the affected paths
to ones we read from $tree. We do not do anything to other modified
entries in this case, so the "for all items" above could be modified
to "for all updated items". But..

The command's behavior now is modified slightly: unmerged entries that
match $path, but not updated by $tree, are now NOT touched. This
should be considered a bug fix, not a regression. A new test is
added for this change.

And while at it, free ps_matched after use.

The following command is tested on webkit, 215k entries. The pattern
is chosen mainly to make match_pathspec sweat:

git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*"

before after
real 0m3.493s 0m2.737s
user 0m2.239s 0m1.586s
sys 0m1.252s 0m1.151s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/checkout.c
cache.h
resolve-undo.c
resolve-undo.h
t/t2022-checkout-paths.sh
index a9c1b5a95fea0a805819f995da2d2f1ba194433d..f8033f446e8f9ef1807aa28c2f9eca4e821c42c9 100644 (file)
@@ -271,24 +271,55 @@ static int checkout_paths(const struct checkout_opts *opts,
                ;
        ps_matched = xcalloc(1, pos);
 
+       /*
+        * Make sure all pathspecs participated in locating the paths
+        * to be checked out.
+        */
        for (pos = 0; pos < active_nr; pos++) {
                struct cache_entry *ce = active_cache[pos];
+               ce->ce_flags &= ~CE_MATCHED;
                if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
+                       /*
+                        * "git checkout tree-ish -- path", but this entry
+                        * is in the original index; it will not be checked
+                        * out to the working tree and it does not matter
+                        * if pathspec matched this entry.  We will not do
+                        * anything to this entry at all.
+                        */
                        continue;
-               match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, ps_matched);
+               /*
+                * Either this entry came from the tree-ish we are
+                * checking the paths out of, or we are checking out
+                * of the index.
+                *
+                * If it comes from the tree-ish, we already know it
+                * matches the pathspec and could just stamp
+                * CE_MATCHED to it from update_some(). But we still
+                * need ps_matched and read_tree_recursive (and
+                * eventually tree_entry_interesting) cannot fill
+                * ps_matched yet. Once it can, we can avoid calling
+                * match_pathspec() for _all_ entries when
+                * opts->source_tree != NULL.
+                */
+               if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce),
+                                  0, ps_matched))
+                       ce->ce_flags |= CE_MATCHED;
        }
 
-       if (report_path_error(ps_matched, opts->pathspec, opts->prefix))
+       if (report_path_error(ps_matched, opts->pathspec, opts->prefix)) {
+               free(ps_matched);
                return 1;
+       }
+       free(ps_matched);
 
        /* "checkout -m path" to recreate conflicted state */
        if (opts->merge)
-               unmerge_cache(opts->pathspec);
+               unmerge_marked_index(&the_index);
 
        /* Any unmerged paths? */
        for (pos = 0; pos < active_nr; pos++) {
                struct cache_entry *ce = active_cache[pos];
-               if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
+               if (ce->ce_flags & CE_MATCHED) {
                        if (!ce_stage(ce))
                                continue;
                        if (opts->force) {
@@ -313,9 +344,7 @@ static int checkout_paths(const struct checkout_opts *opts,
        state.refresh_cache = 1;
        for (pos = 0; pos < active_nr; pos++) {
                struct cache_entry *ce = active_cache[pos];
-               if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
-                       continue;
-               if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
+               if (ce->ce_flags & CE_MATCHED) {
                        if (!ce_stage(ce)) {
                                errs |= checkout_entry(ce, &state, NULL);
                                continue;
diff --git a/cache.h b/cache.h
index c56315ccc3605654ff4a08a8b74cc8f25cfb2b41..54a42a11786c183a79aa8b87ae021f132cc998ed 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -162,6 +162,9 @@ struct cache_entry {
 #define CE_UNPACKED          (1 << 24)
 #define CE_NEW_SKIP_WORKTREE (1 << 25)
 
+/* used to temporarily mark paths matched by pathspecs */
+#define CE_MATCHED           (1 << 26)
+
 /*
  * Extended on-disk flags
  */
index 72b46125b719861641a55ee8fd534e0d1f47a94f..639eb9c59f355e46bdd53cf13fd94e8a6a9537da 100644 (file)
@@ -118,7 +118,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
        struct cache_entry *ce;
        struct string_list_item *item;
        struct resolve_undo_info *ru;
-       int i, err = 0;
+       int i, err = 0, matched;
 
        if (!istate->resolve_undo)
                return pos;
@@ -137,6 +137,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
        ru = item->util;
        if (!ru)
                return pos;
+       matched = ce->ce_flags & CE_MATCHED;
        remove_index_entry_at(istate, pos);
        for (i = 0; i < 3; i++) {
                struct cache_entry *nce;
@@ -144,6 +145,8 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
                        continue;
                nce = make_cache_entry(ru->mode[i], ru->sha1[i],
                                       ce->name, i + 1, 0);
+               if (matched)
+                       nce->ce_flags |= CE_MATCHED;
                if (add_index_entry(istate, nce, ADD_CACHE_OK_TO_ADD)) {
                        err = 1;
                        error("cannot unmerge '%s'", ce->name);
@@ -156,6 +159,20 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
        return unmerge_index_entry_at(istate, pos);
 }
 
+void unmerge_marked_index(struct index_state *istate) /* unmerge every index entry flagged CE_MATCHED */
+{
+       int i;
+
+       if (!istate->resolve_undo) /* no resolve-undo data in this index: nothing to do */
+               return;
+
+       for (i = 0; i < istate->cache_nr; i++) {
+               struct cache_entry *ce = istate->cache[i];
+               if (ce->ce_flags & CE_MATCHED) /* only entries marked by the caller are unmerged */
+                       i = unmerge_index_entry_at(istate, i); /* resume from the returned position; the callee may have replaced this entry with staged ones */
+       }
+}
+
 void unmerge_index(struct index_state *istate, const char **pathspec)
 {
        int i;
index 845876911db978c6262dacd9aa122ce9d55bf234..7a30206aad1fdee74f7e7b6e5967f9f8d9048dbf 100644 (file)
@@ -12,5 +12,6 @@ extern struct string_list *resolve_undo_read(const char *, unsigned long);
 extern void resolve_undo_clear_index(struct index_state *);
 extern int unmerge_index_entry_at(struct index_state *, int);
 extern void unmerge_index(struct index_state *, const char **);
+extern void unmerge_marked_index(struct index_state *);
 
 #endif
index 56090d2ebadcdfb127b9db005ae8d092b0255a75..8e3545d8680c5f5179977082849388b1b31c17d8 100755 (executable)
@@ -39,4 +39,26 @@ test_expect_success 'checking out paths out of a tree does not clobber unrelated
        test_cmp expect.next2 dir/next2
 '
 
+test_expect_success 'do not touch unmerged entries matching $path but not in $tree' '
+       git checkout next &&
+       git reset --hard &&
+
+       cat dir/common >expect.common &&
+       EMPTY_SHA1=$(git hash-object -w --stdin </dev/null) &&
+       git rm dir/next0 &&
+       cat >expect.next0 <<-EOF &&
+       100644 $EMPTY_SHA1 1    dir/next0
+       100644 $EMPTY_SHA1 2    dir/next0
+       EOF
+       git update-index --index-info <expect.next0 &&
+
+       git checkout master dir &&
+
+       test_cmp expect.common dir/common &&
+       test_path_is_file dir/master &&
+       git diff --exit-code master dir/master &&
+       git ls-files -s dir/next0 >actual.next0 &&
+       test_cmp expect.next0 actual.next0
+'
+
 test_done