Merge branch 'jc/ls-files-killed-optim'
authorJunio C Hamano <gitster@pobox.com>
Wed, 11 Sep 2013 22:03:28 +0000 (15:03 -0700)
committerJunio C Hamano <gitster@pobox.com>
Wed, 11 Sep 2013 22:03:28 +0000 (15:03 -0700)
"git ls-files -k" needs to crawl only the part of the working tree
that may overlap the paths in the index to find killed files, but
shared code with the logic to find all the untracked files, which
made it unnecessarily inefficient.

* jc/ls-files-killed-optim:
dir.c::test_one_path(): work around directory_exists_in_index_icase() breakage
t3010: update to demonstrate "ls-files -k" optimization pitfalls
ls-files -k: a directory only can be killed if the index has a non-directory
dir.c: use the cache_* macro to access the current index

builtin/ls-files.c
dir.c
dir.h
t/t3010-ls-files-killed-modified.sh
index d4823c9d38e5e22dff62d979601e0b06b9c857f3..e1cf6d8547d4aa7f548fb80f0efb8f4e7b1d9c8e 100644 (file)
@@ -220,6 +220,8 @@ static void show_files(struct dir_struct *dir)
 
        /* For cached/deleted files we don't need to even do the readdir */
        if (show_others || show_killed) {
+               if (!show_others)
+                       dir->flags |= DIR_COLLECT_KILLED_ONLY;
                fill_directory(dir, &pathspec);
                if (show_others)
                        show_other_files(dir);
diff --git a/dir.c b/dir.c
index 1128110a441538215e5bc046183b11b6b49f129e..b439ff0e63481bebb4039901b029bad978b46929 100644 (file)
--- a/dir.c
+++ b/dir.c
@@ -429,15 +429,14 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
        unsigned long sz;
        enum object_type type;
        void *data;
-       struct index_state *istate = &the_index;
 
        len = strlen(path);
-       pos = index_name_pos(istate, path, len);
+       pos = cache_name_pos(path, len);
        if (pos < 0)
                return NULL;
-       if (!ce_skip_worktree(istate->cache[pos]))
+       if (!ce_skip_worktree(active_cache[pos]))
                return NULL;
-       data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz);
+       data = read_sha1_file(active_cache[pos]->sha1, &type, &sz);
        if (!data || type != OBJ_BLOB) {
                free(data);
                return NULL;
@@ -884,13 +883,13 @@ enum exist_status {
 };
 
 /*
- * Do not use the alphabetically stored index to look up
+ * Do not use the alphabetically sorted index to look up
  * the directory name; instead, use the case insensitive
  * name hash.
  */
 static enum exist_status directory_exists_in_index_icase(const char *dirname, int len)
 {
-       const struct cache_entry *ce = index_name_exists(&the_index, dirname, len + 1, ignore_case);
+       const struct cache_entry *ce = cache_name_exists(dirname, len + 1, ignore_case);
        unsigned char endchar;
 
        if (!ce)
@@ -1132,14 +1131,51 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
                                          int dtype, struct dirent *de)
 {
        int exclude;
+       int has_path_in_index = !!cache_name_exists(path->buf, path->len, ignore_case);
+
        if (dtype == DT_UNKNOWN)
                dtype = get_dtype(de, path->buf, path->len);
 
        /* Always exclude indexed files */
-       if (dtype != DT_DIR &&
-           cache_name_exists(path->buf, path->len, ignore_case))
+       if (dtype != DT_DIR && has_path_in_index)
                return path_none;
 
+       /*
+        * When we are looking at a directory P in the working tree,
+        * there are three cases:
+        *
+        * (1) P exists in the index.  Everything inside the directory P in
+        * the working tree needs to go when P is checked out from the
+        * index.
+        *
+        * (2) P does not exist in the index, but there is P/Q in the index.
+        * We know P will stay a directory when we check out the contents
+        * of the index, but we do not know yet if there is a directory
+        * P/Q in the working tree to be killed, so we need to recurse.
+        *
+        * (3) P does not exist in the index, and there is no P/Q in the index
+        * to require P to be a directory, either.  Only in this case, we
+        * know that everything inside P will not be killed without
+        * recursing.
+        */
+       if ((dir->flags & DIR_COLLECT_KILLED_ONLY) &&
+           (dtype == DT_DIR) &&
+           !has_path_in_index) {
+               /*
+                * NEEDSWORK: directory_exists_in_index_icase()
+                * assumes that one byte past the given path is
+                * readable and has '/', which needs to be fixed, but
+                * until then, work it around in the caller.
+                */
+               strbuf_addch(path, '/');
+               if (directory_exists_in_index(path->buf, path->len - 1) ==
+                   index_nonexistent) {
+                       strbuf_setlen(path, path->len - 1);
+                       return path_none;
+               }
+               strbuf_setlen(path, path->len - 1);
+       }
+
        exclude = is_excluded(dir, path->buf, &dtype);
 
        /*
diff --git a/dir.h b/dir.h
index 343ec7aa3188a904f4517241b31250a57f11eddb..9b7e4e77d8b11bab92a91a6ce3e8920e50d23f9b 100644 (file)
--- a/dir.h
+++ b/dir.h
@@ -80,7 +80,8 @@ struct dir_struct {
                DIR_HIDE_EMPTY_DIRECTORIES = 1<<2,
                DIR_NO_GITLINKS = 1<<3,
                DIR_COLLECT_IGNORED = 1<<4,
-               DIR_SHOW_IGNORED_TOO = 1<<5
+               DIR_SHOW_IGNORED_TOO = 1<<5,
+               DIR_COLLECT_KILLED_ONLY = 1<<6
        } flags;
        struct dir_entry **entries;
        struct dir_entry **ignored;
index f611d799b690457e306571adf16813b238a27dc9..6d3b828a951e4c03b886087393269dbe4d5c5c8f 100755 (executable)
@@ -11,6 +11,7 @@ This test prepares the following in the cache:
     path1       - a symlink
     path2/file2 - a file in a directory
     path3/file3 - a file in a directory
+    pathx/ju    - a file in a directory
     submod1/   - a submodule
     submod2/   - another submodule
 
@@ -23,6 +24,7 @@ and the following on the filesystem:
     path4      - a file
     path5      - a symlink
     path6/file6 - a file in a directory
+    pathx/ju/nk - a file in a directory to be killed
     submod1/   - a submodule (modified from the cache)
     submod2/   - a submodule (matches the cache)
 
@@ -44,14 +46,15 @@ modified without reporting path9 and path10.  submod1 is also modified.
 test_expect_success 'git update-index --add to add various paths.' '
        date >path0 &&
        test_ln_s_add xyzzy path1 &&
-       mkdir path2 path3 &&
+       mkdir path2 path3 pathx &&
        date >path2/file2 &&
        date >path3/file3 &&
+       >pathx/ju &&
        : >path7 &&
        date >path8 &&
        : >path9 &&
        date >path10 &&
-       git update-index --add -- path0 path?/file? path7 path8 path9 path10 &&
+       git update-index --add -- path0 path?/file? pathx/ju path7 path8 path9 path10 &&
        for i in 1 2
        do
                git init submod$i &&
@@ -77,7 +80,7 @@ test_expect_success 'git ls-files -k to show killed files.' '
                date >path3 &&
                date >path5
        fi &&
-       mkdir path0 path1 path6 &&
+       mkdir -p path0 path1 path6 pathx/ju &&
        date >path0/file0 &&
        date >path1/file1 &&
        date >path6/file6 &&
@@ -85,16 +88,23 @@ test_expect_success 'git ls-files -k to show killed files.' '
        : >path8 &&
        : >path9 &&
        touch path10 &&
-       git ls-files -k >.output
-'
-
-test_expect_success 'validate git ls-files -k output.' '
-       cat >.expected <<-\EOF &&
+       >pathx/ju/nk &&
+       cat >.expected <<-\EOF
        path0/file0
        path1/file1
        path2
        path3
+       pathx/ju/nk
        EOF
+'
+
+test_expect_success 'git ls-files -k output (w/o icase)' '
+       git ls-files -k >.output
+       test_cmp .expected .output
+'
+
+test_expect_success 'git ls-files -k output (w/ icase)' '
+       git -c core.ignorecase=true ls-files -k >.output
        test_cmp .expected .output
 '
 
@@ -110,6 +120,7 @@ test_expect_success 'validate git ls-files -m output.' '
        path3/file3
        path7
        path8
+       pathx/ju
        submod1
        EOF
        test_cmp .expected .output