tag.c: use 'ref-filter' APIs
[gitweb.git] / dir.c
diff --git a/dir.c b/dir.c
index 56106c9ae8a954104d2f79aa2a352e7790eb21af..1ac8d5dd286837893af711037f718c01d3e62f10 100644 (file)
--- a/dir.c
+++ b/dir.c
@@ -13,6 +13,8 @@
 #include "wildmatch.h"
 #include "pathspec.h"
 #include "utf8.h"
+#include "varint.h"
+#include "ewah/ewok.h"
 
 struct path_simplify {
        int len;
@@ -32,8 +34,22 @@ enum path_treatment {
        path_untracked
 };
 
+/*
+ * Support data structure for our opendir/readdir/closedir wrappers
+ */
+struct cached_dir {
+       DIR *fdir;
+       struct untracked_cache_dir *untracked;
+       int nr_files;
+       int nr_dirs;
+
+       struct dirent *de;
+       const char *file;
+       struct untracked_cache_dir *ucd;
+};
+
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
-       const char *path, int len,
+       const char *path, int len, struct untracked_cache_dir *untracked,
        int check_only, const struct path_simplify *simplify);
 static int get_dtype(struct dirent *de, const char *path, int len);
 
@@ -508,7 +524,8 @@ void add_exclude(const char *string, const char *base,
        x->el = el;
 }
 
-static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
+static void *read_skip_worktree_file_from_index(const char *path, size_t *size,
+                                               struct sha1_stat *sha1_stat)
 {
        int pos, len;
        unsigned long sz;
@@ -527,6 +544,10 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
                return NULL;
        }
        *size = xsize_t(sz);
+       if (sha1_stat) {
+               memset(&sha1_stat->stat, 0, sizeof(sha1_stat->stat));
+               hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
+       }
        return data;
 }
 
@@ -571,11 +592,93 @@ static void trim_trailing_spaces(char *buf)
                *last_space = '\0';
 }
 
-int add_excludes_from_file_to_list(const char *fname,
-                                  const char *base,
-                                  int baselen,
-                                  struct exclude_list *el,
-                                  int check_index)
+/*
+ * Given a subdirectory name and "dir" of the current directory,
+ * search the subdir in "dir" and return it, or create a new one if it
+ * does not exist in "dir".
+ *
+ * If "name" has the trailing slash, it'll be excluded in the search.
+ */
+static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
+                                                   struct untracked_cache_dir *dir,
+                                                   const char *name, int len)
+{
+       int first, last;
+       struct untracked_cache_dir *d;
+       if (!dir)
+               return NULL;
+       if (len && name[len - 1] == '/')
+               len--;
+       first = 0;
+       last = dir->dirs_nr;
+       while (last > first) {
+               int cmp, next = (last + first) >> 1;
+               d = dir->dirs[next];
+               cmp = strncmp(name, d->name, len);
+               if (!cmp && strlen(d->name) > len)
+                       cmp = -1;
+               if (!cmp)
+                       return d;
+               if (cmp < 0) {
+                       last = next;
+                       continue;
+               }
+               first = next+1;
+       }
+
+       uc->dir_created++;
+       d = xmalloc(sizeof(*d) + len + 1);
+       memset(d, 0, sizeof(*d));
+       memcpy(d->name, name, len);
+       d->name[len] = '\0';
+
+       ALLOC_GROW(dir->dirs, dir->dirs_nr + 1, dir->dirs_alloc);
+       memmove(dir->dirs + first + 1, dir->dirs + first,
+               (dir->dirs_nr - first) * sizeof(*dir->dirs));
+       dir->dirs_nr++;
+       dir->dirs[first] = d;
+       return d;
+}
+
+static void do_invalidate_gitignore(struct untracked_cache_dir *dir)
+{
+       int i;
+       dir->valid = 0;
+       dir->untracked_nr = 0;
+       for (i = 0; i < dir->dirs_nr; i++)
+               do_invalidate_gitignore(dir->dirs[i]);
+}
+
+static void invalidate_gitignore(struct untracked_cache *uc,
+                                struct untracked_cache_dir *dir)
+{
+       uc->gitignore_invalidated++;
+       do_invalidate_gitignore(dir);
+}
+
+static void invalidate_directory(struct untracked_cache *uc,
+                                struct untracked_cache_dir *dir)
+{
+       int i;
+       uc->dir_invalidated++;
+       dir->valid = 0;
+       dir->untracked_nr = 0;
+       for (i = 0; i < dir->dirs_nr; i++)
+               dir->dirs[i]->recurse = 0;
+}
+
+/*
+ * Given a file with name "fname", read it (either from disk, or from
+ * the index if "check_index" is non-zero), parse it and store the
+ * exclude rules in "el".
+ *
+ * If "ss" is not NULL, compute SHA-1 of the exclude file and fill
+ * stat data from disk (only valid if add_excludes returns zero). If
+ * ss_valid is non-zero, "ss" must contain good value as input.
+ */
+static int add_excludes(const char *fname, const char *base, int baselen,
+                       struct exclude_list *el, int check_index,
+                       struct sha1_stat *sha1_stat)
 {
        struct stat st;
        int fd, i, lineno = 1;
@@ -589,7 +692,7 @@ int add_excludes_from_file_to_list(const char *fname,
                if (0 <= fd)
                        close(fd);
                if (!check_index ||
-                   (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL)
+                   (buf = read_skip_worktree_file_from_index(fname, &size, sha1_stat)) == NULL)
                        return -1;
                if (size == 0) {
                        free(buf);
@@ -602,6 +705,11 @@ int add_excludes_from_file_to_list(const char *fname,
        } else {
                size = xsize_t(st.st_size);
                if (size == 0) {
+                       if (sha1_stat) {
+                               fill_stat_data(&sha1_stat->stat, &st);
+                               hashcpy(sha1_stat->sha1, EMPTY_BLOB_SHA1_BIN);
+                               sha1_stat->valid = 1;
+                       }
                        close(fd);
                        return 0;
                }
@@ -613,6 +721,22 @@ int add_excludes_from_file_to_list(const char *fname,
                }
                buf[size++] = '\n';
                close(fd);
+               if (sha1_stat) {
+                       int pos;
+                       if (sha1_stat->valid &&
+                           !match_stat_data_racy(&the_index, &sha1_stat->stat, &st))
+                               ; /* no content change, ss->sha1 still good */
+                       else if (check_index &&
+                                (pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
+                                !ce_stage(active_cache[pos]) &&
+                                ce_uptodate(active_cache[pos]) &&
+                                !would_convert_to_git(fname))
+                               hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
+                       else
+                               hash_sha1_file(buf, size, "blob", sha1_stat->sha1);
+                       fill_stat_data(&sha1_stat->stat, &st);
+                       sha1_stat->valid = 1;
+               }
        }
 
        el->filebuf = buf;
@@ -636,6 +760,13 @@ int add_excludes_from_file_to_list(const char *fname,
        return 0;
 }
 
+int add_excludes_from_file_to_list(const char *fname, const char *base,
+                                  int baselen, struct exclude_list *el,
+                                  int check_index)
+{
+       return add_excludes(fname, base, baselen, el, check_index, NULL);
+}
+
 struct exclude_list *add_exclude_list(struct dir_struct *dir,
                                      int group_type, const char *src)
 {
@@ -653,14 +784,28 @@ struct exclude_list *add_exclude_list(struct dir_struct *dir,
 /*
  * Used to set up core.excludesfile and .git/info/exclude lists.
  */
-void add_excludes_from_file(struct dir_struct *dir, const char *fname)
+static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
+                                    struct sha1_stat *sha1_stat)
 {
        struct exclude_list *el;
+       /*
+        * catch setup_standard_excludes() that's called before
+        * dir->untracked is assigned. That function behaves
+        * differently when dir->untracked is non-NULL.
+        */
+       if (!dir->untracked)
+               dir->unmanaged_exclude_files++;
        el = add_exclude_list(dir, EXC_FILE, fname);
-       if (add_excludes_from_file_to_list(fname, "", 0, el, 0) < 0)
+       if (add_excludes(fname, "", 0, el, 0, sha1_stat) < 0)
                die("cannot use %s as an exclude file", fname);
 }
 
+void add_excludes_from_file(struct dir_struct *dir, const char *fname)
+{
+       dir->unmanaged_exclude_files++; /* see validate_untracked_cache() */
+       add_excludes_from_file_1(dir, fname, NULL);
+}
+
 int match_basename(const char *basename, int basenamelen,
                   const char *pattern, int prefix, int patternlen,
                   int flags)
@@ -835,6 +980,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
        struct exclude_list_group *group;
        struct exclude_list *el;
        struct exclude_stack *stk = NULL;
+       struct untracked_cache_dir *untracked;
        int current;
 
        group = &dir->exclude_list_group[EXC_DIRS];
@@ -872,8 +1018,14 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
        /* Read from the parent directories and push them down. */
        current = stk ? stk->baselen : -1;
        strbuf_setlen(&dir->basebuf, current < 0 ? 0 : current);
+       if (dir->untracked)
+               untracked = stk ? stk->ucd : dir->untracked->root;
+       else
+               untracked = NULL;
+
        while (current < baselen) {
                const char *cp;
+               struct sha1_stat sha1_stat;
 
                stk = xcalloc(1, sizeof(*stk));
                if (current < 0) {
@@ -884,10 +1036,15 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
                        if (!cp)
                                die("oops in prep_exclude");
                        cp++;
+                       untracked =
+                               lookup_untracked(dir->untracked, untracked,
+                                                base + current,
+                                                cp - base - current);
                }
                stk->prev = dir->exclude_stack;
                stk->baselen = cp - base;
                stk->exclude_ix = group->nr;
+               stk->ucd = untracked;
                el = add_exclude_list(dir, EXC_DIRS, NULL);
                strbuf_add(&dir->basebuf, base + current, stk->baselen - current);
                assert(stk->baselen == dir->basebuf.len);
@@ -910,7 +1067,23 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
                }
 
                /* Try to read per-directory file */
-               if (dir->exclude_per_dir) {
+               hashclr(sha1_stat.sha1);
+               sha1_stat.valid = 0;
+               if (dir->exclude_per_dir &&
+                   /*
+                    * If we know that no files have been added in
+                    * this directory (i.e. valid_cached_dir() has
+                    * been executed and set untracked->valid) ..
+                    */
+                   (!untracked || !untracked->valid ||
+                    /*
+                     * .. and .gitignore does not exist before
+                     * (i.e. null exclude_sha1 and skip_worktree is
+                     * not set). Then we can skip loading .gitignore,
+                     * which would result in ENOENT anyway.
+                     * skip_worktree is taken care in read_directory()
+                     */
+                    !is_null_sha1(untracked->exclude_sha1))) {
                        /*
                         * dir->basebuf gets reused by the traversal, but we
                         * need fname to remain unchanged to ensure the src
@@ -923,8 +1096,27 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
                        strbuf_addbuf(&sb, &dir->basebuf);
                        strbuf_addstr(&sb, dir->exclude_per_dir);
                        el->src = strbuf_detach(&sb, NULL);
-                       add_excludes_from_file_to_list(el->src, el->src,
-                                                      stk->baselen, el, 1);
+                       add_excludes(el->src, el->src, stk->baselen, el, 1,
+                                    untracked ? &sha1_stat : NULL);
+               }
+               /*
+                * NEEDSWORK: when untracked cache is enabled, prep_exclude()
+                * will first be called in valid_cached_dir() then maybe many
+                * times more in last_exclude_matching(). When the cache is
+                * used, last_exclude_matching() will not be called and
+                * reading .gitignore content will be a waste.
+                *
+                * So when it's called by valid_cached_dir() and we can get
+                * .gitignore SHA-1 from the index (i.e. .gitignore is not
+                * modified on work tree), we could delay reading the
+                * .gitignore content until we absolutely need it in
+                * last_exclude_matching(). Be careful about ignore rule
+                * order, though, if you do that.
+                */
+               if (untracked &&
+                   hashcmp(sha1_stat.sha1, untracked->exclude_sha1)) {
+                       invalidate_gitignore(dir->untracked, untracked);
+                       hashcpy(untracked->exclude_sha1, sha1_stat.sha1);
                }
                dir->exclude_stack = stk;
                current = stk->baselen;
@@ -1105,6 +1297,7 @@ static enum exist_status directory_exists_in_index(const char *dirname, int len)
  *  (c) otherwise, we recurse into it.
  */
 static enum path_treatment treat_directory(struct dir_struct *dir,
+       struct untracked_cache_dir *untracked,
        const char *dirname, int len, int exclude,
        const struct path_simplify *simplify)
 {
@@ -1132,7 +1325,9 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
        if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
                return exclude ? path_excluded : path_untracked;
 
-       return read_directory_recursive(dir, dirname, len, 1, simplify);
+       untracked = lookup_untracked(dir->untracked, untracked, dirname, len);
+       return read_directory_recursive(dir, dirname, len,
+                                       untracked, 1, simplify);
 }
 
 /*
@@ -1248,6 +1443,7 @@ static int get_dtype(struct dirent *de, const char *path, int len)
 }
 
 static enum path_treatment treat_one_path(struct dir_struct *dir,
+                                         struct untracked_cache_dir *untracked,
                                          struct strbuf *path,
                                          const struct path_simplify *simplify,
                                          int dtype, struct dirent *de)
@@ -1300,7 +1496,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
                return path_none;
        case DT_DIR:
                strbuf_addch(path, '/');
-               return treat_directory(dir, path->buf, path->len, exclude,
+               return treat_directory(dir, untracked, path->buf, path->len, exclude,
                        simplify);
        case DT_REG:
        case DT_LNK:
@@ -1308,14 +1504,52 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
        }
 }
 
+static enum path_treatment treat_path_fast(struct dir_struct *dir,
+                                          struct untracked_cache_dir *untracked,
+                                          struct cached_dir *cdir,
+                                          struct strbuf *path,
+                                          int baselen,
+                                          const struct path_simplify *simplify)
+{
+       strbuf_setlen(path, baselen);
+       if (!cdir->ucd) {
+               strbuf_addstr(path, cdir->file);
+               return path_untracked;
+       }
+       strbuf_addstr(path, cdir->ucd->name);
+       /* treat_one_path() does this before it calls treat_directory() */
+       if (path->buf[path->len - 1] != '/')
+               strbuf_addch(path, '/');
+       if (cdir->ucd->check_only)
+               /*
+                * check_only is set as a result of treat_directory() getting
+                * to its bottom. Verify again the same set of directories
+                * with check_only set.
+                */
+               return read_directory_recursive(dir, path->buf, path->len,
+                                               cdir->ucd, 1, simplify);
+       /*
+        * We get path_recurse in the first run when
+        * directory_exists_in_index() returns index_nonexistent. We
+        * are sure that new changes in the index does not impact the
+        * outcome. Return now.
+        */
+       return path_recurse;
+}
+
 static enum path_treatment treat_path(struct dir_struct *dir,
-                                     struct dirent *de,
+                                     struct untracked_cache_dir *untracked,
+                                     struct cached_dir *cdir,
                                      struct strbuf *path,
                                      int baselen,
                                      const struct path_simplify *simplify)
 {
        int dtype;
+       struct dirent *de = cdir->de;
 
+       if (!de)
+               return treat_path_fast(dir, untracked, cdir, path,
+                                      baselen, simplify);
        if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
                return path_none;
        strbuf_setlen(path, baselen);
@@ -1324,7 +1558,121 @@ static enum path_treatment treat_path(struct dir_struct *dir,
                return path_none;
 
        dtype = DTYPE(de);
-       return treat_one_path(dir, path, simplify, dtype, de);
+       return treat_one_path(dir, untracked, path, simplify, dtype, de);
+}
+
+static void add_untracked(struct untracked_cache_dir *dir, const char *name)
+{
+       if (!dir)
+               return;
+       ALLOC_GROW(dir->untracked, dir->untracked_nr + 1,
+                  dir->untracked_alloc);
+       dir->untracked[dir->untracked_nr++] = xstrdup(name);
+}
+
+static int valid_cached_dir(struct dir_struct *dir,
+                           struct untracked_cache_dir *untracked,
+                           struct strbuf *path,
+                           int check_only)
+{
+       struct stat st;
+
+       if (!untracked)
+               return 0;
+
+       if (stat(path->len ? path->buf : ".", &st)) {
+               invalidate_directory(dir->untracked, untracked);
+               memset(&untracked->stat_data, 0, sizeof(untracked->stat_data));
+               return 0;
+       }
+       if (!untracked->valid ||
+           match_stat_data_racy(&the_index, &untracked->stat_data, &st)) {
+               if (untracked->valid)
+                       invalidate_directory(dir->untracked, untracked);
+               fill_stat_data(&untracked->stat_data, &st);
+               return 0;
+       }
+
+       if (untracked->check_only != !!check_only) {
+               invalidate_directory(dir->untracked, untracked);
+               return 0;
+       }
+
+       /*
+        * prep_exclude will be called eventually on this directory,
+        * but it's called much later in last_exclude_matching(). We
+        * need it now to determine the validity of the cache for this
+        * path. The next calls will be nearly no-op, the way
+        * prep_exclude() is designed.
+        */
+       if (path->len && path->buf[path->len - 1] != '/') {
+               strbuf_addch(path, '/');
+               prep_exclude(dir, path->buf, path->len);
+               strbuf_setlen(path, path->len - 1);
+       } else
+               prep_exclude(dir, path->buf, path->len);
+
+       /* hopefully prep_exclude() haven't invalidated this entry... */
+       return untracked->valid;
+}
+
+static int open_cached_dir(struct cached_dir *cdir,
+                          struct dir_struct *dir,
+                          struct untracked_cache_dir *untracked,
+                          struct strbuf *path,
+                          int check_only)
+{
+       memset(cdir, 0, sizeof(*cdir));
+       cdir->untracked = untracked;
+       if (valid_cached_dir(dir, untracked, path, check_only))
+               return 0;
+       cdir->fdir = opendir(path->len ? path->buf : ".");
+       if (dir->untracked)
+               dir->untracked->dir_opened++;
+       if (!cdir->fdir)
+               return -1;
+       return 0;
+}
+
+static int read_cached_dir(struct cached_dir *cdir)
+{
+       if (cdir->fdir) {
+               cdir->de = readdir(cdir->fdir);
+               if (!cdir->de)
+                       return -1;
+               return 0;
+       }
+       while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
+               struct untracked_cache_dir *d = cdir->untracked->dirs[cdir->nr_dirs];
+               if (!d->recurse) {
+                       cdir->nr_dirs++;
+                       continue;
+               }
+               cdir->ucd = d;
+               cdir->nr_dirs++;
+               return 0;
+       }
+       cdir->ucd = NULL;
+       if (cdir->nr_files < cdir->untracked->untracked_nr) {
+               struct untracked_cache_dir *d = cdir->untracked;
+               cdir->file = d->untracked[cdir->nr_files++];
+               return 0;
+       }
+       return -1;
+}
+
+static void close_cached_dir(struct cached_dir *cdir)
+{
+       if (cdir->fdir)
+               closedir(cdir->fdir);
+       /*
+        * We have gone through this directory and found no untracked
+        * entries. Mark it valid.
+        */
+       if (cdir->untracked) {
+               cdir->untracked->valid = 1;
+               cdir->untracked->recurse = 1;
+       }
 }
 
 /*
@@ -1340,38 +1688,48 @@ static enum path_treatment treat_path(struct dir_struct *dir,
  */
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
                                    const char *base, int baselen,
-                                   int check_only,
+                                   struct untracked_cache_dir *untracked, int check_only,
                                    const struct path_simplify *simplify)
 {
-       DIR *fdir;
+       struct cached_dir cdir;
        enum path_treatment state, subdir_state, dir_state = path_none;
-       struct dirent *de;
        struct strbuf path = STRBUF_INIT;
 
        strbuf_add(&path, base, baselen);
 
-       fdir = opendir(path.len ? path.buf : ".");
-       if (!fdir)
+       if (open_cached_dir(&cdir, dir, untracked, &path, check_only))
                goto out;
 
-       while ((de = readdir(fdir)) != NULL) {
+       if (untracked)
+               untracked->check_only = !!check_only;
+
+       while (!read_cached_dir(&cdir)) {
                /* check how the file or directory should be treated */
-               state = treat_path(dir, de, &path, baselen, simplify);
+               state = treat_path(dir, untracked, &cdir, &path, baselen, simplify);
+
                if (state > dir_state)
                        dir_state = state;
 
                /* recurse into subdir if instructed by treat_path */
                if (state == path_recurse) {
-                       subdir_state = read_directory_recursive(dir, path.buf,
-                               path.len, check_only, simplify);
+                       struct untracked_cache_dir *ud;
+                       ud = lookup_untracked(dir->untracked, untracked,
+                                             path.buf + baselen,
+                                             path.len - baselen);
+                       subdir_state =
+                               read_directory_recursive(dir, path.buf, path.len,
+                                                        ud, check_only, simplify);
                        if (subdir_state > dir_state)
                                dir_state = subdir_state;
                }
 
                if (check_only) {
                        /* abort early if maximum state has been reached */
-                       if (dir_state == path_untracked)
+                       if (dir_state == path_untracked) {
+                               if (cdir.fdir)
+                                       add_untracked(untracked, path.buf + baselen);
                                break;
+                       }
                        /* skip the dir_add_* part */
                        continue;
                }
@@ -1389,15 +1747,18 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
                        break;
 
                case path_untracked:
-                       if (!(dir->flags & DIR_SHOW_IGNORED))
-                               dir_add_name(dir, path.buf, path.len);
+                       if (dir->flags & DIR_SHOW_IGNORED)
+                               break;
+                       dir_add_name(dir, path.buf, path.len);
+                       if (cdir.fdir)
+                               add_untracked(untracked, path.buf + baselen);
                        break;
 
                default:
                        break;
                }
        }
-       closedir(fdir);
+       close_cached_dir(&cdir);
  out:
        strbuf_release(&path);
 
@@ -1467,7 +1828,7 @@ static int treat_leading_path(struct dir_struct *dir,
                        break;
                if (simplify_away(sb.buf, sb.len, simplify))
                        break;
-               if (treat_one_path(dir, &sb, simplify,
+               if (treat_one_path(dir, NULL, &sb, simplify,
                                   DT_DIR, NULL) == path_none)
                        break; /* do not recurse into it */
                if (len <= baselen) {
@@ -1480,9 +1841,139 @@ static int treat_leading_path(struct dir_struct *dir,
        return rc;
 }
 
+static const char *get_ident_string(void)
+{
+       static struct strbuf sb = STRBUF_INIT;
+       struct utsname uts;
+
+       if (sb.len)
+               return sb.buf;
+       if (uname(&uts) < 0)
+               die_errno(_("failed to get kernel name and information"));
+       strbuf_addf(&sb, "Location %s, system %s %s %s", get_git_work_tree(),
+                   uts.sysname, uts.release, uts.version);
+       return sb.buf;
+}
+
+static int ident_in_untracked(const struct untracked_cache *uc)
+{
+       const char *end = uc->ident.buf + uc->ident.len;
+       const char *p   = uc->ident.buf;
+
+       for (p = uc->ident.buf; p < end; p += strlen(p) + 1)
+               if (!strcmp(p, get_ident_string()))
+                       return 1;
+       return 0;
+}
+
+void add_untracked_ident(struct untracked_cache *uc)
+{
+       if (ident_in_untracked(uc))
+               return;
+       strbuf_addstr(&uc->ident, get_ident_string());
+       /* this strbuf contains a list of strings, save NUL too */
+       strbuf_addch(&uc->ident, 0);
+}
+
+static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *dir,
+                                                     int base_len,
+                                                     const struct pathspec *pathspec)
+{
+       struct untracked_cache_dir *root;
+       int i;
+
+       if (!dir->untracked || getenv("GIT_DISABLE_UNTRACKED_CACHE"))
+               return NULL;
+
+       /*
+        * We only support $GIT_DIR/info/exclude and core.excludesfile
+        * as the global ignore rule files. Any other additions
+        * (e.g. from command line) invalidate the cache. This
+        * condition also catches running setup_standard_excludes()
+        * before setting dir->untracked!
+        */
+       if (dir->unmanaged_exclude_files)
+               return NULL;
+
+       /*
+        * Optimize for the main use case only: whole-tree git
+        * status. More work involved in treat_leading_path() if we
+        * use cache on just a subset of the worktree. pathspec
+        * support could make the matter even worse.
+        */
+       if (base_len || (pathspec && pathspec->nr))
+               return NULL;
+
+       /* Different set of flags may produce different results */
+       if (dir->flags != dir->untracked->dir_flags ||
+           /*
+            * See treat_directory(), case index_nonexistent. Without
+            * this flag, we may need to also cache .git file content
+            * for the resolve_gitlink_ref() call, which we don't.
+            */
+           !(dir->flags & DIR_SHOW_OTHER_DIRECTORIES) ||
+           /* We don't support collecting ignore files */
+           (dir->flags & (DIR_SHOW_IGNORED | DIR_SHOW_IGNORED_TOO |
+                          DIR_COLLECT_IGNORED)))
+               return NULL;
+
+       /*
+        * If we use .gitignore in the cache and now you change it to
+        * .gitexclude, everything will go wrong.
+        */
+       if (dir->exclude_per_dir != dir->untracked->exclude_per_dir &&
+           strcmp(dir->exclude_per_dir, dir->untracked->exclude_per_dir))
+               return NULL;
+
+       /*
+        * EXC_CMDL is not considered in the cache. If people set it,
+        * skip the cache.
+        */
+       if (dir->exclude_list_group[EXC_CMDL].nr)
+               return NULL;
+
+       /*
+        * An optimization in prep_exclude() does not play well with
+        * CE_SKIP_WORKTREE. It's a rare case anyway, if a single
+        * entry has that bit set, disable the whole untracked cache.
+        */
+       for (i = 0; i < active_nr; i++)
+               if (ce_skip_worktree(active_cache[i]))
+                       return NULL;
+
+       if (!ident_in_untracked(dir->untracked)) {
+               warning(_("Untracked cache is disabled on this system."));
+               return NULL;
+       }
+
+       if (!dir->untracked->root) {
+               const int len = sizeof(*dir->untracked->root);
+               dir->untracked->root = xmalloc(len);
+               memset(dir->untracked->root, 0, len);
+       }
+
+       /* Validate $GIT_DIR/info/exclude and core.excludesfile */
+       root = dir->untracked->root;
+       if (hashcmp(dir->ss_info_exclude.sha1,
+                   dir->untracked->ss_info_exclude.sha1)) {
+               invalidate_gitignore(dir->untracked, root);
+               dir->untracked->ss_info_exclude = dir->ss_info_exclude;
+       }
+       if (hashcmp(dir->ss_excludes_file.sha1,
+                   dir->untracked->ss_excludes_file.sha1)) {
+               invalidate_gitignore(dir->untracked, root);
+               dir->untracked->ss_excludes_file = dir->ss_excludes_file;
+       }
+
+       /* Make sure this directory is not dropped out at saving phase */
+       root->recurse = 1;
+       return root;
+}
+
 int read_directory(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec)
 {
        struct path_simplify *simplify;
+       struct untracked_cache_dir *untracked;
 
        /*
         * Check out create_simplify()
@@ -1506,11 +1997,39 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
         * create_simplify().
         */
        simplify = create_simplify(pathspec ? pathspec->_raw : NULL);
+       untracked = validate_untracked_cache(dir, len, pathspec);
+       if (!untracked)
+               /*
+                * make sure untracked cache code path is disabled,
+                * e.g. prep_exclude()
+                */
+               dir->untracked = NULL;
        if (!len || treat_leading_path(dir, path, len, simplify))
-               read_directory_recursive(dir, path, len, 0, simplify);
+               read_directory_recursive(dir, path, len, untracked, 0, simplify);
        free_simplify(simplify);
        qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
        qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
+       if (dir->untracked) {
+               static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS);
+               trace_printf_key(&trace_untracked_stats,
+                                "node creation: %u\n"
+                                "gitignore invalidation: %u\n"
+                                "directory invalidation: %u\n"
+                                "opendir: %u\n",
+                                dir->untracked->dir_created,
+                                dir->untracked->gitignore_invalidated,
+                                dir->untracked->dir_invalidated,
+                                dir->untracked->dir_opened);
+               if (dir->untracked == the_index.untracked &&
+                   (dir->untracked->dir_opened ||
+                    dir->untracked->gitignore_invalidated ||
+                    dir->untracked->dir_invalidated))
+                       the_index.cache_changed |= UNTRACKED_CHANGED;
+               if (dir->untracked != the_index.untracked) {
+                       free(dir->untracked);
+                       dir->untracked = NULL;
+               }
+       }
        return dir->nr;
 }
 
@@ -1666,6 +2185,8 @@ int remove_dir_recursively(struct strbuf *path, int flag)
        return remove_dir_recurse(path, flag, NULL);
 }
 
+static GIT_PATH_FUNC(git_path_info_exclude, "info/exclude")
+
 void setup_standard_excludes(struct dir_struct *dir)
 {
        const char *path;
@@ -1676,12 +2197,14 @@ void setup_standard_excludes(struct dir_struct *dir)
        if (!excludes_file)
                excludes_file = xdg_config_home("ignore");
        if (excludes_file && !access_or_warn(excludes_file, R_OK, 0))
-               add_excludes_from_file(dir, excludes_file);
+               add_excludes_from_file_1(dir, excludes_file,
+                                        dir->untracked ? &dir->ss_excludes_file : NULL);
 
        /* per repository user preference */
-       path = git_path("info/exclude");
+       path = git_path_info_exclude();
        if (!access_or_warn(path, R_OK, 0))
-               add_excludes_from_file(dir, path);
+               add_excludes_from_file_1(dir, path,
+                                        dir->untracked ? &dir->ss_info_exclude : NULL);
 }
 
 int remove_path(const char *name)
@@ -1733,3 +2256,404 @@ void clear_directory(struct dir_struct *dir)
        }
        strbuf_release(&dir->basebuf);
 }
+
+struct ondisk_untracked_cache {
+       struct stat_data info_exclude_stat;
+       struct stat_data excludes_file_stat;
+       uint32_t dir_flags;
+       unsigned char info_exclude_sha1[20];
+       unsigned char excludes_file_sha1[20];
+       char exclude_per_dir[FLEX_ARRAY];
+};
+
+#define ouc_size(len) (offsetof(struct ondisk_untracked_cache, exclude_per_dir) + len + 1)
+
+struct write_data {
+       int index;         /* number of written untracked_cache_dir */
+       struct ewah_bitmap *check_only; /* from untracked_cache_dir */
+       struct ewah_bitmap *valid;      /* from untracked_cache_dir */
+       struct ewah_bitmap *sha1_valid; /* set if exclude_sha1 is not null */
+       struct strbuf out;
+       struct strbuf sb_stat;
+       struct strbuf sb_sha1;
+};
+
+static void stat_data_to_disk(struct stat_data *to, const struct stat_data *from)
+{
+       to->sd_ctime.sec  = htonl(from->sd_ctime.sec);
+       to->sd_ctime.nsec = htonl(from->sd_ctime.nsec);
+       to->sd_mtime.sec  = htonl(from->sd_mtime.sec);
+       to->sd_mtime.nsec = htonl(from->sd_mtime.nsec);
+       to->sd_dev        = htonl(from->sd_dev);
+       to->sd_ino        = htonl(from->sd_ino);
+       to->sd_uid        = htonl(from->sd_uid);
+       to->sd_gid        = htonl(from->sd_gid);
+       to->sd_size       = htonl(from->sd_size);
+}
+
+static void write_one_dir(struct untracked_cache_dir *untracked,
+                         struct write_data *wd)
+{
+       struct stat_data stat_data;
+       struct strbuf *out = &wd->out;
+       unsigned char intbuf[16];
+       unsigned int intlen, value;
+       int i = wd->index++;
+
+       /*
+        * untracked_nr should be reset whenever valid is clear, but
+        * for safety..
+        */
+       if (!untracked->valid) {
+               untracked->untracked_nr = 0;
+               untracked->check_only = 0;
+       }
+
+       if (untracked->check_only)
+               ewah_set(wd->check_only, i);
+       if (untracked->valid) {
+               ewah_set(wd->valid, i);
+               stat_data_to_disk(&stat_data, &untracked->stat_data);
+               strbuf_add(&wd->sb_stat, &stat_data, sizeof(stat_data));
+       }
+       if (!is_null_sha1(untracked->exclude_sha1)) {
+               ewah_set(wd->sha1_valid, i);
+               strbuf_add(&wd->sb_sha1, untracked->exclude_sha1, 20);
+       }
+
+       intlen = encode_varint(untracked->untracked_nr, intbuf);
+       strbuf_add(out, intbuf, intlen);
+
+       /* skip non-recurse directories */
+       for (i = 0, value = 0; i < untracked->dirs_nr; i++)
+               if (untracked->dirs[i]->recurse)
+                       value++;
+       intlen = encode_varint(value, intbuf);
+       strbuf_add(out, intbuf, intlen);
+
+       strbuf_add(out, untracked->name, strlen(untracked->name) + 1);
+
+       for (i = 0; i < untracked->untracked_nr; i++)
+               strbuf_add(out, untracked->untracked[i],
+                          strlen(untracked->untracked[i]) + 1);
+
+       for (i = 0; i < untracked->dirs_nr; i++)
+               if (untracked->dirs[i]->recurse)
+                       write_one_dir(untracked->dirs[i], wd);
+}
+
+void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked)
+{
+       struct ondisk_untracked_cache *ouc;
+       struct write_data wd;
+       unsigned char varbuf[16];
+       int len = 0, varint_len;
+       if (untracked->exclude_per_dir)
+               len = strlen(untracked->exclude_per_dir);
+       ouc = xmalloc(sizeof(*ouc) + len + 1);
+       stat_data_to_disk(&ouc->info_exclude_stat, &untracked->ss_info_exclude.stat);
+       stat_data_to_disk(&ouc->excludes_file_stat, &untracked->ss_excludes_file.stat);
+       hashcpy(ouc->info_exclude_sha1, untracked->ss_info_exclude.sha1);
+       hashcpy(ouc->excludes_file_sha1, untracked->ss_excludes_file.sha1);
+       ouc->dir_flags = htonl(untracked->dir_flags);
+       memcpy(ouc->exclude_per_dir, untracked->exclude_per_dir, len + 1);
+
+       varint_len = encode_varint(untracked->ident.len, varbuf);
+       strbuf_add(out, varbuf, varint_len);
+       strbuf_add(out, untracked->ident.buf, untracked->ident.len);
+
+       strbuf_add(out, ouc, ouc_size(len));
+       free(ouc);
+       ouc = NULL;
+
+       if (!untracked->root) {
+               varint_len = encode_varint(0, varbuf);
+               strbuf_add(out, varbuf, varint_len);
+               return;
+       }
+
+       wd.index      = 0;
+       wd.check_only = ewah_new();
+       wd.valid      = ewah_new();
+       wd.sha1_valid = ewah_new();
+       strbuf_init(&wd.out, 1024);
+       strbuf_init(&wd.sb_stat, 1024);
+       strbuf_init(&wd.sb_sha1, 1024);
+       write_one_dir(untracked->root, &wd);
+
+       varint_len = encode_varint(wd.index, varbuf);
+       strbuf_add(out, varbuf, varint_len);
+       strbuf_addbuf(out, &wd.out);
+       ewah_serialize_strbuf(wd.valid, out);
+       ewah_serialize_strbuf(wd.check_only, out);
+       ewah_serialize_strbuf(wd.sha1_valid, out);
+       strbuf_addbuf(out, &wd.sb_stat);
+       strbuf_addbuf(out, &wd.sb_sha1);
+       strbuf_addch(out, '\0'); /* safe guard for string lists */
+
+       ewah_free(wd.valid);
+       ewah_free(wd.check_only);
+       ewah_free(wd.sha1_valid);
+       strbuf_release(&wd.out);
+       strbuf_release(&wd.sb_stat);
+       strbuf_release(&wd.sb_sha1);
+}
+
+static void free_untracked(struct untracked_cache_dir *ucd)
+{
+       int i;
+       if (!ucd)
+               return;
+       for (i = 0; i < ucd->dirs_nr; i++)
+               free_untracked(ucd->dirs[i]);
+       for (i = 0; i < ucd->untracked_nr; i++)
+               free(ucd->untracked[i]);
+       free(ucd->untracked);
+       free(ucd->dirs);
+       free(ucd);
+}
+
+void free_untracked_cache(struct untracked_cache *uc)
+{
+       if (uc)
+               free_untracked(uc->root);
+       free(uc);
+}
+
+struct read_data {
+       int index;
+       struct untracked_cache_dir **ucd;
+       struct ewah_bitmap *check_only;
+       struct ewah_bitmap *valid;
+       struct ewah_bitmap *sha1_valid;
+       const unsigned char *data;
+       const unsigned char *end;
+};
+
+static void stat_data_from_disk(struct stat_data *to, const struct stat_data *from)
+{
+       to->sd_ctime.sec  = get_be32(&from->sd_ctime.sec);
+       to->sd_ctime.nsec = get_be32(&from->sd_ctime.nsec);
+       to->sd_mtime.sec  = get_be32(&from->sd_mtime.sec);
+       to->sd_mtime.nsec = get_be32(&from->sd_mtime.nsec);
+       to->sd_dev        = get_be32(&from->sd_dev);
+       to->sd_ino        = get_be32(&from->sd_ino);
+       to->sd_uid        = get_be32(&from->sd_uid);
+       to->sd_gid        = get_be32(&from->sd_gid);
+       to->sd_size       = get_be32(&from->sd_size);
+}
+
+static int read_one_dir(struct untracked_cache_dir **untracked_,
+                       struct read_data *rd)
+{
+       struct untracked_cache_dir ud, *untracked;
+       const unsigned char *next, *data = rd->data, *end = rd->end;
+       unsigned int value;
+       int i, len;
+
+       memset(&ud, 0, sizeof(ud));
+
+       next = data;
+       value = decode_varint(&next);
+       if (next > end)
+               return -1;
+       ud.recurse         = 1;
+       ud.untracked_alloc = value;
+       ud.untracked_nr    = value;
+       if (ud.untracked_nr)
+               ud.untracked = xmalloc(sizeof(*ud.untracked) * ud.untracked_nr);
+       data = next;
+
+       next = data;
+       ud.dirs_alloc = ud.dirs_nr = decode_varint(&next);
+       if (next > end)
+               return -1;
+       ud.dirs = xmalloc(sizeof(*ud.dirs) * ud.dirs_nr);
+       data = next;
+
+       len = strlen((const char *)data);
+       next = data + len + 1;
+       if (next > rd->end)
+               return -1;
+       *untracked_ = untracked = xmalloc(sizeof(*untracked) + len);
+       memcpy(untracked, &ud, sizeof(ud));
+       memcpy(untracked->name, data, len + 1);
+       data = next;
+
+       for (i = 0; i < untracked->untracked_nr; i++) {
+               len = strlen((const char *)data);
+               next = data + len + 1;
+               if (next > rd->end)
+                       return -1;
+               untracked->untracked[i] = xstrdup((const char*)data);
+               data = next;
+       }
+
+       rd->ucd[rd->index++] = untracked;
+       rd->data = data;
+
+       for (i = 0; i < untracked->dirs_nr; i++) {
+               len = read_one_dir(untracked->dirs + i, rd);
+               if (len < 0)
+                       return -1;
+       }
+       return 0;
+}
+
+static void set_check_only(size_t pos, void *cb)
+{
+       struct read_data *rd = cb;
+       struct untracked_cache_dir *ud = rd->ucd[pos];
+       ud->check_only = 1;
+}
+
+static void read_stat(size_t pos, void *cb)
+{
+       struct read_data *rd = cb;
+       struct untracked_cache_dir *ud = rd->ucd[pos];
+       if (rd->data + sizeof(struct stat_data) > rd->end) {
+               rd->data = rd->end + 1;
+               return;
+       }
+       stat_data_from_disk(&ud->stat_data, (struct stat_data *)rd->data);
+       rd->data += sizeof(struct stat_data);
+       ud->valid = 1;
+}
+
+static void read_sha1(size_t pos, void *cb)
+{
+       struct read_data *rd = cb;
+       struct untracked_cache_dir *ud = rd->ucd[pos];
+       if (rd->data + 20 > rd->end) {
+               rd->data = rd->end + 1;
+               return;
+       }
+       hashcpy(ud->exclude_sha1, rd->data);
+       rd->data += 20;
+}
+
+static void load_sha1_stat(struct sha1_stat *sha1_stat,
+                          const struct stat_data *stat,
+                          const unsigned char *sha1)
+{
+       stat_data_from_disk(&sha1_stat->stat, stat);
+       hashcpy(sha1_stat->sha1, sha1);
+       sha1_stat->valid = 1;
+}
+
+struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz)
+{
+       const struct ondisk_untracked_cache *ouc;
+       struct untracked_cache *uc;
+       struct read_data rd;
+       const unsigned char *next = data, *end = (const unsigned char *)data + sz;
+       const char *ident;
+       int ident_len, len;
+
+       if (sz <= 1 || end[-1] != '\0')
+               return NULL;
+       end--;
+
+       ident_len = decode_varint(&next);
+       if (next + ident_len > end)
+               return NULL;
+       ident = (const char *)next;
+       next += ident_len;
+
+       ouc = (const struct ondisk_untracked_cache *)next;
+       if (next + ouc_size(0) > end)
+               return NULL;
+
+       uc = xcalloc(1, sizeof(*uc));
+       strbuf_init(&uc->ident, ident_len);
+       strbuf_add(&uc->ident, ident, ident_len);
+       load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat,
+                      ouc->info_exclude_sha1);
+       load_sha1_stat(&uc->ss_excludes_file, &ouc->excludes_file_stat,
+                      ouc->excludes_file_sha1);
+       uc->dir_flags = get_be32(&ouc->dir_flags);
+       uc->exclude_per_dir = xstrdup(ouc->exclude_per_dir);
+       /* NUL after exclude_per_dir is covered by sizeof(*ouc) */
+       next += ouc_size(strlen(ouc->exclude_per_dir));
+       if (next >= end)
+               goto done2;
+
+       len = decode_varint(&next);
+       if (next > end || len == 0)
+               goto done2;
+
+       rd.valid      = ewah_new();
+       rd.check_only = ewah_new();
+       rd.sha1_valid = ewah_new();
+       rd.data       = next;
+       rd.end        = end;
+       rd.index      = 0;
+       rd.ucd        = xmalloc(sizeof(*rd.ucd) * len);
+
+       if (read_one_dir(&uc->root, &rd) || rd.index != len)
+               goto done;
+
+       next = rd.data;
+       len = ewah_read_mmap(rd.valid, next, end - next);
+       if (len < 0)
+               goto done;
+
+       next += len;
+       len = ewah_read_mmap(rd.check_only, next, end - next);
+       if (len < 0)
+               goto done;
+
+       next += len;
+       len = ewah_read_mmap(rd.sha1_valid, next, end - next);
+       if (len < 0)
+               goto done;
+
+       ewah_each_bit(rd.check_only, set_check_only, &rd);
+       rd.data = next + len;
+       ewah_each_bit(rd.valid, read_stat, &rd);
+       ewah_each_bit(rd.sha1_valid, read_sha1, &rd);
+       next = rd.data;
+
+done:
+       free(rd.ucd);
+       ewah_free(rd.valid);
+       ewah_free(rd.check_only);
+       ewah_free(rd.sha1_valid);
+done2:
+       if (next != end) {
+               free_untracked_cache(uc);
+               uc = NULL;
+       }
+       return uc;
+}
+
+void untracked_cache_invalidate_path(struct index_state *istate,
+                                    const char *path)
+{
+       const char *sep;
+       struct untracked_cache_dir *d;
+       if (!istate->untracked || !istate->untracked->root)
+               return;
+       sep = strrchr(path, '/');
+       if (sep)
+               d = lookup_untracked(istate->untracked,
+                                    istate->untracked->root,
+                                    path, sep - path);
+       else
+               d = istate->untracked->root;
+       istate->untracked->dir_invalidated++;
+       d->valid = 0;
+       d->untracked_nr = 0;
+}
+
+void untracked_cache_remove_from_index(struct index_state *istate,
+                                      const char *path)
+{
+       untracked_cache_invalidate_path(istate, path);
+}
+
+void untracked_cache_add_to_index(struct index_state *istate,
+                                 const char *path)
+{
+       untracked_cache_invalidate_path(istate, path);
+}