Merge branch 'nd/exclude-workaround-top-heavy'
author Junio C Hamano <gitster@pobox.com>
Thu, 28 Jun 2012 22:19:57 +0000 (15:19 -0700)
committer Junio C Hamano <gitster@pobox.com>
Thu, 28 Jun 2012 22:19:57 +0000 (15:19 -0700)
Attempt to optimize matching against an exclude pattern that names a
deep directory hierarchy by comparing the leading part of the pattern
that contains no wildcard literally.

1  2 
dir.c
dir.h
diff --combined dir.c
index 2c02b312b77f3363c234bdc66faaeafd5899586d,94fe9f875c7bb998f62c3fefba94f5820311f252..0015cc54f455cf48e6e2d66c23ecebfa017fcf00
--- 1/dir.c
--- 2/dir.c
+++ b/dir.c
@@@ -288,9 -288,24 +288,24 @@@ int match_pathspec_depth(const struct p
        return retval;
  }
  
+ /*
+  * Return the length of the "simple" part of a path match limiter.
+  */
+ static int simple_length(const char *match)
+ {
+       int len = -1;
+       for (;;) {
+               unsigned char c = *match++;
+               len++;
+               if (c == '\0' || is_glob_special(c))
+                       return len;
+       }
+ }
  static int no_wildcard(const char *string)
  {
-       return string[strcspn(string, "*?[{\\")] == '\0';
+       return string[simple_length(string)] == '\0';
  }
  
  void add_exclude(const char *string, const char *base,
        x->flags = flags;
        if (!strchr(string, '/'))
                x->flags |= EXC_FLAG_NODIR;
-       if (no_wildcard(string))
-               x->flags |= EXC_FLAG_NOWILDCARD;
+       x->nowildcardlen = simple_length(string);
        if (*string == '*' && no_wildcard(string+1))
                x->flags |= EXC_FLAG_ENDSWITH;
        ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
@@@ -498,62 -512,74 +512,74 @@@ int excluded_from_list(const char *path
  {
        int i;
  
-       if (el->nr) {
-               for (i = el->nr - 1; 0 <= i; i--) {
-                       struct exclude *x = el->excludes[i];
-                       const char *exclude = x->pattern;
-                       int to_exclude = x->to_exclude;
-                       if (x->flags & EXC_FLAG_MUSTBEDIR) {
-                               if (*dtype == DT_UNKNOWN)
-                                       *dtype = get_dtype(NULL, pathname, pathlen);
-                               if (*dtype != DT_DIR)
-                                       continue;
-                       }
+       if (!el->nr)
+               return -1;      /* undefined */
  
-                       if (x->flags & EXC_FLAG_NODIR) {
-                               /* match basename */
-                               if (x->flags & EXC_FLAG_NOWILDCARD) {
-                                       if (!strcmp_icase(exclude, basename))
-                                               return to_exclude;
-                               } else if (x->flags & EXC_FLAG_ENDSWITH) {
-                                       if (x->patternlen - 1 <= pathlen &&
-                                           !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
-                                               return to_exclude;
-                               } else {
-                                       if (fnmatch_icase(exclude, basename, 0) == 0)
-                                               return to_exclude;
-                               }
-                       }
-                       else {
-                               /* match with FNM_PATHNAME:
-                                * exclude has base (baselen long) implicitly
-                                * in front of it.
-                                */
-                               int baselen = x->baselen;
-                               if (*exclude == '/')
-                                       exclude++;
-                               if (pathlen < baselen ||
-                                   (baselen && pathname[baselen-1] != '/') ||
-                                   strncmp_icase(pathname, x->base, baselen))
-                                   continue;
-                               if (x->flags & EXC_FLAG_NOWILDCARD) {
-                                       if (!strcmp_icase(exclude, pathname + baselen))
-                                               return to_exclude;
-                               } else {
-                                       if (fnmatch_icase(exclude, pathname+baselen,
-                                                   FNM_PATHNAME) == 0)
-                                           return to_exclude;
-                               }
+       for (i = el->nr - 1; 0 <= i; i--) {
+               struct exclude *x = el->excludes[i];
+               const char *name, *exclude = x->pattern;
+               int to_exclude = x->to_exclude;
+               int namelen, prefix = x->nowildcardlen;
+               if (x->flags & EXC_FLAG_MUSTBEDIR) {
+                       if (*dtype == DT_UNKNOWN)
+                               *dtype = get_dtype(NULL, pathname, pathlen);
+                       if (*dtype != DT_DIR)
+                               continue;
+               }
+               if (x->flags & EXC_FLAG_NODIR) {
+                       /* match basename */
+                       if (prefix == x->patternlen) {
+                               if (!strcmp_icase(exclude, basename))
+                                       return to_exclude;
+                       } else if (x->flags & EXC_FLAG_ENDSWITH) {
+                               if (x->patternlen - 1 <= pathlen &&
+                                   !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
+                                       return to_exclude;
+                       } else {
+                               if (fnmatch_icase(exclude, basename, 0) == 0)
+                                       return to_exclude;
                        }
+                       continue;
+               }
+               /* match with FNM_PATHNAME:
+                * exclude has base (baselen long) implicitly in front of it.
+                */
+               if (*exclude == '/') {
+                       exclude++;
+                       prefix--;
                }
+               if (pathlen < x->baselen ||
+                   (x->baselen && pathname[x->baselen-1] != '/') ||
+                   strncmp_icase(pathname, x->base, x->baselen))
+                       continue;
+               namelen = x->baselen ? pathlen - x->baselen : pathlen;
+               name = pathname + pathlen  - namelen;
+               /* if the non-wildcard part is longer than the
+                  remaining pathname, surely it cannot match */
+               if (prefix > namelen)
+                       continue;
+               if (prefix) {
+                       if (strncmp_icase(exclude, name, prefix))
+                               continue;
+                       exclude += prefix;
+                       name    += prefix;
+                       namelen -= prefix;
+               }
+               if (!namelen || !fnmatch_icase(exclude, name, FNM_PATHNAME))
+                       return to_exclude;
        }
        return -1; /* undecided */
  }
  
 -int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
 +static int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p)
  {
        int pathlen = strlen(pathname);
        int st;
        return 0;
  }
  
 +void path_exclude_check_init(struct path_exclude_check *check,
 +                           struct dir_struct *dir)
 +{
 +      check->dir = dir;
 +      strbuf_init(&check->path, 256);
 +}
 +
 +void path_exclude_check_clear(struct path_exclude_check *check)
 +{
 +      strbuf_release(&check->path);
 +}
 +
 +/*
 + * Is this name excluded?  This is for a caller like show_files() that
 + * do not honor directory hierarchy and iterate through paths that are
 + * possibly in an ignored directory.
 + *
 + * A path to a directory known to be excluded is left in check->path to
 + * optimize for repeated checks for files in the same excluded directory.
 + */
 +int path_excluded(struct path_exclude_check *check,
 +                const char *name, int namelen, int *dtype)
 +{
 +      int i;
 +      struct strbuf *path = &check->path;
 +
 +      /*
 +       * we allow the caller to pass namelen as an optimization; it
 +       * must match the length of the name, as we eventually call
 +       * excluded() on the whole name string.
 +       */
 +      if (namelen < 0)
 +              namelen = strlen(name);
 +
 +      if (path->len &&
 +          path->len <= namelen &&
 +          !memcmp(name, path->buf, path->len) &&
 +          (!name[path->len] || name[path->len] == '/'))
 +              return 1;
 +
 +      strbuf_setlen(path, 0);
 +      for (i = 0; name[i]; i++) {
 +              int ch = name[i];
 +
 +              if (ch == '/') {
 +                      int dt = DT_DIR;
 +                      if (excluded(check->dir, path->buf, &dt))
 +                              return 1;
 +              }
 +              strbuf_addch(path, ch);
 +      }
 +
 +      /* An entry in the index; cannot be a directory with subentries */
 +      strbuf_setlen(path, 0);
 +
 +      return excluded(check->dir, name, dtype);
 +}
 +
  static struct dir_entry *dir_entry_new(const char *pathname, int len)
  {
        struct dir_entry *ent;
@@@ -1055,21 -1023,6 +1081,6 @@@ static int cmp_name(const void *p1, con
                                  e2->name, e2->len);
  }
  
- /*
-  * Return the length of the "simple" part of a path match limiter.
-  */
- static int simple_length(const char *match)
- {
-       int len = -1;
-       for (;;) {
-               unsigned char c = *match++;
-               len++;
-               if (c == '\0' || is_glob_special(c))
-                       return len;
-       }
- }
  static struct path_simplify *create_simplify(const char **pathspec)
  {
        int nr, alloc = 0;
diff --combined dir.h
index 6c73e4151de8374d35427358cde50b506a2d7de9,39fc145b6617b0460a0db28f40412573c73bc4aa..893465a1e89d17cf5f94d3e68be1371f03e6163d
--- 1/dir.h
--- 2/dir.h
+++ b/dir.h
@@@ -1,15 -1,12 +1,14 @@@
  #ifndef DIR_H
  #define DIR_H
  
 +#include "strbuf.h"
 +
  struct dir_entry {
        unsigned int len;
        char name[FLEX_ARRAY]; /* more */
  };
  
  #define EXC_FLAG_NODIR 1
- #define EXC_FLAG_NOWILDCARD 2
  #define EXC_FLAG_ENDSWITH 4
  #define EXC_FLAG_MUSTBEDIR 8
  
@@@ -19,6 -16,7 +18,7 @@@ struct exclude_list 
        struct exclude {
                const char *pattern;
                int patternlen;
+               int nowildcardlen;
                const char *base;
                int baselen;
                int to_exclude;
@@@ -78,22 -76,8 +78,22 @@@ extern int read_directory(struct dir_st
  
  extern int excluded_from_list(const char *pathname, int pathlen, const char *basename,
                              int *dtype, struct exclude_list *el);
 -extern int excluded(struct dir_struct *, const char *, int *);
  struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len);
 +
 +/*
 + * The excluded() API is meant for callers that check each level of leading
 + * directory hierarchies with excluded() to avoid recursing into excluded
 + * directories.  Callers that do not do so should use this API instead.
 + */
 +struct path_exclude_check {
 +      struct dir_struct *dir;
 +      struct strbuf path;
 +};
 +extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *);
 +extern void path_exclude_check_clear(struct path_exclude_check *);
 +extern int path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype);
 +
 +
  extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
                                          char **buf_p, struct exclude_list *which, int check_index);
  extern void add_excludes_from_file(struct dir_struct *, const char *fname);