Merge branch 'nd/attr-match-optim-more'
author Jeff King <peff@peff.net>
Fri, 9 Nov 2012 17:42:25 +0000 (12:42 -0500)
committer Jeff King <peff@peff.net>
Fri, 9 Nov 2012 17:42:25 +0000 (12:42 -0500)
Start laying the foundation to build the "wildmatch" after we can
agree on its desired semantics.

* nd/attr-match-optim-more:
attr: more matching optimizations from .gitignore
gitignore: make pattern parsing code a separate function
exclude: split pathname matching code into a separate function
exclude: fix a bug in prefix compare optimization
exclude: split basename matching code into a separate function
exclude: stricten a length check in EXC_FLAG_ENDSWITH case

1  2 
Documentation/gitattributes.txt
attr.c
dir.c
t/t0003-attributes.sh
index ba02d4de5901b4448411654ecdf76a1b1980e53b,b7c0e6577e8d1dcdd7ac6f8f12ba7f6e29497445..2698f63cf9b492deb32b09e2a60c59f500c47f5c
@@@ -56,6 -56,7 +56,7 @@@ When more than one pattern matches the 
  overrides an earlier line.  This overriding is done per
  attribute.  The rules how the pattern matches paths are the
  same as in `.gitignore` files; see linkgit:gitignore[5].
+ Unlike `.gitignore`, negative patterns are forbidden.
  
  When deciding what attributes are assigned to a path, git
  consults `$GIT_DIR/info/attributes` file (which has the highest
@@@ -66,11 -67,6 +67,11 @@@ is from the path in question, the lowe
  global and system-wide files are considered (they have the lowest
  precedence).
  
 +When the `.gitattributes` file is missing from the work tree, the
 +path in the index is used as a fall-back.  During checkout process,
 +`.gitattributes` in the index is used and then the file in the
 +working tree is used as a fall-back.
 +
  If you wish to affect only a single repository (i.e., to assign
  attributes to files that are particular to
  one user's workflow for that repository), then
@@@ -80,8 -76,6 +81,8 @@@ repositories (i.e., attributes of inter
  `.gitattributes` files. Attributes that should affect all repositories
  for a single user should be placed in a file specified by the
  `core.attributesfile` configuration option (see linkgit:git-config[1]).
 +Its default value is $XDG_CONFIG_HOME/git/attributes. If $XDG_CONFIG_HOME
 +is either not set or empty, $HOME/.config/git/attributes is used instead.
  Attributes for all users on a system should be placed in the
  `$(prefix)/etc/gitattributes` file.
  
@@@ -516,8 -510,6 +517,8 @@@ configuration file (you still need to e
  attribute mechanism, via `.gitattributes`).  The following built in
  patterns are available:
  
 +- `ada` suitable for source code in the Ada language.
 +
  - `bibtex` suitable for files with BibTeX coded references.
  
  - `cpp` suitable for source code in the C and C++ languages.
@@@ -934,7 -926,7 +935,7 @@@ file at the toplevel (i.e. not in any s
  macro attribute "binary" is equivalent to:
  
  ------------
 -[attr]binary -diff -text
 +[attr]binary -diff -merge -text
  ------------
  
  
diff --combined attr.c
index 179886cc4b94a2c9260285b892eb200f6575453b,2fc6353628a2cd7ffe363a7b600dc8f4283d0e1f..097ae87f3c2b4093fffe6586f036918367fa672e
--- 1/attr.c
--- 2/attr.c
+++ b/attr.c
@@@ -115,6 -115,13 +115,13 @@@ struct attr_state 
        const char *setto;
  };
  
+ struct pattern {
+       const char *pattern;
+       int patternlen;
+       int nowildcardlen;
+       int flags;              /* EXC_FLAG_* */
+ };
  /*
   * One rule, as from a .gitattributes file.
   *
   */
  struct match_attr {
        union {
-               char *pattern;
+               struct pattern pat;
                struct git_attr *attr;
        } u;
        char is_macro;
@@@ -241,9 -248,16 +248,16 @@@ static struct match_attr *parse_attr_li
        if (is_macro)
                res->u.attr = git_attr_internal(name, namelen);
        else {
-               res->u.pattern = (char *)&(res->state[num_attr]);
-               memcpy(res->u.pattern, name, namelen);
-               res->u.pattern[namelen] = 0;
+               char *p = (char *)&(res->state[num_attr]);
+               memcpy(p, name, namelen);
+               res->u.pat.pattern = p;
+               parse_exclude_pattern(&res->u.pat.pattern,
+                                     &res->u.pat.patternlen,
+                                     &res->u.pat.flags,
+                                     &res->u.pat.nowildcardlen);
+               if (res->u.pat.flags & EXC_FLAG_NEGATIVE)
+                       die(_("Negative patterns are forbidden in git attributes\n"
+                             "Use '\\!' for literal leading exclamation."));
        }
        res->is_macro = is_macro;
        res->num_attr = num_attr;
@@@ -307,7 -321,7 +321,7 @@@ static void free_attr_elem(struct attr_
  }
  
  static const char *builtin_attr[] = {
 -      "[attr]binary -diff -text",
 +      "[attr]binary -diff -merge -text",
        NULL,
  };
  
@@@ -353,11 -367,8 +367,11 @@@ static struct attr_stack *read_attr_fro
        char buf[2048];
        int lineno = 0;
  
 -      if (!fp)
 +      if (!fp) {
 +              if (errno != ENOENT && errno != ENOTDIR)
 +                      warn_on_inaccessible(path);
                return NULL;
 +      }
        res = xcalloc(1, sizeof(*res));
        while (fgets(buf, sizeof(buf), fp))
                handle_attr_line(res, buf, path, ++lineno, macro_ok);
@@@ -501,7 -512,6 +515,7 @@@ static int git_attr_system(void
  static void bootstrap_attr_stack(void)
  {
        struct attr_stack *elem;
 +      char *xdg_attributes_file;
  
        if (attr_stack)
                return;
                }
        }
  
 +      if (!git_attributes_file) {
 +              home_config_paths(NULL, &xdg_attributes_file, "attributes");
 +              git_attributes_file = xdg_attributes_file;
 +      }
        if (git_attributes_file) {
                elem = read_attr_from_file(git_attributes_file, 1);
                if (elem) {
@@@ -648,25 -654,21 +662,21 @@@ static void prepare_attr_stack(const ch
  
  static int path_matches(const char *pathname, int pathlen,
                        const char *basename,
-                       const char *pattern,
+                       const struct pattern *pat,
                        const char *base, int baselen)
  {
-       if (!strchr(pattern, '/')) {
-               return (fnmatch_icase(pattern, basename, 0) == 0);
+       const char *pattern = pat->pattern;
+       int prefix = pat->nowildcardlen;
+       if (pat->flags & EXC_FLAG_NODIR) {
+               return match_basename(basename,
+                                     pathlen - (basename - pathname),
+                                     pattern, prefix,
+                                     pat->patternlen, pat->flags);
        }
-       /*
-        * match with FNM_PATHNAME; the pattern has base implicitly
-        * in front of it.
-        */
-       if (*pattern == '/')
-               pattern++;
-       if (pathlen < baselen ||
-           (baselen && pathname[baselen] != '/') ||
-           strncmp(pathname, base, baselen))
-               return 0;
-       if (baselen != 0)
-               baselen++;
-       return fnmatch_icase(pattern, pathname + baselen, FNM_PATHNAME) == 0;
+       return match_pathname(pathname, pathlen,
+                             base, baselen,
+                             pattern, prefix, pat->patternlen, pat->flags);
  }
  
  static int macroexpand_one(int attr_nr, int rem);
@@@ -704,7 -706,7 +714,7 @@@ static int fill(const char *path, int p
                if (a->is_macro)
                        continue;
                if (path_matches(path, pathlen, basename,
-                                a->u.pattern, base, stk->originlen))
+                                &a->u.pat, base, stk->originlen))
                        rem = fill_one("fill", a, rem);
        }
        return rem;
diff --combined dir.c
index 486833986ed4b4e7d05d2086d53b15ec63905dd0,ee8e7115a8a37ca5c096dc3548b13e3761e4cac5..5a83aa7897f270279c403778f43aea6db1efc5af
--- 1/dir.c
--- 2/dir.c
+++ b/dir.c
@@@ -308,42 -308,69 +308,69 @@@ static int no_wildcard(const char *stri
        return string[simple_length(string)] == '\0';
  }
  
+ void parse_exclude_pattern(const char **pattern,
+                          int *patternlen,
+                          int *flags,
+                          int *nowildcardlen)
+ {
+       const char *p = *pattern;
+       size_t i, len;
+       *flags = 0;
+       if (*p == '!') {
+               *flags |= EXC_FLAG_NEGATIVE;
+               p++;
+       }
+       len = strlen(p);
+       if (len && p[len - 1] == '/') {
+               len--;
+               *flags |= EXC_FLAG_MUSTBEDIR;
+       }
+       for (i = 0; i < len; i++) {
+               if (p[i] == '/')
+                       break;
+       }
+       if (i == len)
+               *flags |= EXC_FLAG_NODIR;
+       *nowildcardlen = simple_length(p);
+       /*
+        * we should have excluded the trailing slash from 'p' too,
+        * but that's one more allocation. Instead just make sure
+        * nowildcardlen does not exceed real patternlen
+        */
+       if (*nowildcardlen > len)
+               *nowildcardlen = len;
+       if (*p == '*' && no_wildcard(p + 1))
+               *flags |= EXC_FLAG_ENDSWITH;
+       *pattern = p;
+       *patternlen = len;
+ }
  void add_exclude(const char *string, const char *base,
                 int baselen, struct exclude_list *which)
  {
        struct exclude *x;
-       size_t len;
-       int to_exclude = 1;
-       int flags = 0;
+       int patternlen;
+       int flags;
+       int nowildcardlen;
  
-       if (*string == '!') {
-               to_exclude = 0;
-               string++;
-       }
-       len = strlen(string);
-       if (len && string[len - 1] == '/') {
+       parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen);
+       if (flags & EXC_FLAG_MUSTBEDIR) {
                char *s;
-               x = xmalloc(sizeof(*x) + len);
+               x = xmalloc(sizeof(*x) + patternlen + 1);
                s = (char *)(x+1);
-               memcpy(s, string, len - 1);
-               s[len - 1] = '\0';
-               string = s;
+               memcpy(s, string, patternlen);
+               s[patternlen] = '\0';
                x->pattern = s;
-               flags = EXC_FLAG_MUSTBEDIR;
        } else {
                x = xmalloc(sizeof(*x));
                x->pattern = string;
        }
-       x->to_exclude = to_exclude;
-       x->patternlen = strlen(string);
+       x->patternlen = patternlen;
+       x->nowildcardlen = nowildcardlen;
        x->base = base;
        x->baselen = baselen;
        x->flags = flags;
-       if (!strchr(string, '/'))
-               x->flags |= EXC_FLAG_NODIR;
-       x->nowildcardlen = simple_length(string);
-       if (*string == '*' && no_wildcard(string+1))
-               x->flags |= EXC_FLAG_ENDSWITH;
        ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
        which->excludes[which->nr++] = x;
  }
@@@ -397,8 -424,6 +424,8 @@@ int add_excludes_from_file_to_list(cons
  
        fd = open(fname, O_RDONLY);
        if (fd < 0 || fstat(fd, &st) < 0) {
 +              if (errno != ENOENT)
 +                      warn_on_inaccessible(fname);
                if (0 <= fd)
                        close(fd);
                if (!check_index ||
@@@ -505,6 -530,72 +532,72 @@@ static void prep_exclude(struct dir_str
        dir->basebuf[baselen] = '\0';
  }
  
+ int match_basename(const char *basename, int basenamelen,
+                  const char *pattern, int prefix, int patternlen,
+                  int flags)
+ {
+       if (prefix == patternlen) {
+               if (!strcmp_icase(pattern, basename))
+                       return 1;
+       } else if (flags & EXC_FLAG_ENDSWITH) {
+               if (patternlen - 1 <= basenamelen &&
+                   !strcmp_icase(pattern + 1,
+                                 basename + basenamelen - patternlen + 1))
+                       return 1;
+       } else {
+               if (fnmatch_icase(pattern, basename, 0) == 0)
+                       return 1;
+       }
+       return 0;
+ }
+ int match_pathname(const char *pathname, int pathlen,
+                  const char *base, int baselen,
+                  const char *pattern, int prefix, int patternlen,
+                  int flags)
+ {
+       const char *name;
+       int namelen;
+       /*
+        * match with FNM_PATHNAME; the pattern has base implicitly
+        * in front of it.
+        */
+       if (*pattern == '/') {
+               pattern++;
+               prefix--;
+       }
+       /*
+        * baselen does not count the trailing slash. base[] may or
+        * may not end with a trailing slash though.
+        */
+       if (pathlen < baselen + 1 ||
+           (baselen && pathname[baselen] != '/') ||
+           strncmp_icase(pathname, base, baselen))
+               return 0;
+       namelen = baselen ? pathlen - baselen - 1 : pathlen;
+       name = pathname + pathlen - namelen;
+       if (prefix) {
+               /*
+                * if the non-wildcard part is longer than the
+                * remaining pathname, surely it cannot match.
+                */
+               if (prefix > namelen)
+                       return 0;
+               if (strncmp_icase(pattern, name, prefix))
+                       return 0;
+               pattern += prefix;
+               name    += prefix;
+               namelen -= prefix;
+       }
+       return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0;
+ }
  /* Scan the list and let the last match determine the fate.
   * Return 1 for exclude, 0 for include and -1 for undecided.
   */
@@@ -519,9 -610,9 +612,9 @@@ int excluded_from_list(const char *path
  
        for (i = el->nr - 1; 0 <= i; i--) {
                struct exclude *x = el->excludes[i];
-               const char *name, *exclude = x->pattern;
-               int to_exclude = x->to_exclude;
-               int namelen, prefix = x->nowildcardlen;
+               const char *exclude = x->pattern;
+               int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
+               int prefix = x->nowildcardlen;
  
                if (x->flags & EXC_FLAG_MUSTBEDIR) {
                        if (*dtype == DT_UNKNOWN)
                }
  
                if (x->flags & EXC_FLAG_NODIR) {
-                       /* match basename */
-                       if (prefix == x->patternlen) {
-                               if (!strcmp_icase(exclude, basename))
-                                       return to_exclude;
-                       } else if (x->flags & EXC_FLAG_ENDSWITH) {
-                               if (x->patternlen - 1 <= pathlen &&
-                                   !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
-                                       return to_exclude;
-                       } else {
-                               if (fnmatch_icase(exclude, basename, 0) == 0)
-                                       return to_exclude;
-                       }
+                       if (match_basename(basename,
+                                          pathlen - (basename - pathname),
+                                          exclude, prefix, x->patternlen,
+                                          x->flags))
+                               return to_exclude;
                        continue;
                }
  
-               /* match with FNM_PATHNAME:
-                * exclude has base (baselen long) implicitly in front of it.
-                */
-               if (*exclude == '/') {
-                       exclude++;
-                       prefix--;
-               }
-               if (pathlen < x->baselen ||
-                   (x->baselen && pathname[x->baselen-1] != '/') ||
-                   strncmp_icase(pathname, x->base, x->baselen))
-                       continue;
-               namelen = x->baselen ? pathlen - x->baselen : pathlen;
-               name = pathname + pathlen  - namelen;
-               /* if the non-wildcard part is longer than the
-                  remaining pathname, surely it cannot match */
-               if (prefix > namelen)
-                       continue;
-               if (prefix) {
-                       if (strncmp_icase(exclude, name, prefix))
-                               continue;
-                       exclude += prefix;
-                       name    += prefix;
-                       namelen -= prefix;
-               }
-               if (!namelen || !fnmatch_icase(exclude, name, FNM_PATHNAME))
+               assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
+               if (match_pathname(pathname, pathlen,
+                                  x->base, x->baselen ? x->baselen - 1 : 0,
+                                  exclude, prefix, x->patternlen, x->flags))
                        return to_exclude;
        }
        return -1; /* undecided */
@@@ -1305,17 -1363,12 +1365,17 @@@ int remove_dir_recursively(struct strbu
  void setup_standard_excludes(struct dir_struct *dir)
  {
        const char *path;
 +      char *xdg_path;
  
        dir->exclude_per_dir = ".gitignore";
        path = git_path("info/exclude");
 -      if (!access(path, R_OK))
 +      if (!excludes_file) {
 +              home_config_paths(NULL, &xdg_path, "ignore");
 +              excludes_file = xdg_path;
 +      }
 +      if (!access_or_warn(path, R_OK))
                add_excludes_from_file(dir, path);
 -      if (excludes_file && !access(excludes_file, R_OK))
 +      if (excludes_file && !access_or_warn(excludes_file, R_OK))
                add_excludes_from_file(dir, excludes_file);
  }
  
diff --combined t/t0003-attributes.sh
index febc45c9cc6f34a606ebac8470e582efad798148,f6c21ea4ea07e7603aee693814d3b5caa143923c..807b8b88e215df5ce39504b1858be8694499727d
@@@ -123,6 -123,16 +123,6 @@@ test_expect_success 'attribute matchin
  
  '
  
 -test_expect_success 'check whether FS is case-insensitive' '
 -      mkdir junk &&
 -      echo good >junk/CamelCase &&
 -      echo bad >junk/camelcase &&
 -      if test "$(cat junk/CamelCase)" != good
 -      then
 -              test_set_prereq CASE_INSENSITIVE_FS
 -      fi
 -'
 -
  test_expect_success CASE_INSENSITIVE_FS 'additional case insensitivity tests' '
        test_must_fail attr_check a/B/D/g "a/b/d/*" "-c core.ignorecase=0" &&
        test_must_fail attr_check A/B/D/NO "a/b/d/*" "-c core.ignorecase=0" &&
@@@ -196,6 -206,16 +196,16 @@@ test_expect_success 'root subdir attrib
        attr_check subdir/a/i unspecified
  '
  
+ test_expect_success 'negative patterns' '
+       echo "!f test=bar" >.gitattributes &&
+       test_must_fail git check-attr test -- f
+ '
+ test_expect_success 'patterns starting with exclamation' '
+       echo "\!f test=foo" >.gitattributes &&
+       attr_check "!f" foo
+ '
  test_expect_success 'setup bare' '
        git clone --bare . bare.git &&
        cd bare.git