From: Jeff King Date: Fri, 9 Nov 2012 17:42:25 +0000 (-0500) Subject: Merge branch 'nd/attr-match-optim-more' X-Git-Tag: v1.8.1-rc0~80 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/5f836422aba81b5a990ca5251892bfe26ec8125c?ds=inline;hp=-c Merge branch 'nd/attr-match-optim-more' Start laying the foundation to build the "wildmatch" after we can agree on its desired semantics. * nd/attr-match-optim-more: attr: more matching optimizations from .gitignore gitignore: make pattern parsing code a separate function exclude: split pathname matching code into a separate function exclude: fix a bug in prefix compare optimization exclude: split basename matching code into a separate function exclude: stricten a length check in EXC_FLAG_ENDSWITH case --- 5f836422aba81b5a990ca5251892bfe26ec8125c diff --combined Documentation/gitattributes.txt index ba02d4de59,b7c0e6577e..2698f63cf9 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@@ -56,6 -56,7 +56,7 @@@ When more than one pattern matches the overrides an earlier line. This overriding is done per attribute. The rules how the pattern matches paths are the same as in `.gitignore` files; see linkgit:gitignore[5]. + Unlike `.gitignore`, negative patterns are forbidden. When deciding what attributes are assigned to a path, git consults `$GIT_DIR/info/attributes` file (which has the highest @@@ -66,11 -67,6 +67,11 @@@ is from the path in question, the lowe global and system-wide files are considered (they have the lowest precedence). +When the `.gitattributes` file is missing from the work tree, the +path in the index is used as a fall-back. During checkout process, +`.gitattributes` in the index is used and then the file in the +working tree is used as a fall-back. + If you wish to affect only a single repository (i.e., to assign attributes to files that are particular to one user's workflow for that repository), then @@@ -80,8 -76,6 +81,8 @@@ repositories (i.e., attributes of inter `.gitattributes` files. Attributes that should affect all repositories for a single user should be placed in a file specified by the `core.attributesfile` configuration option (see linkgit:git-config[1]). +Its default value is $XDG_CONFIG_HOME/git/attributes. If $XDG_CONFIG_HOME +is either not set or empty, $HOME/.config/git/attributes is used instead. Attributes for all users on a system should be placed in the `$(prefix)/etc/gitattributes` file. @@@ -516,8 -510,6 +517,8 @@@ configuration file (you still need to e attribute mechanism, via `.gitattributes`). The following built in patterns are available: +- `ada` suitable for source code in the Ada language. + - `bibtex` suitable for files with BibTeX coded references. - `cpp` suitable for source code in the C and C++ languages. @@@ -934,7 -926,7 +935,7 @@@ file at the toplevel (i.e. not in any s macro attribute "binary" is equivalent to: ------------ -[attr]binary -diff -text +[attr]binary -diff -merge -text ------------ diff --combined attr.c index 179886cc4b,2fc6353628..097ae87f3c --- a/attr.c +++ b/attr.c @@@ -115,6 -115,13 +115,13 @@@ struct attr_state const char *setto; }; + struct pattern { + const char *pattern; + int patternlen; + int nowildcardlen; + int flags; /* EXC_FLAG_* */ + }; + /* * One rule, as from a .gitattributes file. * @@@ -131,7 -138,7 +138,7 @@@ */ struct match_attr { union { - char *pattern; + struct pattern pat; struct git_attr *attr; } u; char is_macro; @@@ -241,9 -248,16 +248,16 @@@ static struct match_attr *parse_attr_li if (is_macro) res->u.attr = git_attr_internal(name, namelen); else { - res->u.pattern = (char *)&(res->state[num_attr]); - memcpy(res->u.pattern, name, namelen); - res->u.pattern[namelen] = 0; + char *p = (char *)&(res->state[num_attr]); + memcpy(p, name, namelen); + res->u.pat.pattern = p; + parse_exclude_pattern(&res->u.pat.pattern, + &res->u.pat.patternlen, + &res->u.pat.flags, + &res->u.pat.nowildcardlen); + if (res->u.pat.flags & EXC_FLAG_NEGATIVE) + die(_("Negative patterns are forbidden in git attributes\n" + "Use '\\!' for literal leading exclamation.")); } res->is_macro = is_macro; res->num_attr = num_attr; @@@ -307,7 -321,7 +321,7 @@@ static void free_attr_elem(struct attr_ } static const char *builtin_attr[] = { - "[attr]binary -diff -text", + "[attr]binary -diff -merge -text", NULL, }; @@@ -353,11 -367,8 +367,11 @@@ static struct attr_stack *read_attr_fro char buf[2048]; int lineno = 0; - if (!fp) + if (!fp) { + if (errno != ENOENT && errno != ENOTDIR) + warn_on_inaccessible(path); return NULL; + } res = xcalloc(1, sizeof(*res)); while (fgets(buf, sizeof(buf), fp)) handle_attr_line(res, buf, path, ++lineno, macro_ok); @@@ -501,7 -512,6 +515,7 @@@ static int git_attr_system(void static void bootstrap_attr_stack(void) { struct attr_stack *elem; + char *xdg_attributes_file; if (attr_stack) return; @@@ -520,10 -530,6 +534,10 @@@ } } + if (!git_attributes_file) { + home_config_paths(NULL, &xdg_attributes_file, "attributes"); + git_attributes_file = xdg_attributes_file; + } if (git_attributes_file) { elem = read_attr_from_file(git_attributes_file, 1); if (elem) { @@@ -648,25 -654,21 +662,21 @@@ static void prepare_attr_stack(const ch static int path_matches(const char *pathname, int pathlen, const char *basename, - const char *pattern, + const struct pattern *pat, const char *base, int baselen) { - if (!strchr(pattern, '/')) { - return (fnmatch_icase(pattern, basename, 0) == 0); + const char *pattern = pat->pattern; + int prefix = pat->nowildcardlen; + + if (pat->flags & EXC_FLAG_NODIR) { + return match_basename(basename, + pathlen - (basename - pathname), + pattern, prefix, + pat->patternlen, pat->flags); } - /* - * match with FNM_PATHNAME; the pattern has base implicitly - * in front of it. - */ - if (*pattern == '/') - pattern++; - if (pathlen < baselen || - (baselen && pathname[baselen] != '/') || - strncmp(pathname, base, baselen)) - return 0; - if (baselen != 0) - baselen++; - return fnmatch_icase(pattern, pathname + baselen, FNM_PATHNAME) == 0; + return match_pathname(pathname, pathlen, + base, baselen, + pattern, prefix, pat->patternlen, pat->flags); } static int macroexpand_one(int attr_nr, int rem); @@@ -704,7 -706,7 +714,7 @@@ static int fill(const char *path, int p if (a->is_macro) continue; if (path_matches(path, pathlen, basename, - a->u.pattern, base, stk->originlen)) + &a->u.pat, base, stk->originlen)) rem = fill_one("fill", a, rem); } return rem; diff --combined dir.c index 486833986e,ee8e7115a8..5a83aa7897 --- a/dir.c +++ b/dir.c @@@ -308,42 -308,69 +308,69 @@@ static int no_wildcard(const char *stri return string[simple_length(string)] == '\0'; } + void parse_exclude_pattern(const char **pattern, + int *patternlen, + int *flags, + int *nowildcardlen) + { + const char *p = *pattern; + size_t i, len; + + *flags = 0; + if (*p == '!') { + *flags |= EXC_FLAG_NEGATIVE; + p++; + } + len = strlen(p); + if (len && p[len - 1] == '/') { + len--; + *flags |= EXC_FLAG_MUSTBEDIR; + } + for (i = 0; i < len; i++) { + if (p[i] == '/') + break; + } + if (i == len) + *flags |= EXC_FLAG_NODIR; + *nowildcardlen = simple_length(p); + /* + * we should have excluded the trailing slash from 'p' too, + * but that's one more allocation. Instead just make sure + * nowildcardlen does not exceed real patternlen + */ + if (*nowildcardlen > len) + *nowildcardlen = len; + if (*p == '*' && no_wildcard(p + 1)) + *flags |= EXC_FLAG_ENDSWITH; + *pattern = p; + *patternlen = len; + } + void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *which) { struct exclude *x; - size_t len; - int to_exclude = 1; - int flags = 0; + int patternlen; + int flags; + int nowildcardlen; - if (*string == '!') { - to_exclude = 0; - string++; - } - len = strlen(string); - if (len && string[len - 1] == '/') { + parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen); + if (flags & EXC_FLAG_MUSTBEDIR) { char *s; - x = xmalloc(sizeof(*x) + len); + x = xmalloc(sizeof(*x) + patternlen + 1); s = (char *)(x+1); - memcpy(s, string, len - 1); - s[len - 1] = '\0'; - string = s; + memcpy(s, string, patternlen); + s[patternlen] = '\0'; x->pattern = s; - flags = EXC_FLAG_MUSTBEDIR; } else { x = xmalloc(sizeof(*x)); x->pattern = string; } - x->to_exclude = to_exclude; - x->patternlen = strlen(string); + x->patternlen = patternlen; + x->nowildcardlen = nowildcardlen; x->base = base; x->baselen = baselen; x->flags = flags; - if (!strchr(string, '/')) - x->flags |= EXC_FLAG_NODIR; - x->nowildcardlen = simple_length(string); - if (*string == '*' && no_wildcard(string+1)) - x->flags |= EXC_FLAG_ENDSWITH; ALLOC_GROW(which->excludes, which->nr + 1, which->alloc); which->excludes[which->nr++] = x; } @@@ -397,8 -424,6 +424,8 @@@ int add_excludes_from_file_to_list(cons fd = open(fname, O_RDONLY); if (fd < 0 || fstat(fd, &st) < 0) { + if (errno != ENOENT) + warn_on_inaccessible(fname); if (0 <= fd) close(fd); if (!check_index || @@@ -505,6 -530,72 +532,72 @@@ static void prep_exclude(struct dir_str dir->basebuf[baselen] = '\0'; } + int match_basename(const char *basename, int basenamelen, + const char *pattern, int prefix, int patternlen, + int flags) + { + if (prefix == patternlen) { + if (!strcmp_icase(pattern, basename)) + return 1; + } else if (flags & EXC_FLAG_ENDSWITH) { + if (patternlen - 1 <= basenamelen && + !strcmp_icase(pattern + 1, + basename + basenamelen - patternlen + 1)) + return 1; + } else { + if (fnmatch_icase(pattern, basename, 0) == 0) + return 1; + } + return 0; + } + + int match_pathname(const char *pathname, int pathlen, + const char *base, int baselen, + const char *pattern, int prefix, int patternlen, + int flags) + { + const char *name; + int namelen; + + /* + * match with FNM_PATHNAME; the pattern has base implicitly + * in front of it. + */ + if (*pattern == '/') { + pattern++; + prefix--; + } + + /* + * baselen does not count the trailing slash. base[] may or + * may not end with a trailing slash though. + */ + if (pathlen < baselen + 1 || + (baselen && pathname[baselen] != '/') || + strncmp_icase(pathname, base, baselen)) + return 0; + + namelen = baselen ? pathlen - baselen - 1 : pathlen; + name = pathname + pathlen - namelen; + + if (prefix) { + /* + * if the non-wildcard part is longer than the + * remaining pathname, surely it cannot match. + */ + if (prefix > namelen) + return 0; + + if (strncmp_icase(pattern, name, prefix)) + return 0; + pattern += prefix; + name += prefix; + namelen -= prefix; + } + + return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0; + } + /* Scan the list and let the last match determine the fate. * Return 1 for exclude, 0 for include and -1 for undecided. */ @@@ -519,9 -610,9 +612,9 @@@ int excluded_from_list(const char *path for (i = el->nr - 1; 0 <= i; i--) { struct exclude *x = el->excludes[i]; - const char *name, *exclude = x->pattern; - int to_exclude = x->to_exclude; - int namelen, prefix = x->nowildcardlen; + const char *exclude = x->pattern; + int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1; + int prefix = x->nowildcardlen; if (x->flags & EXC_FLAG_MUSTBEDIR) { if (*dtype == DT_UNKNOWN) @@@ -531,51 -622,18 +624,18 @@@ } if (x->flags & EXC_FLAG_NODIR) { - /* match basename */ - if (prefix == x->patternlen) { - if (!strcmp_icase(exclude, basename)) - return to_exclude; - } else if (x->flags & EXC_FLAG_ENDSWITH) { - if (x->patternlen - 1 <= pathlen && - !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1)) - return to_exclude; - } else { - if (fnmatch_icase(exclude, basename, 0) == 0) - return to_exclude; - } + if (match_basename(basename, + pathlen - (basename - pathname), + exclude, prefix, x->patternlen, + x->flags)) + return to_exclude; continue; } - /* match with FNM_PATHNAME: - * exclude has base (baselen long) implicitly in front of it. - */ - if (*exclude == '/') { - exclude++; - prefix--; - } - - if (pathlen < x->baselen || - (x->baselen && pathname[x->baselen-1] != '/') || - strncmp_icase(pathname, x->base, x->baselen)) - continue; - - namelen = x->baselen ? pathlen - x->baselen : pathlen; - name = pathname + pathlen - namelen; - - /* if the non-wildcard part is longer than the - remaining pathname, surely it cannot match */ - if (prefix > namelen) - continue; - - if (prefix) { - if (strncmp_icase(exclude, name, prefix)) - continue; - exclude += prefix; - name += prefix; - namelen -= prefix; - } - - if (!namelen || !fnmatch_icase(exclude, name, FNM_PATHNAME)) + assert(x->baselen == 0 || x->base[x->baselen - 1] == '/'); + if (match_pathname(pathname, pathlen, + x->base, x->baselen ? x->baselen - 1 : 0, + exclude, prefix, x->patternlen, x->flags)) return to_exclude; } return -1; /* undecided */ @@@ -1305,17 -1363,12 +1365,17 @@@ int remove_dir_recursively(struct strbu void setup_standard_excludes(struct dir_struct *dir) { const char *path; + char *xdg_path; dir->exclude_per_dir = ".gitignore"; path = git_path("info/exclude"); - if (!access(path, R_OK)) + if (!excludes_file) { + home_config_paths(NULL, &xdg_path, "ignore"); + excludes_file = xdg_path; + } + if (!access_or_warn(path, R_OK)) add_excludes_from_file(dir, path); - if (excludes_file && !access(excludes_file, R_OK)) + if (excludes_file && !access_or_warn(excludes_file, R_OK)) add_excludes_from_file(dir, excludes_file); } diff --combined t/t0003-attributes.sh index febc45c9cc,f6c21ea4ea..807b8b88e2 --- a/t/t0003-attributes.sh +++ b/t/t0003-attributes.sh @@@ -123,6 -123,16 +123,6 @@@ test_expect_success 'attribute matchin ' -test_expect_success 'check whether FS is case-insensitive' ' - mkdir junk && - echo good >junk/CamelCase && - echo bad >junk/camelcase && - if test "$(cat junk/CamelCase)" != good - then - test_set_prereq CASE_INSENSITIVE_FS - fi -' - test_expect_success CASE_INSENSITIVE_FS 'additional case insensitivity tests' ' test_must_fail attr_check a/B/D/g "a/b/d/*" "-c core.ignorecase=0" && test_must_fail attr_check A/B/D/NO "a/b/d/*" "-c core.ignorecase=0" && @@@ -196,6 -206,16 +196,16 @@@ test_expect_success 'root subdir attrib attr_check subdir/a/i unspecified ' + test_expect_success 'negative patterns' ' + echo "!f test=bar" >.gitattributes && + test_must_fail git check-attr test -- f + ' + + test_expect_success 'patterns starting with exclamation' ' + echo "\!f test=foo" >.gitattributes && + attr_check "!f" foo + ' + test_expect_success 'setup bare' ' git clone --bare . bare.git && cd bare.git