From: Junio C Hamano Date: Thu, 28 Jun 2012 22:19:57 +0000 (-0700) Subject: Merge branch 'nd/exclude-workaround-top-heavy' X-Git-Tag: v1.7.12-rc0~73 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/653111f99c6bfa3bb5effd61c138bf925f19fbdb?ds=inline;hp=-c Merge branch 'nd/exclude-workaround-top-heavy' Attempt to optimize matching with an exclude pattern with a deep directory hierarchy by taking the part that specifies leading path without wildcard literally. --- 653111f99c6bfa3bb5effd61c138bf925f19fbdb diff --combined dir.c index 2c02b312b7,94fe9f875c..0015cc54f4 --- a/dir.c +++ b/dir.c @@@ -288,9 -288,24 +288,24 @@@ int match_pathspec_depth(const struct p return retval; } + /* + * Return the length of the "simple" part of a path match limiter. + */ + static int simple_length(const char *match) + { + int len = -1; + + for (;;) { + unsigned char c = *match++; + len++; + if (c == '\0' || is_glob_special(c)) + return len; + } + } + static int no_wildcard(const char *string) { - return string[strcspn(string, "*?[{\\")] == '\0'; + return string[simple_length(string)] == '\0'; } void add_exclude(const char *string, const char *base, @@@ -326,8 -341,7 +341,7 @@@ x->flags = flags; if (!strchr(string, '/')) x->flags |= EXC_FLAG_NODIR; - if (no_wildcard(string)) - x->flags |= EXC_FLAG_NOWILDCARD; + x->nowildcardlen = simple_length(string); if (*string == '*' && no_wildcard(string+1)) x->flags |= EXC_FLAG_ENDSWITH; ALLOC_GROW(which->excludes, which->nr + 1, which->alloc); @@@ -498,62 -512,74 +512,74 @@@ int excluded_from_list(const char *path { int i; - if (el->nr) { - for (i = el->nr - 1; 0 <= i; i--) { - struct exclude *x = el->excludes[i]; - const char *exclude = x->pattern; - int to_exclude = x->to_exclude; - - if (x->flags & EXC_FLAG_MUSTBEDIR) { - if (*dtype == DT_UNKNOWN) - *dtype = get_dtype(NULL, pathname, pathlen); - if (*dtype != DT_DIR) - continue; - } + if (!el->nr) + return -1; /* undefined */ - if (x->flags & EXC_FLAG_NODIR) { - /* match basename */ - if (x->flags & EXC_FLAG_NOWILDCARD) { - if (!strcmp_icase(exclude, basename)) - return to_exclude; - } else if (x->flags & EXC_FLAG_ENDSWITH) { - if (x->patternlen - 1 <= pathlen && - !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1)) - return to_exclude; - } else { - if (fnmatch_icase(exclude, basename, 0) == 0) - return to_exclude; - } - } - else { - /* match with FNM_PATHNAME: - * exclude has base (baselen long) implicitly - * in front of it. - */ - int baselen = x->baselen; - if (*exclude == '/') - exclude++; - - if (pathlen < baselen || - (baselen && pathname[baselen-1] != '/') || - strncmp_icase(pathname, x->base, baselen)) - continue; - - if (x->flags & EXC_FLAG_NOWILDCARD) { - if (!strcmp_icase(exclude, pathname + baselen)) - return to_exclude; - } else { - if (fnmatch_icase(exclude, pathname+baselen, - FNM_PATHNAME) == 0) - return to_exclude; - } + for (i = el->nr - 1; 0 <= i; i--) { + struct exclude *x = el->excludes[i]; + const char *name, *exclude = x->pattern; + int to_exclude = x->to_exclude; + int namelen, prefix = x->nowildcardlen; + + if (x->flags & EXC_FLAG_MUSTBEDIR) { + if (*dtype == DT_UNKNOWN) + *dtype = get_dtype(NULL, pathname, pathlen); + if (*dtype != DT_DIR) + continue; + } + + if (x->flags & EXC_FLAG_NODIR) { + /* match basename */ + if (prefix == x->patternlen) { + if (!strcmp_icase(exclude, basename)) + return to_exclude; + } else if (x->flags & EXC_FLAG_ENDSWITH) { + if (x->patternlen - 1 <= pathlen && + !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1)) + return to_exclude; + } else { + if (fnmatch_icase(exclude, basename, 0) == 0) + return to_exclude; } + continue; + } + + /* match with FNM_PATHNAME: + * exclude has base (baselen long) implicitly in front of it. + */ + if (*exclude == '/') { + exclude++; + prefix--; } + + if (pathlen < x->baselen || + (x->baselen && pathname[x->baselen-1] != '/') || + strncmp_icase(pathname, x->base, x->baselen)) + continue; + + namelen = x->baselen ? pathlen - x->baselen : pathlen; + name = pathname + pathlen - namelen; + + /* if the non-wildcard part is longer than the + remaining pathname, surely it cannot match */ + if (prefix > namelen) + continue; + + if (prefix) { + if (strncmp_icase(exclude, name, prefix)) + continue; + exclude += prefix; + name += prefix; + namelen -= prefix; + } + + if (!namelen || !fnmatch_icase(exclude, name, FNM_PATHNAME)) + return to_exclude; } return -1; /* undecided */ } -int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) +static int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) { int pathlen = strlen(pathname); int st; @@@ -573,64 -599,6 +599,64 @@@ return 0; } +void path_exclude_check_init(struct path_exclude_check *check, + struct dir_struct *dir) +{ + check->dir = dir; + strbuf_init(&check->path, 256); +} + +void path_exclude_check_clear(struct path_exclude_check *check) +{ + strbuf_release(&check->path); +} + +/* + * Is this name excluded? This is for a caller like show_files() that + * do not honor directory hierarchy and iterate through paths that are + * possibly in an ignored directory. + * + * A path to a directory known to be excluded is left in check->path to + * optimize for repeated checks for files in the same excluded directory. + */ +int path_excluded(struct path_exclude_check *check, + const char *name, int namelen, int *dtype) +{ + int i; + struct strbuf *path = &check->path; + + /* + * we allow the caller to pass namelen as an optimization; it + * must match the length of the name, as we eventually call + * excluded() on the whole name string. + */ + if (namelen < 0) + namelen = strlen(name); + + if (path->len && + path->len <= namelen && + !memcmp(name, path->buf, path->len) && + (!name[path->len] || name[path->len] == '/')) + return 1; + + strbuf_setlen(path, 0); + for (i = 0; name[i]; i++) { + int ch = name[i]; + + if (ch == '/') { + int dt = DT_DIR; + if (excluded(check->dir, path->buf, &dt)) + return 1; + } + strbuf_addch(path, ch); + } + + /* An entry in the index; cannot be a directory with subentries */ + strbuf_setlen(path, 0); + + return excluded(check->dir, name, dtype); +} + static struct dir_entry *dir_entry_new(const char *pathname, int len) { struct dir_entry *ent; @@@ -1055,21 -1023,6 +1081,6 @@@ static int cmp_name(const void *p1, con e2->name, e2->len); } - /* - * Return the length of the "simple" part of a path match limiter. - */ - static int simple_length(const char *match) - { - int len = -1; - - for (;;) { - unsigned char c = *match++; - len++; - if (c == '\0' || is_glob_special(c)) - return len; - } - } - static struct path_simplify *create_simplify(const char **pathspec) { int nr, alloc = 0; diff --combined dir.h index 6c73e4151d,39fc145b66..893465a1e8 --- a/dir.h +++ b/dir.h @@@ -1,15 -1,12 +1,14 @@@ #ifndef DIR_H #define DIR_H +#include "strbuf.h" + struct dir_entry { unsigned int len; char name[FLEX_ARRAY]; /* more */ }; #define EXC_FLAG_NODIR 1 - #define EXC_FLAG_NOWILDCARD 2 #define EXC_FLAG_ENDSWITH 4 #define EXC_FLAG_MUSTBEDIR 8 @@@ -19,6 -16,7 +18,7 @@@ struct exclude_list struct exclude { const char *pattern; int patternlen; + int nowildcardlen; const char *base; int baselen; int to_exclude; @@@ -78,22 -76,8 +78,22 @@@ extern int read_directory(struct dir_st extern int excluded_from_list(const char *pathname, int pathlen, const char *basename, int *dtype, struct exclude_list *el); -extern int excluded(struct dir_struct *, const char *, int *); struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len); + +/* + * The excluded() API is meant for callers that check each level of leading + * directory hierarchies with excluded() to avoid recursing into excluded + * directories. Callers that do not do so should use this API instead. + */ +struct path_exclude_check { + struct dir_struct *dir; + struct strbuf path; +}; +extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *); +extern void path_exclude_check_clear(struct path_exclude_check *); +extern int path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype); + + extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, char **buf_p, struct exclude_list *which, int check_index); extern void add_excludes_from_file(struct dir_struct *, const char *fname);