parse_pathspec: accept :(icase)path syntax
author Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Sun, 14 Jul 2013 08:36:09 +0000 (15:36 +0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 15 Jul 2013 19:14:38 +0000 (12:14 -0700)
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git.txt
Documentation/glossary-content.txt
builtin/add.c
builtin/ls-tree.c
cache.h
dir.c
git.c
pathspec.c
pathspec.h
t/t6131-pathspec-icase.sh [new file with mode: 0755]
tree-walk.c
index 3571a1b7cee5afaf2a3cde0b0f549065e5eb26ab..2c1f6f5f53cf372e9fbaa02a9433e6cb3b416332 100644 (file)
@@ -466,6 +466,10 @@ help ...`.
        globbing on individual pathspecs can be done using pathspec
        magic ":(glob)"
 
+--icase-pathspecs::
+       Add "icase" magic to all pathspecs. This is equivalent to setting
+       the `GIT_ICASE_PATHSPECS` environment variable to `1`.
+
 GIT COMMANDS
 ------------
 
@@ -879,6 +883,10 @@ GIT_NOGLOB_PATHSPECS::
        Setting this variable to `1` will cause Git to treat all
        pathspecs as literal (aka "literal" magic).
 
+GIT_ICASE_PATHSPECS::
+       Setting this variable to `1` will cause Git to treat all
+       pathspecs as case-insensitive.
+
 
 Discussion[[Discussion]]
 ------------------------
index a3d9029ce7cf42c9f6a1867640a84a437aa01b2e..13a64d3aac8aca7181a6735abdabd8ed6d28ddc0 100644 (file)
@@ -334,6 +334,9 @@ literal;;
        Wildcards in the pattern such as `*` or `?` are treated
        as literal characters.
 
+icase;;
+       Case insensitive match.
+
 glob;;
        Git treats the pattern as a shell glob suitable for
        consumption by fnmatch(3) with the FNM_PATHNAME flag:
index 1dab2464f6bd4c848b3068c3e12df378b0540197..9d52fc7915a5061a757ce937f24e894d65cb0dff 100644 (file)
@@ -544,12 +544,14 @@ int cmd_add(int argc, const char **argv, const char *prefix)
                GUARD_PATHSPEC(&pathspec,
                               PATHSPEC_FROMTOP |
                               PATHSPEC_LITERAL |
-                              PATHSPEC_GLOB);
+                              PATHSPEC_GLOB |
+                              PATHSPEC_ICASE);
 
                for (i = 0; i < pathspec.nr; i++) {
                        const char *path = pathspec.items[i].match;
                        if (!seen[i] &&
-                           ((pathspec.items[i].magic & PATHSPEC_GLOB) ||
+                           ((pathspec.items[i].magic &
+                             (PATHSPEC_GLOB | PATHSPEC_ICASE)) ||
                             !file_exists(path))) {
                                if (ignore_missing) {
                                        int dtype = DT_UNKNOWN;
index 7882352a9b06534592aa98732ec2841928d006a5..f6d82151813a44cedd5122ea3962e8b9b163114a 100644 (file)
@@ -173,7 +173,7 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
         * cannot be lifted until it is converted to use
         * match_pathspec_depth() or tree_entry_interesting()
         */
-       parse_pathspec(&pathspec, PATHSPEC_GLOB,
+       parse_pathspec(&pathspec, PATHSPEC_GLOB | PATHSPEC_ICASE,
                       PATHSPEC_PREFER_CWD,
                       prefix, argv + 1);
        for (i = 0; i < pathspec.nr; i++)
diff --git a/cache.h b/cache.h
index dc4d2ee22dc6f34cb126f0a892cd4220cb4c039b..3cff825d5c369f20dae01ab2dabedd98fc4381c1 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -369,6 +369,7 @@ static inline enum object_type object_type(unsigned int mode)
 #define GIT_LITERAL_PATHSPECS_ENVIRONMENT "GIT_LITERAL_PATHSPECS"
 #define GIT_GLOB_PATHSPECS_ENVIRONMENT "GIT_GLOB_PATHSPECS"
 #define GIT_NOGLOB_PATHSPECS_ENVIRONMENT "GIT_NOGLOB_PATHSPECS"
+#define GIT_ICASE_PATHSPECS_ENVIRONMENT "GIT_ICASE_PATHSPECS"
 
 /*
  * This environment variable is expected to contain a boolean indicating
diff --git a/dir.c b/dir.c
index 076bd462e58828251bac3e63ce83ce7be7fdd4c8..8543736debe797257b69b3805e8592d60d4ae8df 100644 (file)
--- a/dir.c
+++ b/dir.c
@@ -57,7 +57,7 @@ inline int git_fnmatch(const struct pathspec_item *item,
                       int prefix)
 {
        if (prefix > 0) {
-               if (strncmp(pattern, string, prefix))
+               if (ps_strncmp(item, pattern, string, prefix))
                        return FNM_NOMATCH;
                pattern += prefix;
                string += prefix;
@@ -66,14 +66,18 @@ inline int git_fnmatch(const struct pathspec_item *item,
                int pattern_len = strlen(++pattern);
                int string_len = strlen(string);
                return string_len < pattern_len ||
-                      strcmp(pattern,
-                             string + string_len - pattern_len);
+                       ps_strcmp(item, pattern,
+                                 string + string_len - pattern_len);
        }
        if (item->magic & PATHSPEC_GLOB)
-               return wildmatch(pattern, string, WM_PATHNAME, NULL);
+               return wildmatch(pattern, string,
+                                WM_PATHNAME |
+                                (item->magic & PATHSPEC_ICASE ? WM_CASEFOLD : 0),
+                                NULL);
        else
                /* wildmatch has not learned no FNM_PATHNAME mode yet */
-               return fnmatch(pattern, string, 0);
+               return fnmatch(pattern, string,
+                              item->magic & PATHSPEC_ICASE ? FNM_CASEFOLD : 0);
 }
 
 static int fnmatch_icase_mem(const char *pattern, int patternlen,
@@ -110,16 +114,27 @@ static size_t common_prefix_len(const struct pathspec *pathspec)
        int n;
        size_t max = 0;
 
+       /*
+        * ":(icase)path" is treated as a pathspec full of
+        * wildcards. In other words, only the prefix is considered
+        * the common prefix. If the pathspec is abc/foo abc/bar,
+        * running in subdir xyz, the common prefix is still xyz, not
+        * xyz/abc as in non-:(icase).
+        */
        GUARD_PATHSPEC(pathspec,
                       PATHSPEC_FROMTOP |
                       PATHSPEC_MAXDEPTH |
                       PATHSPEC_LITERAL |
-                      PATHSPEC_GLOB);
+                      PATHSPEC_GLOB |
+                      PATHSPEC_ICASE);
 
        for (n = 0; n < pathspec->nr; n++) {
-               size_t i = 0, len = 0;
-               while (i < pathspec->items[n].nowildcard_len &&
-                      (n == 0 || i < max)) {
+               size_t i = 0, len = 0, item_len;
+               if (pathspec->items[n].magic & PATHSPEC_ICASE)
+                       item_len = pathspec->items[n].prefix;
+               else
+                       item_len = pathspec->items[n].nowildcard_len;
+               while (i < item_len && (n == 0 || i < max)) {
                        char c = pathspec->items[n].match[i];
                        if (c != pathspec->items[0].match[i])
                                break;
@@ -196,11 +211,44 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix,
        const char *match = item->match + prefix;
        int matchlen = item->len - prefix;
 
+       /*
+        * The normal call pattern is:
+        * 1. prefix = common_prefix_len(ps);
+        * 2. prune something, or fill_directory
+        * 3. match_pathspec_depth()
+        *
+        * 'prefix' at #1 may be shorter than the command's prefix and
+        * it's ok for #2 to match extra files. Those extras will be
+        * trimmed at #3.
+        *
+        * Suppose the pathspec is 'foo' and '../bar' running from
+        * subdir 'xyz'. The common prefix at #1 will be empty, thanks
+        * to "../". We may have xyz/foo _and_ XYZ/foo after #2. The
+        * user does not want XYZ/foo, only the "foo" part should be
+        * case-insensitive. We need to filter out XYZ/foo here. In
+        * other words, we do not trust the caller on comparing the
+        * prefix part when :(icase) is involved. We do exact
+        * comparison ourselves.
+        *
+        * Normally the caller (common_prefix_len() in fact) does
+        * _exact_ matching on name[-prefix+1..-1] and we do not need
+        * to check that part. Be defensive and check it anyway, in
+        * case common_prefix_len is changed, or a new caller is
+        * introduced that does not use common_prefix_len.
+        *
+        * If the penalty turns out too high when prefix is really
+        * long, maybe change it to
+        * strncmp(match, name, item->prefix - prefix)
+        */
+       if (item->prefix && (item->magic & PATHSPEC_ICASE) &&
+           strncmp(item->match, name - prefix, item->prefix))
+               return 0;
+
        /* If the match was just the prefix, we matched */
        if (!*match)
                return MATCHED_RECURSIVELY;
 
-       if (matchlen <= namelen && !strncmp(match, name, matchlen)) {
+       if (matchlen <= namelen && !ps_strncmp(item, match, name, matchlen)) {
                if (matchlen == namelen)
                        return MATCHED_EXACTLY;
 
@@ -241,7 +289,8 @@ int match_pathspec_depth(const struct pathspec *ps,
                       PATHSPEC_FROMTOP |
                       PATHSPEC_MAXDEPTH |
                       PATHSPEC_LITERAL |
-                      PATHSPEC_GLOB);
+                      PATHSPEC_GLOB |
+                      PATHSPEC_ICASE);
 
        if (!ps->nr) {
                if (!ps->recursive ||
@@ -1301,7 +1350,8 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
                               PATHSPEC_FROMTOP |
                               PATHSPEC_MAXDEPTH |
                               PATHSPEC_LITERAL |
-                              PATHSPEC_GLOB);
+                              PATHSPEC_GLOB |
+                              PATHSPEC_ICASE);
 
        if (has_symlink_leading_path(path, len))
                return dir->nr;
diff --git a/git.c b/git.c
index 25096755f44be3a5b297d09885570cd01d0a5188..cebf8827da1a614d601a500564cf77e7f23eda10 100644 (file)
--- a/git.c
+++ b/git.c
@@ -155,6 +155,10 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
                        setenv(GIT_NOGLOB_PATHSPECS_ENVIRONMENT, "1", 1);
                        if (envchanged)
                                *envchanged = 1;
+               } else if (!strcmp(cmd, "--icase-pathspecs")) {
+                       setenv(GIT_ICASE_PATHSPECS_ENVIRONMENT, "1", 1);
+                       if (envchanged)
+                               *envchanged = 1;
                } else if (!strcmp(cmd, "--shallow-file")) {
                        (*argv)++;
                        (*argc)--;
index c1e69178979c9c13da9566d24e59731809051a07..d9f41432221ab5f886aaf0a6e306cb263892ab33 100644 (file)
@@ -57,7 +57,6 @@ char *find_pathspecs_matching_against_index(const struct pathspec *pathspec)
  *
  * Possible future magic semantics include stuff like:
  *
- *     { PATHSPEC_ICASE, '\0', "icase" },
  *     { PATHSPEC_RECURSIVE, '*', "recursive" },
  *     { PATHSPEC_REGEXP, '\0', "regexp" },
  *
@@ -71,6 +70,7 @@ static struct pathspec_magic {
        { PATHSPEC_FROMTOP, '/', "top" },
        { PATHSPEC_LITERAL,   0, "literal" },
        { PATHSPEC_GLOB,   '\0', "glob" },
+       { PATHSPEC_ICASE,  '\0', "icase" },
 };
 
 /*
@@ -95,6 +95,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
        static int literal_global = -1;
        static int glob_global = -1;
        static int noglob_global = -1;
+       static int icase_global = -1;
        unsigned magic = 0, short_magic = 0, global_magic = 0;
        const char *copyfrom = elt, *long_magic_end = NULL;
        char *match;
@@ -116,6 +117,12 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
        if (glob_global && noglob_global)
                die(_("global 'glob' and 'noglob' pathspec settings are incompatible"));
 
+
+       if (icase_global < 0)
+               icase_global = git_env_bool(GIT_ICASE_PATHSPECS_ENVIRONMENT, 0);
+       if (icase_global)
+               global_magic |= PATHSPEC_ICASE;
+
        if ((global_magic & PATHSPEC_LITERAL) &&
            (global_magic & ~PATHSPEC_LITERAL))
                die(_("global 'literal' pathspec setting is incompatible "
index cdf2fa39f6f3c0d81f99d7bf28c65793d848d61c..04b632fa3326defd3d119d88beadb296f8e4a722 100644 (file)
@@ -6,11 +6,13 @@
 #define PATHSPEC_MAXDEPTH      (1<<1)
 #define PATHSPEC_LITERAL       (1<<2)
 #define PATHSPEC_GLOB          (1<<3)
+#define PATHSPEC_ICASE         (1<<4)
 #define PATHSPEC_ALL_MAGIC       \
        (PATHSPEC_FROMTOP       | \
         PATHSPEC_MAXDEPTH      | \
         PATHSPEC_LITERAL       | \
-        PATHSPEC_GLOB)
+        PATHSPEC_GLOB          | \
+        PATHSPEC_ICASE)
 
 #define PATHSPEC_ONESTAR 1     /* the pathspec pattern sastisfies GFNM_ONESTAR */
 
@@ -65,6 +67,24 @@ extern void parse_pathspec(struct pathspec *pathspec,
 extern void copy_pathspec(struct pathspec *dst, const struct pathspec *src);
 extern void free_pathspec(struct pathspec *);
 
+static inline int ps_strncmp(const struct pathspec_item *item,
+                            const char *s1, const char *s2, size_t n)
+{
+       if (item->magic & PATHSPEC_ICASE)
+               return strncasecmp(s1, s2, n);
+       else
+               return strncmp(s1, s2, n);
+}
+
+static inline int ps_strcmp(const struct pathspec_item *item,
+                           const char *s1, const char *s2)
+{
+       if (item->magic & PATHSPEC_ICASE)
+               return strcasecmp(s1, s2);
+       else
+               return strcmp(s1, s2);
+}
+
 extern char *find_pathspecs_matching_against_index(const struct pathspec *pathspec);
 extern void add_pathspec_matches_against_index(const struct pathspec *pathspec, char *seen);
 extern const char *check_path_for_gitlink(const char *path);
diff --git a/t/t6131-pathspec-icase.sh b/t/t6131-pathspec-icase.sh
new file mode 100755 (executable)
index 0000000..3215eef
--- /dev/null
@@ -0,0 +1,97 @@
+#!/bin/sh
+
+test_description='test case insensitive pathspec limiting'
+. ./test-lib.sh
+
+test_expect_success 'create commits with glob characters' '
+       test_commit bar bar &&
+       test_commit bAr bAr &&
+       test_commit BAR BAR &&
+       mkdir foo &&
+       test_commit foo/bar foo/bar &&
+       test_commit foo/bAr foo/bAr &&
+       test_commit foo/BAR foo/BAR &&
+       mkdir fOo &&
+       test_commit fOo/bar fOo/bar &&
+       test_commit fOo/bAr fOo/bAr &&
+       test_commit fOo/BAR fOo/BAR &&
+       mkdir FOO &&
+       test_commit FOO/bar FOO/bar &&
+       test_commit FOO/bAr FOO/bAr &&
+       test_commit FOO/BAR FOO/BAR
+'
+
+test_expect_success 'tree_entry_interesting matches bar' '
+       echo bar >expect &&
+       git log --format=%s -- "bar" >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'tree_entry_interesting matches :(icase)bar' '
+       cat <<-EOF >expect &&
+       BAR
+       bAr
+       bar
+       EOF
+       git log --format=%s -- ":(icase)bar" >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'tree_entry_interesting matches :(icase)bar with prefix' '
+       cat <<-EOF >expect &&
+       fOo/BAR
+       fOo/bAr
+       fOo/bar
+       EOF
+       ( cd fOo && git log --format=%s -- ":(icase)bar" ) >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'tree_entry_interesting matches :(icase)bar with empty prefix' '
+       cat <<-EOF >expect &&
+       FOO/BAR
+       FOO/bAr
+       FOO/bar
+       fOo/BAR
+       fOo/bAr
+       fOo/bar
+       foo/BAR
+       foo/bAr
+       foo/bar
+       EOF
+       ( cd fOo && git log --format=%s -- ":(icase)../foo/bar" ) >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'match_pathspec_depth matches :(icase)bar' '
+       cat <<-EOF >expect &&
+       BAR
+       bAr
+       bar
+       EOF
+       git ls-files ":(icase)bar" >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'match_pathspec_depth matches :(icase)bar with prefix' '
+       cat <<-EOF >expect &&
+       fOo/BAR
+       fOo/bAr
+       fOo/bar
+       EOF
+       ( cd fOo && git ls-files --full-name ":(icase)bar" ) >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'match_pathspec_depth matches :(icase)bar with empty prefix' '
+       cat <<-EOF >expect &&
+       bar
+       fOo/BAR
+       fOo/bAr
+       fOo/bar
+       EOF
+       ( cd fOo && git ls-files --full-name ":(icase)bar" ../bar ) >actual &&
+       test_cmp expect actual
+'
+
+test_done
index a44f528b3beda45ed1ee2818a3193c14ae3aad99..c366852553d08ac9697c87a46375c47761b58c0c 100644 (file)
@@ -489,13 +489,25 @@ int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned ch
        return retval;
 }
 
-static int match_entry(const struct name_entry *entry, int pathlen,
+static int match_entry(const struct pathspec_item *item,
+                      const struct name_entry *entry, int pathlen,
                       const char *match, int matchlen,
                       enum interesting *never_interesting)
 {
        int m = -1; /* signals that we haven't called strncmp() */
 
-       if (*never_interesting != entry_not_interesting) {
+       if (item->magic & PATHSPEC_ICASE)
+               /*
+                * "Never interesting" trick requires exact
+                * matching. We could do something clever with inexact
+                * matching, but it's trickier (and not to forget that
+                * strcasecmp is locale-dependent, at least in
+                * glibc). Just disable it for now. It can't be worse
+                * than the wildcard's codepath of '[Tt][Hi][Is][Ss]'
+                * pattern.
+                */
+               *never_interesting = entry_not_interesting;
+       else if (*never_interesting != entry_not_interesting) {
                /*
                 * We have not seen any match that sorts later
                 * than the current path.
@@ -541,7 +553,7 @@ static int match_entry(const struct name_entry *entry, int pathlen,
                 * we cheated and did not do strncmp(), so we do
                 * that here.
                 */
-               m = strncmp(match, entry->path, pathlen);
+               m = ps_strncmp(item, match, entry->path, pathlen);
 
        /*
         * If common part matched earlier then it is a hit,
@@ -549,15 +561,39 @@ static int match_entry(const struct name_entry *entry, int pathlen,
         * leading directory and is shorter than match.
         */
        if (!m)
+               /*
+                * match_entry does not check if the prefix part is
+                * matched case-sensitively. If the entry is a
+                * directory and part of prefix, it'll be rematched
+                * eventually by basecmp with special treatment for
+                * the prefix.
+                */
                return 1;
 
        return 0;
 }
 
-static int match_dir_prefix(const char *base,
+/* :(icase)-aware string compare */
+static int basecmp(const struct pathspec_item *item,
+                  const char *base, const char *match, int len)
+{
+       if (item->magic & PATHSPEC_ICASE) {
+               int ret, n = len > item->prefix ? item->prefix : len;
+               ret = strncmp(base, match, n);
+               if (ret)
+                       return ret;
+               base += n;
+               match += n;
+               len -= n;
+       }
+       return ps_strncmp(item, base, match, len);
+}
+
+static int match_dir_prefix(const struct pathspec_item *item,
+                           const char *base,
                            const char *match, int matchlen)
 {
-       if (strncmp(base, match, matchlen))
+       if (basecmp(item, base, match, matchlen))
                return 0;
 
        /*
@@ -594,7 +630,7 @@ static int match_wildcard_base(const struct pathspec_item *item,
                 */
                if (baselen >= matchlen) {
                        *matched = matchlen;
-                       return !strncmp(base, match, matchlen);
+                       return !basecmp(item, base, match, matchlen);
                }
 
                dirlen = matchlen;
@@ -607,7 +643,7 @@ static int match_wildcard_base(const struct pathspec_item *item,
                 * base ends with '/' so we are sure it really matches
                 * directory
                 */
-               if (strncmp(base, match, baselen))
+               if (basecmp(item, base, match, baselen))
                        return 0;
                *matched = baselen;
        } else
@@ -640,7 +676,8 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
                       PATHSPEC_FROMTOP |
                       PATHSPEC_MAXDEPTH |
                       PATHSPEC_LITERAL |
-                      PATHSPEC_GLOB);
+                      PATHSPEC_GLOB |
+                      PATHSPEC_ICASE);
 
        if (!ps->nr) {
                if (!ps->recursive ||
@@ -663,7 +700,7 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
 
                if (baselen >= matchlen) {
                        /* If it doesn't match, move along... */
-                       if (!match_dir_prefix(base_str, match, matchlen))
+                       if (!match_dir_prefix(item, base_str, match, matchlen))
                                goto match_wildcards;
 
                        if (!ps->recursive ||
@@ -679,8 +716,8 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
                }
 
                /* Either there must be no base, or the base must match. */
-               if (baselen == 0 || !strncmp(base_str, match, baselen)) {
-                       if (match_entry(entry, pathlen,
+               if (baselen == 0 || !basecmp(item, base_str, match, baselen)) {
+                       if (match_entry(item, entry, pathlen,
                                        match + baselen, matchlen - baselen,
                                        &never_interesting))
                                return entry_interesting;