Merge branch 'jj/icase-directory'
author Junio C Hamano <gitster@pobox.com>
Sat, 4 Dec 2010 00:10:34 +0000 (16:10 -0800)
committer Junio C Hamano <gitster@pobox.com>
Sat, 4 Dec 2010 00:10:34 +0000 (16:10 -0800)
* jj/icase-directory:
Support case folding in git fast-import when core.ignorecase=true
Support case folding for git add when core.ignorecase=true
Add case insensitivity support when using git ls-files
Add case insensitivity support for directories when using git status
Case insensitivity support for .gitignore via core.ignorecase
Add string comparison functions that respect the ignore_case variable.
Makefile & configure: add a NO_FNMATCH_CASEFOLD flag
Makefile & configure: add a NO_FNMATCH flag

Conflicts:
Makefile
config.mak.in
configure.ac
fast-import.c

Makefile
config.mak.in
configure.ac
dir.c
dir.h
fast-import.c
name-hash.c
read-cache.c
index 1d4241346594e5e0df7df7f766899d4ac76d84ff..29ebe70599aa9c01ddb84e5611ac08d441adef4c 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -70,6 +70,11 @@ all::
 #
 # Define NO_STRTOK_R if you don't have strtok_r in the C library.
 #
+# Define NO_FNMATCH if you don't have fnmatch in the C library.
+#
+# Define NO_FNMATCH_CASEFOLD if your fnmatch function doesn't have the
+# FNM_CASEFOLD GNU extension.
+#
 # Define NO_LIBGEN_H if you don't have libgen.h.
 #
 # Define NEEDS_LIBGEN if your libgen needs -lgen when linking
@@ -849,6 +854,7 @@ ifeq ($(uname_S),SunOS)
        NO_MKDTEMP = YesPlease
        NO_MKSTEMPS = YesPlease
        NO_REGEX = YesPlease
+       NO_FNMATCH_CASEFOLD = YesPlease
        ifeq ($(uname_R),5.6)
                SOCKLEN_T = int
                NO_HSTRERROR = YesPlease
@@ -1055,6 +1061,7 @@ ifeq ($(uname_S),Windows)
        NO_STRCASESTR = YesPlease
        NO_STRLCPY = YesPlease
        NO_STRTOK_R = YesPlease
+       NO_FNMATCH = YesPlease
        NO_MEMMEM = YesPlease
        # NEEDS_LIBICONV = YesPlease
        NO_ICONV = YesPlease
@@ -1084,8 +1091,8 @@ ifeq ($(uname_S),Windows)
        AR = compat/vcbuild/scripts/lib.pl
        CFLAGS =
        BASIC_CFLAGS = -nologo -I. -I../zlib -Icompat/vcbuild -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE
-       COMPAT_OBJS = compat/msvc.o compat/fnmatch/fnmatch.o compat/winansi.o compat/win32/pthread.o compat/win32/syslog.o compat/win32/sys/poll.o
-       COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -DHAVE_ALLOCA_H -Icompat -Icompat/fnmatch -Icompat/regex -Icompat/fnmatch -Icompat/win32 -DSTRIP_EXTENSION=\".exe\"
+       COMPAT_OBJS = compat/msvc.o compat/winansi.o compat/win32/pthread.o compat/win32/syslog.o compat/win32/sys/poll.o
+       COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -DHAVE_ALLOCA_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\"
        BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO -SUBSYSTEM:CONSOLE -NODEFAULTLIB:MSVCRT.lib
        EXTLIBS = advapi32.lib shell32.lib wininet.lib ws2_32.lib
        PTHREAD_LIBS =
@@ -1129,6 +1136,7 @@ ifneq (,$(findstring MINGW,$(uname_S)))
        NO_STRCASESTR = YesPlease
        NO_STRLCPY = YesPlease
        NO_STRTOK_R = YesPlease
+       NO_FNMATCH = YesPlease
        NO_MEMMEM = YesPlease
        NEEDS_LIBICONV = YesPlease
        OLD_ICONV = YesPlease
@@ -1152,9 +1160,9 @@ ifneq (,$(findstring MINGW,$(uname_S)))
        NO_INET_PTON = YesPlease
        NO_INET_NTOP = YesPlease
        NO_POSIX_GOODIES = UnfortunatelyYes
-       COMPAT_CFLAGS += -D__USE_MINGW_ACCESS -DNOGDI -Icompat -Icompat/fnmatch -Icompat/win32
+       COMPAT_CFLAGS += -D__USE_MINGW_ACCESS -DNOGDI -Icompat -Icompat/win32
        COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\"
-       COMPAT_OBJS += compat/mingw.o compat/fnmatch/fnmatch.o compat/winansi.o \
+       COMPAT_OBJS += compat/mingw.o compat/winansi.o \
                compat/win32/pthread.o compat/win32/syslog.o \
                compat/win32/sys/poll.o
        EXTLIBS += -lws2_32
@@ -1364,6 +1372,17 @@ ifdef NO_STRTOK_R
        COMPAT_CFLAGS += -DNO_STRTOK_R
        COMPAT_OBJS += compat/strtok_r.o
 endif
+ifdef NO_FNMATCH
+       COMPAT_CFLAGS += -Icompat/fnmatch
+       COMPAT_CFLAGS += -DNO_FNMATCH
+       COMPAT_OBJS += compat/fnmatch/fnmatch.o
+else
+ifdef NO_FNMATCH_CASEFOLD
+       COMPAT_CFLAGS += -Icompat/fnmatch
+       COMPAT_CFLAGS += -DNO_FNMATCH_CASEFOLD
+       COMPAT_OBJS += compat/fnmatch/fnmatch.o
+endif
+endif
 ifdef NO_SETENV
        COMPAT_CFLAGS += -DNO_SETENV
        COMPAT_OBJS += compat/setenv.o
index a0c34eec15773f0f3e4d2b4e12f0c76e722ede95..56343bab505b59289d0a5334a2a6ac840d3f10d1 100644 (file)
@@ -47,6 +47,8 @@ NO_C99_FORMAT=@NO_C99_FORMAT@
 NO_HSTRERROR=@NO_HSTRERROR@
 NO_STRCASESTR=@NO_STRCASESTR@
 NO_STRTOK_R=@NO_STRTOK_R@
+NO_FNMATCH=@NO_FNMATCH@
+NO_FNMATCH_CASEFOLD=@NO_FNMATCH_CASEFOLD@
 NO_MEMMEM=@NO_MEMMEM@
 NO_STRLCPY=@NO_STRLCPY@
 NO_UINTMAX_T=@NO_UINTMAX_T@
index c5bc9a05f3730dc42f6a0dbf35551107886d7b0c..33dd46262bfb7e6d9db80446fa06ff0ba819f5df 100644 (file)
@@ -830,6 +830,34 @@ GIT_CHECK_FUNC(strtok_r,
 [NO_STRTOK_R=YesPlease])
 AC_SUBST(NO_STRTOK_R)
 #
+# Define NO_FNMATCH if you don't have fnmatch
+GIT_CHECK_FUNC(fnmatch,
+[NO_FNMATCH=],
+[NO_FNMATCH=YesPlease])
+AC_SUBST(NO_FNMATCH)
+#
+# Define NO_FNMATCH_CASEFOLD if your fnmatch function doesn't have the
+# FNM_CASEFOLD GNU extension (detected by preprocessing <fnmatch.h>).
+AC_CACHE_CHECK([whether the fnmatch function supports the FNM_CASEFOLD GNU extension],
+ [ac_cv_c_excellent_fnmatch], [
+AC_EGREP_CPP(yippeeyeswehaveit,
+       AC_LANG_PROGRAM([
+#include <fnmatch.h>
+],
+[#ifdef FNM_CASEFOLD
+yippeeyeswehaveit
+#endif
+]),
+       [ac_cv_c_excellent_fnmatch=yes],
+       [ac_cv_c_excellent_fnmatch=no])
+])
+if test "$ac_cv_c_excellent_fnmatch" = yes; then
+       NO_FNMATCH_CASEFOLD=
+else
+       NO_FNMATCH_CASEFOLD=YesPlease
+fi
+AC_SUBST(NO_FNMATCH_CASEFOLD)
+#
 # Define NO_MEMMEM if you don't have memmem.
 GIT_CHECK_FUNC(memmem,
 [NO_MEMMEM=],
diff --git a/dir.c b/dir.c
index b2dfb69eb5606a7538cc5e1876a91f703ec4969c..852e60f2a8d4784bf73d044f89fe822538121286 100644 (file)
--- a/dir.c
+++ b/dir.c
@@ -18,6 +18,22 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, in
        int check_only, const struct path_simplify *simplify);
 static int get_dtype(struct dirent *de, const char *path, int len);
 
+/* Helper string functions that fold case only when the ignore_case flag is set; otherwise they are the plain libc calls. */
+int strcmp_icase(const char *a, const char *b)
+{
+       return ignore_case ? strcasecmp(a, b) : strcmp(a, b);
+}
+
+int strncmp_icase(const char *a, const char *b, size_t count)
+{
+       return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count); /* compares at most count characters */
+}
+
+int fnmatch_icase(const char *pattern, const char *string, int flags)
+{
+       return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0)); /* caller's flags are kept; FNM_CASEFOLD is OR-ed in */
+}
+
 static int common_prefix(const char **pathspec)
 {
        const char *path, *slash, *next;
@@ -91,16 +107,30 @@ static int match_one(const char *match, const char *name, int namelen)
        if (!*match)
                return MATCHED_RECURSIVELY;
 
-       for (;;) {
-               unsigned char c1 = *match;
-               unsigned char c2 = *name;
-               if (c1 == '\0' || is_glob_special(c1))
-                       break;
-               if (c1 != c2)
-                       return 0;
-               match++;
-               name++;
-               namelen--;
+       if (ignore_case) {
+               for (;;) {
+                       unsigned char c1 = tolower(*match);
+                       unsigned char c2 = tolower(*name);
+                       if (c1 == '\0' || is_glob_special(c1))
+                               break;
+                       if (c1 != c2)
+                               return 0;
+                       match++;
+                       name++;
+                       namelen--;
+               }
+       } else {
+               for (;;) {
+                       unsigned char c1 = *match;
+                       unsigned char c2 = *name;
+                       if (c1 == '\0' || is_glob_special(c1))
+                               break;
+                       if (c1 != c2)
+                               return 0;
+                       match++;
+                       name++;
+                       namelen--;
+               }
        }
 
 
@@ -109,8 +139,8 @@ static int match_one(const char *match, const char *name, int namelen)
         * we need to match by fnmatch
         */
        matchlen = strlen(match);
-       if (strncmp(match, name, matchlen))
-               return !fnmatch(match, name, 0) ? MATCHED_FNMATCH : 0;
+       if (strncmp_icase(match, name, matchlen))
+               return !fnmatch_icase(match, name, 0) ? MATCHED_FNMATCH : 0;
 
        if (namelen == matchlen)
                return MATCHED_EXACTLY;
@@ -375,14 +405,14 @@ int excluded_from_list(const char *pathname,
                        if (x->flags & EXC_FLAG_NODIR) {
                                /* match basename */
                                if (x->flags & EXC_FLAG_NOWILDCARD) {
-                                       if (!strcmp(exclude, basename))
+                                       if (!strcmp_icase(exclude, basename))
                                                return to_exclude;
                                } else if (x->flags & EXC_FLAG_ENDSWITH) {
                                        if (x->patternlen - 1 <= pathlen &&
-                                           !strcmp(exclude + 1, pathname + pathlen - x->patternlen + 1))
+                                           !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
                                                return to_exclude;
                                } else {
-                                       if (fnmatch(exclude, basename, 0) == 0)
+                                       if (fnmatch_icase(exclude, basename, 0) == 0)
                                                return to_exclude;
                                }
                        }
@@ -397,14 +427,14 @@ int excluded_from_list(const char *pathname,
 
                                if (pathlen < baselen ||
                                    (baselen && pathname[baselen-1] != '/') ||
-                                   strncmp(pathname, x->base, baselen))
+                                   strncmp_icase(pathname, x->base, baselen))
                                    continue;
 
                                if (x->flags & EXC_FLAG_NOWILDCARD) {
-                                       if (!strcmp(exclude, pathname + baselen))
+                                       if (!strcmp_icase(exclude, pathname + baselen))
                                                return to_exclude;
                                } else {
-                                       if (fnmatch(exclude, pathname+baselen,
+                                       if (fnmatch_icase(exclude, pathname+baselen,
                                                    FNM_PATHNAME) == 0)
                                            return to_exclude;
                                }
@@ -469,6 +499,39 @@ enum exist_status {
        index_gitdir
 };
 
+/*
+ * Case-insensitive directory lookup: instead of the alphabetically
+ * sorted index, query the name hash through index_name_exists() using
+ * the first len + 1 bytes of dirname (so dirname[len] is part of the key).
+ */
+static enum exist_status directory_exists_in_index_icase(const char *dirname, int len)
+{
+       struct cache_entry *ce = index_name_exists(&the_index, dirname, len + 1, ignore_case);
+       unsigned char endchar;
+
+       if (!ce)
+               return index_nonexistent; /* nothing in the name hash matches */
+       endchar = ce->name[len]; /* byte of the found entry right after the dirname part */
+
+       /*
+        * The cache_entry structure returned will contain this dirname
+        * and possibly additional path components.
+        */
+       if (endchar == '/')
+               return index_directory;
+
+       /*
+        * If there are no additional path components, then this cache_entry
+        * represents a submodule.  Submodules, despite being directories,
+        * are stored in the cache without a closing slash.
+        */
+       if (!endchar && S_ISGITLINK(ce->ce_mode))
+               return index_gitdir;
+
+       /* This should never be hit, but it exists just in case. */
+       return index_nonexistent;
+}
+
 /*
  * The index sorts alphabetically by entry name, which
  * means that a gitlink sorts as '\0' at the end, while
@@ -478,7 +541,12 @@ enum exist_status {
  */
 static enum exist_status directory_exists_in_index(const char *dirname, int len)
 {
-       int pos = cache_name_pos(dirname, len);
+       int pos;
+
+       if (ignore_case)
+               return directory_exists_in_index_icase(dirname, len);
+
+       pos = cache_name_pos(dirname, len);
        if (pos < 0)
                pos = -pos-1;
        while (pos < active_nr) {
diff --git a/dir.h b/dir.h
index 278d84cdf7df01c33a45e6dc9c20592cecff9d85..b3e2104b9f231fbed88f98df12ad48d0d9992130 100644 (file)
--- a/dir.h
+++ b/dir.h
@@ -101,4 +101,8 @@ extern int remove_dir_recursively(struct strbuf *path, int flag);
 /* tries to remove the path with empty directories along it, ignores ENOENT */
 extern int remove_path(const char *path);
 
+extern int strcmp_icase(const char *a, const char *b); /* strcmp(), or strcasecmp() when ignore_case is set */
+extern int strncmp_icase(const char *a, const char *b, size_t count); /* strncmp(), or strncasecmp() when ignore_case is set */
+extern int fnmatch_icase(const char *pattern, const char *string, int flags); /* fnmatch() with FNM_CASEFOLD added when ignore_case is set */
+
 #endif
index 8263dbe841a6ac738c3a5a31006f3fdffcbc7e77..534c68db6fe4d0c34c38e632bd2c442966cf8663 100644 (file)
@@ -156,6 +156,7 @@ Format of STDIN stream:
 #include "csum-file.h"
 #include "quote.h"
 #include "exec_cmd.h"
+#include "dir.h"
 
 #define PACK_ID_BITS 16
 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
@@ -1478,7 +1479,7 @@ static int tree_content_set(
        t = root->tree;
        for (i = 0; i < t->entry_count; i++) {
                e = t->entries[i];
-               if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
+               if (e->name->str_len == n && !strncmp_icase(p, e->name->str_dat, n)) {
                        if (!slash1) {
                                if (!S_ISDIR(mode)
                                                && e->versions[1].mode == mode
@@ -1547,7 +1548,7 @@ static int tree_content_remove(
        t = root->tree;
        for (i = 0; i < t->entry_count; i++) {
                e = t->entries[i];
-               if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
+               if (e->name->str_len == n && !strncmp_icase(p, e->name->str_dat, n)) {
                        if (slash1 && !S_ISDIR(e->versions[1].mode))
                                /*
                                 * If p names a file in some subdirectory, and a
@@ -1608,7 +1609,7 @@ static int tree_content_get(
        t = root->tree;
        for (i = 0; i < t->entry_count; i++) {
                e = t->entries[i];
-               if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
+               if (e->name->str_len == n && !strncmp_icase(p, e->name->str_dat, n)) {
                        if (!slash1) {
                                memcpy(leaf, e, sizeof(*leaf));
                                if (e->tree && is_null_sha1(e->versions[1].sha1))
index 0031d78e8c98a32d61cd0dc0f939a033e24ed890..c6b6a3fe4cd94e48893b172c17b6e7df3bfa36f8 100644 (file)
@@ -32,6 +32,42 @@ static unsigned int hash_name(const char *name, int namelen)
        return hash;
 }
 
+static void hash_index_entry_directories(struct index_state *istate, struct cache_entry *ce)
+{
+       /*
+        * Throw each directory component in the hash for quick lookup
+        * during a git status. Directory components are stored with their
+        * closing slash.  Despite submodules being a directory, they never
+        * reach this point, because they are stored without a closing slash
+        * in the cache.
+        *
+        * Note that the cache_entry stored with the directory does not
+        * represent the directory itself.  It is a pointer to an existing
+        * filename, and its only purpose is to represent existence of the
+        * directory in the cache.  It is very possible multiple directory
+        * hash entries may point to the same cache_entry.
+        */
+       unsigned int hash;
+       void **pos;
+
+       const char *ptr = ce->name;
+       while (*ptr) {
+               while (*ptr && *ptr != '/') /* advance to the next '/' or the end of the name */
+                       ++ptr;
+               if (*ptr == '/') {
+                       ++ptr; /* step past the slash so it is included in the hashed prefix */
+                       hash = hash_name(ce->name, ptr - ce->name); /* hash of the leading path up to and including this '/' */
+                       if (!lookup_hash(hash, &istate->name_hash)) { /* skip prefixes whose hash is already present */
+                               pos = insert_hash(hash, ce, &istate->name_hash);
+                               if (pos) { /* NOTE(review): presumably a non-NULL pos means the slot was already occupied -- confirm against insert_hash() */
+                                       ce->next = *pos;
+                                       *pos = ce;
+                               }
+                       }
+               }
+       }
+}
+
 static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
 {
        void **pos;
@@ -47,6 +83,9 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
                ce->next = *pos;
                *pos = ce;
        }
+
+       if (ignore_case)
+               hash_index_entry_directories(istate, ce);
 }
 
 static void lazy_init_name_hash(struct index_state *istate)
@@ -97,7 +136,21 @@ static int same_name(const struct cache_entry *ce, const char *name, int namelen
        if (len == namelen && !cache_name_compare(name, namelen, ce->name, len))
                return 1;
 
-       return icase && slow_same_name(name, namelen, ce->name, len);
+       if (!icase)
+               return 0;
+
+       /*
+        * If the entry we're comparing is a filename (no trailing slash), then compare
+        * the lengths exactly.
+        */
+       if (name[namelen - 1] != '/')
+               return slow_same_name(name, namelen, ce->name, len);
+
+       /*
+        * For a directory, we point to an arbitrary cache_entry filename.  Just
+        * make sure the directory portion matches.
+        */
+       return slow_same_name(name, namelen, ce->name, namelen < len ? namelen : len);
 }
 
 struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase)
@@ -115,5 +168,22 @@ struct cache_entry *index_name_exists(struct index_state *istate, const char *na
                }
                ce = ce->next;
        }
+
+       /*
+        * Might be a submodule.  Despite submodules being directories,
+        * they are stored in the name hash without a closing slash.
+        * When ignore_case is 1, directories are stored in the name hash
+        * with their closing slash.
+        *
+        * The side effect of this storage technique is that we need to
+        * remove the slash from name and perform the lookup again without
+        * the slash.  The result is only accepted as a match when
+        * S_ISGITLINK(ce->ce_mode) is true.
+        */
+       if (icase && name[namelen - 1] == '/') {
+               ce = index_name_exists(istate, name, namelen - 1, icase);
+               if (ce && S_ISGITLINK(ce->ce_mode))
+                       return ce;
+       }
        return NULL;
 }
index 1f42473e8070a05ada8c56b0d60537227a5223ec..4f2e890b01b0c27ef2e49080e1fd34bf67e969c7 100644 (file)
@@ -608,6 +608,29 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st,
                ce->ce_mode = ce_mode_from_stat(ent, st_mode);
        }
 
+       /* When core.ignorecase=true, determine if a directory of the same name but differing
+        * case already exists within the Git repository.  If it does, ensure the directory
+        * case of the file being added to the repository matches (is folded into) the existing
+        * entry's directory case.
+        */
+       if (ignore_case) {
+               const char *startPtr = ce->name;
+               const char *ptr = startPtr;
+               while (*ptr) {
+                       while (*ptr && *ptr != '/')
+                               ++ptr;
+                       if (*ptr == '/') {
+                               struct cache_entry *foundce;
+                               ++ptr;
+                               foundce = index_name_exists(&the_index, ce->name, ptr - ce->name, ignore_case);
+                               if (foundce) {
+                                       memcpy((void *)startPtr, foundce->name + (startPtr - ce->name), ptr - startPtr);
+                                       startPtr = ptr;
+                               }
+                       }
+               }
+       }
+
        alias = index_name_exists(istate, ce->name, ce_namelen(ce), ignore_case);
        if (alias && !ce_stage(alias) && !ie_match_stat(istate, alias, st, ce_option)) {
                /* Nothing changed, really */