From: Junio C Hamano Date: Mon, 1 Apr 2013 15:59:53 +0000 (-0700) Subject: Merge branch 'kb/name-hash' X-Git-Tag: v1.8.3-rc0~146 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/c044bed8f0ed0275792cf66201579e42c0de7171?ds=inline;hp=-c Merge branch 'kb/name-hash' The code to keep track of what directory names are known to Git on platforms with case insensitive filesystems can get confused upon a hash collision between these pathnames and looped forever. * kb/name-hash: name-hash.c: fix endless loop with core.ignorecase=true --- c044bed8f0ed0275792cf66201579e42c0de7171 diff --combined cache.h index bcdb3edbc4,2d938eae23..ec2fd7a304 --- a/cache.h +++ b/cache.h @@@ -34,7 -34,6 +34,7 @@@ int git_inflate(git_zstream *, int flus void git_deflate_init(git_zstream *, int level); void git_deflate_init_gzip(git_zstream *, int level); +void git_deflate_init_raw(git_zstream *, int level); void git_deflate_end(git_zstream *); int git_deflate_abort(git_zstream *); int git_deflate_end_gently(git_zstream *); @@@ -132,7 -131,6 +132,6 @@@ struct cache_entry unsigned int ce_namelen; unsigned char sha1[20]; struct cache_entry *next; - struct cache_entry *dir_next; char name[FLEX_ARRAY]; /* more */ }; @@@ -268,25 -266,15 +267,15 @@@ struct index_state unsigned name_hash_initialized : 1, initialized : 1; struct hash_table name_hash; + struct hash_table dir_hash; }; extern struct index_state the_index; /* Name hashing */ extern void add_name_hash(struct index_state *istate, struct cache_entry *ce); - /* - * We don't actually *remove* it, we can just mark it invalid so that - * we won't find it in lookups. - * - * Not only would we have to search the lists (simple enough), but - * we'd also have to rehash other hash buckets in case this makes the - * hash bucket empty (common). So it's much better to just mark - * it. - */ - static inline void remove_name_hash(struct cache_entry *ce) - { - ce->ce_flags |= CE_UNHASHED; - } + extern void remove_name_hash(struct index_state *istate, struct cache_entry *ce); + extern void free_name_hash(struct index_state *istate); #ifndef NO_THE_INDEX_COMPATIBILITY_MACROS @@@ -342,11 -330,9 +331,11 @@@ static inline enum object_type object_t OBJ_BLOB; } +/* Double-check local_repo_env below if you add to this list. */ #define GIT_DIR_ENVIRONMENT "GIT_DIR" #define GIT_NAMESPACE_ENVIRONMENT "GIT_NAMESPACE" #define GIT_WORK_TREE_ENVIRONMENT "GIT_WORK_TREE" +#define GIT_PREFIX_ENVIRONMENT "GIT_PREFIX" #define DEFAULT_GIT_DIR_ENVIRONMENT ".git" #define DB_ENVIRONMENT "GIT_OBJECT_DIRECTORY" #define INDEX_ENVIRONMENT "GIT_INDEX_FILE" @@@ -365,27 -351,15 +354,27 @@@ #define GIT_NOTES_DISPLAY_REF_ENVIRONMENT "GIT_NOTES_DISPLAY_REF" #define GIT_NOTES_REWRITE_REF_ENVIRONMENT "GIT_NOTES_REWRITE_REF" #define GIT_NOTES_REWRITE_MODE_ENVIRONMENT "GIT_NOTES_REWRITE_MODE" +#define GIT_LITERAL_PATHSPECS_ENVIRONMENT "GIT_LITERAL_PATHSPECS" /* - * Repository-local GIT_* environment variables - * The array is NULL-terminated to simplify its usage in contexts such - * environment creation or simple walk of the list. - * The number of non-NULL entries is available as a macro. + * This environment variable is expected to contain a boolean indicating + * whether we should or should not treat: + * + * GIT_DIR=foo.git git ... + * + * as if GIT_WORK_TREE=. was given. It's not expected that users will make use + * of this, but we use it internally to communicate to sub-processes that we + * are in a bare repo. If not set, defaults to true. + */ +#define GIT_IMPLICIT_WORK_TREE_ENVIRONMENT "GIT_IMPLICIT_WORK_TREE" + +/* + * Repository-local GIT_* environment variables; these will be cleared + * when git spawns a sub-process that runs inside another repository. + * The array is NULL-terminated, which makes it easy to pass in the "env" + * parameter of a run-command invocation, or to do a simple walk. */ -#define LOCAL_REPO_ENV_SIZE 9 -extern const char *const local_repo_env[LOCAL_REPO_ENV_SIZE + 1]; +extern const char * const local_repo_env[]; extern int is_bare_repository_cfg; extern int is_bare_repository(void); @@@ -488,8 -462,6 +477,8 @@@ extern int index_name_is_other(const st extern int ie_match_stat(const struct index_state *, struct cache_entry *, struct stat *, unsigned int); extern int ie_modified(const struct index_state *, struct cache_entry *, struct stat *, unsigned int); +#define PATHSPEC_ONESTAR 1 /* the pathspec pattern sastisfies GFNM_ONESTAR */ + struct pathspec { const char **raw; /* get_pathspec() result, not freed by free_pathspec() */ int nr; @@@ -499,8 -471,7 +488,8 @@@ struct pathspec_item { const char *match; int len; - unsigned int use_wildcard:1; + int nowildcard_len; + int flags; } *items; }; @@@ -508,8 -479,6 +497,8 @@@ extern int init_pathspec(struct pathspe extern void free_pathspec(struct pathspec *); extern int ce_path_match(const struct cache_entry *ce, const struct pathspec *pathspec); +extern int limit_pathspec_to_literal(void); + #define HASH_WRITE_OBJECT 1 #define HASH_FORMAT_CHECK 2 extern int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); @@@ -550,7 -519,6 +539,7 @@@ extern int delete_ref(const char *, con /* Environment bits from configuration mechanism */ extern int trust_executable_bit; extern int trust_ctime; +extern int check_stat; extern int quote_path_fully; extern int has_symlinks; extern int minimum_abbrev, default_abbrev; @@@ -577,12 -545,6 +566,12 @@@ extern int core_preload_index extern int core_apply_sparse_checkout; extern int precomposed_unicode; +/* + * The character that begins a commented line in user-editable file + * that is subject to stripspace. + */ +extern char comment_line_char; + enum branch_track { BRANCH_TRACK_UNSPECIFIED = -1, BRANCH_TRACK_NEVER = 0, @@@ -741,11 -703,10 +730,11 @@@ static inline int is_absolute_path(cons } int is_directory(const char *); const char *real_path(const char *path); +const char *real_path_if_valid(const char *path); const char *absolute_path(const char *path); const char *relative_path(const char *abs, const char *base); int normalize_path_copy(char *dst, const char *src); -int longest_ancestor_length(const char *path, const char *prefix_list); +int longest_ancestor_length(const char *path, struct string_list *prefixes); char *strip_path_suffix(const char *path, const char *suffix); int daemon_avoid_alias(const char *path); int offset_1st_component(const char *path); @@@ -1027,20 -988,15 +1016,20 @@@ struct ref unsigned char old_sha1[20]; unsigned char new_sha1[20]; char *symref; - unsigned int force:1, + unsigned int + force:1, + forced_update:1, merge:1, - nonfastforward:1, - deletion:1; + deletion:1, + matched:1; enum { REF_STATUS_NONE = 0, REF_STATUS_OK, REF_STATUS_REJECT_NONFASTFORWARD, + REF_STATUS_REJECT_ALREADY_EXISTS, REF_STATUS_REJECT_NODELETE, + REF_STATUS_REJECT_FETCH_FIRST, + REF_STATUS_REJECT_NEEDS_FORCE, REF_STATUS_UPTODATE, REF_STATUS_REMOTE_REJECT, REF_STATUS_EXPECTING_REPORT @@@ -1064,9 -1020,7 +1053,9 @@@ struct extra_have_objects int nr, alloc; unsigned char (*array)[20]; }; -extern struct ref **get_remote_heads(int in, struct ref **list, unsigned int flags, struct extra_have_objects *); +extern struct ref **get_remote_heads(int in, char *src_buf, size_t src_len, + struct ref **list, unsigned int flags, + struct extra_have_objects *); extern int server_supports(const char *feature); extern int parse_feature_request(const char *features, const char *feature); extern const char *server_feature_value(const char *feature, int *len_ret); @@@ -1074,9 -1028,6 +1063,9 @@@ extern const char *parse_feature_value( extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path); +/* A hook for count-objects to report invalid files in pack directory */ +extern void (*report_garbage)(const char *desc, const char *path); + extern void prepare_packed_git(void); extern void reprepare_packed_git(void); extern void install_packed_git(struct packed_git *pack); @@@ -1174,9 -1125,6 +1163,9 @@@ extern int check_repository_format_vers extern int git_env_bool(const char *, int); extern int git_config_system(void); extern int config_error_nonbool(const char *); +#if defined(__GNUC__) && ! defined(__clang__) +#define config_error_nonbool(s) (config_error_nonbool(s), -1) +#endif extern const char *get_log_output_encoding(void); extern const char *get_commit_output_encoding(void); @@@ -1190,28 -1138,15 +1179,28 @@@ struct config_include_data #define CONFIG_INCLUDE_INIT { 0 } extern int git_config_include(const char *name, const char *value, void *data); -#define IDENT_NAME_GIVEN 01 -#define IDENT_MAIL_GIVEN 02 -#define IDENT_ALL_GIVEN (IDENT_NAME_GIVEN|IDENT_MAIL_GIVEN) -extern int user_ident_explicitly_given; -extern int user_ident_sufficiently_given(void); +/* + * Match and parse a config key of the form: + * + * section.(subsection.)?key + * + * (i.e., what gets handed to a config_fn_t). The caller provides the section; + * we return -1 if it does not match, 0 otherwise. The subsection and key + * out-parameters are filled by the function (and subsection is NULL if it is + * missing). + */ +extern int parse_config_key(const char *var, + const char *section, + const char **subsection, int *subsection_len, + const char **key); + +extern int committer_ident_sufficiently_given(void); +extern int author_ident_sufficiently_given(void); extern const char *git_commit_encoding; extern const char *git_log_output_encoding; extern const char *git_mailmap_file; +extern const char *git_mailmap_blob; /* IO helper functions */ extern void maybe_flush_or_die(FILE *, const char *); @@@ -1237,7 -1172,6 +1226,7 @@@ extern int pager_in_use(void) extern int pager_use_color; extern int term_columns(void); extern int decimal_width(int); +extern int check_pager_config(const char *cmd); extern const char *editor_program; extern const char *askpass_program; @@@ -1320,15 -1254,8 +1309,15 @@@ struct startup_info }; extern struct startup_info *startup_info; -/* builtin/merge.c */ -int checkout_fast_forward(const unsigned char *from, const unsigned char *to); +/* merge.c */ +struct commit_list; +int try_merge_command(const char *strategy, size_t xopts_nr, + const char **xopts, struct commit_list *common, + const char *head_arg, struct commit_list *remotes); +int checkout_fast_forward(const unsigned char *from, + const unsigned char *to, + int overwrite_ignore); + int sane_execvp(const char *file, char *const argv[]); diff --combined name-hash.c index 9bac31a6ab,91241336f8..617c86c537 --- a/name-hash.c +++ b/name-hash.c @@@ -24,46 -24,104 +24,104 @@@ static unsigned int hash_name(const cha { unsigned int hash = 0x123; - do { + while (namelen--) { unsigned char c = *name++; c = icase_hash(c); hash = hash*101 + c; - } while (--namelen); + } return hash; } - static void hash_index_entry_directories(struct index_state *istate, struct cache_entry *ce) + struct dir_entry { + struct dir_entry *next; + struct dir_entry *parent; + struct cache_entry *ce; + int nr; + unsigned int namelen; + }; + + static struct dir_entry *find_dir_entry(struct index_state *istate, + const char *name, unsigned int namelen) + { + unsigned int hash = hash_name(name, namelen); + struct dir_entry *dir; + + for (dir = lookup_hash(hash, &istate->dir_hash); dir; dir = dir->next) + if (dir->namelen == namelen && + !strncasecmp(dir->ce->name, name, namelen)) + return dir; + return NULL; + } + + static struct dir_entry *hash_dir_entry(struct index_state *istate, + struct cache_entry *ce, int namelen) { /* * Throw each directory component in the hash for quick lookup * during a git status. Directory components are stored with their * closing slash. Despite submodules being a directory, they never * reach this point, because they are stored without a closing slash - * in the cache. + * in index_state.name_hash (as ordinary cache_entries). * - * Note that the cache_entry stored with the directory does not - * represent the directory itself. It is a pointer to an existing - * filename, and its only purpose is to represent existence of the - * directory in the cache. It is very possible multiple directory - * hash entries may point to the same cache_entry. + * Note that the cache_entry stored with the dir_entry merely + * supplies the name of the directory (up to dir_entry.namelen). We + * track the number of 'active' files in a directory in dir_entry.nr, + * so we can tell if the directory is still relevant, e.g. for git + * status. However, if cache_entries are removed, we cannot pinpoint + * an exact cache_entry that's still active. It is very possible that + * multiple dir_entries point to the same cache_entry. */ - unsigned int hash; - void **pos; + struct dir_entry *dir; + + /* get length of parent directory */ + while (namelen > 0 && !is_dir_sep(ce->name[namelen - 1])) + namelen--; + if (namelen <= 0) + return NULL; + + /* lookup existing entry for that directory */ + dir = find_dir_entry(istate, ce->name, namelen); + if (!dir) { + /* not found, create it and add to hash table */ + void **pdir; + unsigned int hash = hash_name(ce->name, namelen); - const char *ptr = ce->name; - while (*ptr) { - while (*ptr && *ptr != '/') - ++ptr; - if (*ptr == '/') { - ++ptr; - hash = hash_name(ce->name, ptr - ce->name); - pos = insert_hash(hash, ce, &istate->name_hash); - if (pos) { - ce->dir_next = *pos; - *pos = ce; - } + dir = xcalloc(1, sizeof(struct dir_entry)); + dir->namelen = namelen; + dir->ce = ce; + + pdir = insert_hash(hash, dir, &istate->dir_hash); + if (pdir) { + dir->next = *pdir; + *pdir = dir; } + + /* recursively add missing parent directories */ + dir->parent = hash_dir_entry(istate, ce, namelen - 1); } + return dir; + } + + static void add_dir_entry(struct index_state *istate, struct cache_entry *ce) + { + /* Add reference to the directory entry (and parents if 0). */ + struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); + while (dir && !(dir->nr++)) + dir = dir->parent; + } + + static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce) + { + /* + * Release reference to the directory entry (and parents if 0). + * + * Note: we do not remove / free the entry because there's no + * hash.[ch]::remove_hash and dir->next may point to other entries + * that are still valid, so we must not free the memory. + */ + struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); + while (dir && dir->nr && !(--dir->nr)) + dir = dir->parent; } static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) @@@ -74,7 -132,7 +132,7 @@@ if (ce->ce_flags & CE_HASHED) return; ce->ce_flags |= CE_HASHED; - ce->next = ce->dir_next = NULL; + ce->next = NULL; hash = hash_name(ce->name, ce_namelen(ce)); pos = insert_hash(hash, ce, &istate->name_hash); if (pos) { @@@ -82,8 -140,8 +140,8 @@@ *pos = ce; } - if (ignore_case) - hash_index_entry_directories(istate, ce); + if (ignore_case && !(ce->ce_flags & CE_UNHASHED)) + add_dir_entry(istate, ce); } static void lazy_init_name_hash(struct index_state *istate) @@@ -92,8 -150,6 +150,8 @@@ if (istate->name_hash_initialized) return; + if (istate->cache_nr) + preallocate_hash(&istate->name_hash, istate->cache_nr); for (nr = 0; nr < istate->cache_nr; nr++) hash_index_entry(istate, istate->cache[nr]); istate->name_hash_initialized = 1; @@@ -101,11 -157,33 +159,33 @@@ void add_name_hash(struct index_state *istate, struct cache_entry *ce) { + /* if already hashed, add reference to directory entries */ + if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK) + add_dir_entry(istate, ce); + ce->ce_flags &= ~CE_UNHASHED; if (istate->name_hash_initialized) hash_index_entry(istate, ce); } + /* + * We don't actually *remove* it, we can just mark it invalid so that + * we won't find it in lookups. + * + * Not only would we have to search the lists (simple enough), but + * we'd also have to rehash other hash buckets in case this makes the + * hash bucket empty (common). So it's much better to just mark + * it. + */ + void remove_name_hash(struct index_state *istate, struct cache_entry *ce) + { + /* if already hashed, release reference to directory entries */ + if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED) + remove_dir_entry(istate, ce); + + ce->ce_flags |= CE_UNHASHED; + } + static int slow_same_name(const char *name1, int len1, const char *name2, int len2) { if (len1 != len2) @@@ -139,18 -217,7 +219,7 @@@ static int same_name(const struct cache if (!icase) return 0; - /* - * If the entry we're comparing is a filename (no trailing slash), then compare - * the lengths exactly. - */ - if (name[namelen - 1] != '/') - return slow_same_name(name, namelen, ce->name, len); - - /* - * For a directory, we point to an arbitrary cache_entry filename. Just - * make sure the directory portion matches. - */ - return slow_same_name(name, namelen, ce->name, namelen < len ? namelen : len); + return slow_same_name(name, namelen, ce->name, len); } struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase) @@@ -166,27 -233,54 +235,54 @@@ if (same_name(ce, name, namelen, icase)) return ce; } - if (icase && name[namelen - 1] == '/') - ce = ce->dir_next; - else - ce = ce->next; + ce = ce->next; } /* - * Might be a submodule. Despite submodules being directories, + * When looking for a directory (trailing '/'), it might be a + * submodule or a directory. Despite submodules being directories, * they are stored in the name hash without a closing slash. - * When ignore_case is 1, directories are stored in the name hash - * with their closing slash. + * When ignore_case is 1, directories are stored in a separate hash + * table *with* their closing slash. * * The side effect of this storage technique is we have need to + * lookup the directory in a separate hash table, and if not found * remove the slash from name and perform the lookup again without * the slash. If a match is made, S_ISGITLINK(ce->mode) will be * true. */ if (icase && name[namelen - 1] == '/') { + struct dir_entry *dir = find_dir_entry(istate, name, namelen); + if (dir && dir->nr) + return dir->ce; + ce = index_name_exists(istate, name, namelen - 1, icase); if (ce && S_ISGITLINK(ce->ce_mode)) return ce; } return NULL; } + + static int free_dir_entry(void *entry, void *unused) + { + struct dir_entry *dir = entry; + while (dir) { + struct dir_entry *next = dir->next; + free(dir); + dir = next; + } + return 0; + } + + void free_name_hash(struct index_state *istate) + { + if (!istate->name_hash_initialized) + return; + istate->name_hash_initialized = 0; + if (ignore_case) + /* free directory entries */ + for_each_hash(&istate->dir_hash, free_dir_entry, NULL); + + free_hash(&istate->name_hash); + free_hash(&istate->dir_hash); + } diff --combined read-cache.c index 670a06bc79,ffb425c0ca..5a9704f4e5 --- a/read-cache.c +++ b/read-cache.c @@@ -46,7 -46,7 +46,7 @@@ static void replace_index_entry(struct { struct cache_entry *old = istate->cache[nr]; - remove_name_hash(old); + remove_name_hash(istate, old); set_index_entry(istate, nr, ce); istate->cache_changed = 1; } @@@ -197,25 -197,21 +197,25 @@@ static int ce_match_stat_basic(struct c } if (ce->ce_mtime.sec != (unsigned int)st->st_mtime) changed |= MTIME_CHANGED; - if (trust_ctime && ce->ce_ctime.sec != (unsigned int)st->st_ctime) + if (trust_ctime && check_stat && + ce->ce_ctime.sec != (unsigned int)st->st_ctime) changed |= CTIME_CHANGED; #ifdef USE_NSEC - if (ce->ce_mtime.nsec != ST_MTIME_NSEC(*st)) + if (check_stat && ce->ce_mtime.nsec != ST_MTIME_NSEC(*st)) changed |= MTIME_CHANGED; - if (trust_ctime && ce->ce_ctime.nsec != ST_CTIME_NSEC(*st)) + if (trust_ctime && check_stat && + ce->ce_ctime.nsec != ST_CTIME_NSEC(*st)) changed |= CTIME_CHANGED; #endif - if (ce->ce_uid != (unsigned int) st->st_uid || - ce->ce_gid != (unsigned int) st->st_gid) - changed |= OWNER_CHANGED; - if (ce->ce_ino != (unsigned int) st->st_ino) - changed |= INODE_CHANGED; + if (check_stat) { + if (ce->ce_uid != (unsigned int) st->st_uid || + ce->ce_gid != (unsigned int) st->st_gid) + changed |= OWNER_CHANGED; + if (ce->ce_ino != (unsigned int) st->st_ino) + changed |= INODE_CHANGED; + } #ifdef USE_STDEV /* @@@ -223,8 -219,8 +223,8 @@@ * clients will have different views of what "device" * the filesystem is on */ - if (ce->ce_dev != (unsigned int) st->st_dev) - changed |= INODE_CHANGED; + if (check_stat && ce->ce_dev != (unsigned int) st->st_dev) + changed |= INODE_CHANGED; #endif if (ce->ce_size != (unsigned int) st->st_size) @@@ -460,7 -456,7 +460,7 @@@ int remove_index_entry_at(struct index_ struct cache_entry *ce = istate->cache[pos]; record_resolve_undo(istate, ce); - remove_name_hash(ce); + remove_name_hash(istate, ce); istate->cache_changed = 1; istate->cache_nr--; if (pos >= istate->cache_nr) @@@ -483,7 -479,7 +483,7 @@@ void remove_marked_cache_entries(struc for (i = j = 0; i < istate->cache_nr; i++) { if (ce_array[i]->ce_flags & CE_REMOVE) - remove_name_hash(ce_array[i]); + remove_name_hash(istate, ce_array[i]); else ce_array[j++] = ce_array[i]; } @@@ -1260,7 -1256,7 +1260,7 @@@ static int verify_hdr(struct cache_head if (hdr->hdr_signature != htonl(CACHE_SIGNATURE)) return error("bad signature"); hdr_version = ntohl(hdr->hdr_version); - if (hdr_version < 2 || 4 < hdr_version) + if (hdr_version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < hdr_version) return error("bad index version %d", hdr_version); git_SHA1_Init(&c); git_SHA1_Update(&c, hdr, size - 20); @@@ -1515,8 -1511,7 +1515,7 @@@ int discard_index(struct index_state *i istate->cache_changed = 0; istate->timestamp.sec = 0; istate->timestamp.nsec = 0; - istate->name_hash_initialized = 0; - free_hash(&istate->name_hash); + free_name_hash(istate); cache_tree_free(&(istate->cache_tree)); istate->initialized = 0;