1/* 2 * name-hash.c 3 * 4 * Hashing names in the index state 5 * 6 * Copyright (C) 2008 Linus Torvalds 7 */ 8#define NO_THE_INDEX_COMPATIBILITY_MACROS 9#include "cache.h" 10 11/* 12 * This removes bit 5 if bit 6 is set. 13 * 14 * That will make US-ASCII characters hash to their upper-case 15 * equivalent. We could easily do this one whole word at a time, 16 * but that's for future worries. 17 */ 18static inline unsigned char icase_hash(unsigned char c) 19{ 20 return c & ~((c & 0x40) >> 1); 21} 22 23static unsigned int hash_name(const char *name, int namelen) 24{ 25 unsigned int hash = 0x123; 26 27 while (namelen--) { 28 unsigned char c = *name++; 29 c = icase_hash(c); 30 hash = hash*101 + c; 31 } 32 return hash; 33} 34 35struct dir_entry { 36 struct dir_entry *next; 37 struct dir_entry *parent; 38 struct cache_entry *ce; 39 int nr; 40 unsigned int namelen; 41}; 42 43static struct dir_entry *find_dir_entry(struct index_state *istate, 44 const char *name, unsigned int namelen) 45{ 46 unsigned int hash = hash_name(name, namelen); 47 struct dir_entry *dir; 48 49 for (dir = lookup_hash(hash, &istate->dir_hash); dir; dir = dir->next) 50 if (dir->namelen == namelen && 51 !strncasecmp(dir->ce->name, name, namelen)) 52 return dir; 53 return NULL; 54} 55 56static struct dir_entry *hash_dir_entry(struct index_state *istate, 57 struct cache_entry *ce, int namelen) 58{ 59 /* 60 * Throw each directory component in the hash for quick lookup 61 * during a git status. Directory components are stored with their 62 * closing slash. Despite submodules being a directory, they never 63 * reach this point, because they are stored without a closing slash 64 * in index_state.name_hash (as ordinary cache_entries). 65 * 66 * Note that the cache_entry stored with the dir_entry merely 67 * supplies the name of the directory (up to dir_entry.namelen). We 68 * track the number of 'active' files in a directory in dir_entry.nr, 69 * so we can tell if the directory is still relevant, e.g. for git 70 * status. However, if cache_entries are removed, we cannot pinpoint 71 * an exact cache_entry that's still active. It is very possible that 72 * multiple dir_entries point to the same cache_entry. 73 */ 74 struct dir_entry *dir; 75 76 /* get length of parent directory */ 77 while (namelen > 0 && !is_dir_sep(ce->name[namelen - 1])) 78 namelen--; 79 if (namelen <= 0) 80 return NULL; 81 82 /* lookup existing entry for that directory */ 83 dir = find_dir_entry(istate, ce->name, namelen); 84 if (!dir) { 85 /* not found, create it and add to hash table */ 86 void **pdir; 87 unsigned int hash = hash_name(ce->name, namelen); 88 89 dir = xcalloc(1, sizeof(struct dir_entry)); 90 dir->namelen = namelen; 91 dir->ce = ce; 92 93 pdir = insert_hash(hash, dir, &istate->dir_hash); 94 if (pdir) { 95 dir->next = *pdir; 96 *pdir = dir; 97 } 98 99 /* recursively add missing parent directories */ 100 dir->parent = hash_dir_entry(istate, ce, namelen - 1); 101 } 102 return dir; 103} 104 105static void add_dir_entry(struct index_state *istate, struct cache_entry *ce) 106{ 107 /* Add reference to the directory entry (and parents if 0). */ 108 struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); 109 while (dir && !(dir->nr++)) 110 dir = dir->parent; 111} 112 113static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce) 114{ 115 /* 116 * Release reference to the directory entry (and parents if 0). 117 * 118 * Note: we do not remove / free the entry because there's no 119 * hash.[ch]::remove_hash and dir->next may point to other entries 120 * that are still valid, so we must not free the memory. 121 */ 122 struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); 123 while (dir && dir->nr && !(--dir->nr)) 124 dir = dir->parent; 125} 126 127static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) 128{ 129 void **pos; 130 unsigned int hash; 131 132 if (ce->ce_flags & CE_HASHED) 133 return; 134 ce->ce_flags |= CE_HASHED; 135 ce->next = NULL; 136 hash = hash_name(ce->name, ce_namelen(ce)); 137 pos = insert_hash(hash, ce, &istate->name_hash); 138 if (pos) { 139 ce->next = *pos; 140 *pos = ce; 141 } 142 143 if (ignore_case && !(ce->ce_flags & CE_UNHASHED)) 144 add_dir_entry(istate, ce); 145} 146 147static void lazy_init_name_hash(struct index_state *istate) 148{ 149 int nr; 150 151 if (istate->name_hash_initialized) 152 return; 153 if (istate->cache_nr) 154 preallocate_hash(&istate->name_hash, istate->cache_nr); 155 for (nr = 0; nr < istate->cache_nr; nr++) 156 hash_index_entry(istate, istate->cache[nr]); 157 istate->name_hash_initialized = 1; 158} 159 160void add_name_hash(struct index_state *istate, struct cache_entry *ce) 161{ 162 /* if already hashed, add reference to directory entries */ 163 if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK) 164 add_dir_entry(istate, ce); 165 166 ce->ce_flags &= ~CE_UNHASHED; 167 if (istate->name_hash_initialized) 168 hash_index_entry(istate, ce); 169} 170 171/* 172 * We don't actually *remove* it, we can just mark it invalid so that 173 * we won't find it in lookups. 174 * 175 * Not only would we have to search the lists (simple enough), but 176 * we'd also have to rehash other hash buckets in case this makes the 177 * hash bucket empty (common). So it's much better to just mark 178 * it. 179 */ 180void remove_name_hash(struct index_state *istate, struct cache_entry *ce) 181{ 182 /* if already hashed, release reference to directory entries */ 183 if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED) 184 remove_dir_entry(istate, ce); 185 186 ce->ce_flags |= CE_UNHASHED; 187} 188 189static int slow_same_name(const char *name1, int len1, const char *name2, int len2) 190{ 191 if (len1 != len2) 192 return 0; 193 194 while (len1) { 195 unsigned char c1 = *name1++; 196 unsigned char c2 = *name2++; 197 len1--; 198 if (c1 != c2) { 199 c1 = toupper(c1); 200 c2 = toupper(c2); 201 if (c1 != c2) 202 return 0; 203 } 204 } 205 return 1; 206} 207 208static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase) 209{ 210 int len = ce_namelen(ce); 211 212 /* 213 * Always do exact compare, even if we want a case-ignoring comparison; 214 * we do the quick exact one first, because it will be the common case. 215 */ 216 if (len == namelen && !cache_name_compare(name, namelen, ce->name, len)) 217 return 1; 218 219 if (!icase) 220 return 0; 221 222 return slow_same_name(name, namelen, ce->name, len); 223} 224 225struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase) 226{ 227 unsigned int hash = hash_name(name, namelen); 228 struct cache_entry *ce; 229 230 lazy_init_name_hash(istate); 231 ce = lookup_hash(hash, &istate->name_hash); 232 233 while (ce) { 234 if (!(ce->ce_flags & CE_UNHASHED)) { 235 if (same_name(ce, name, namelen, icase)) 236 return ce; 237 } 238 ce = ce->next; 239 } 240 241 /* 242 * When looking for a directory (trailing '/'), it might be a 243 * submodule or a directory. Despite submodules being directories, 244 * they are stored in the name hash without a closing slash. 245 * When ignore_case is 1, directories are stored in a separate hash 246 * table *with* their closing slash. 247 * 248 * The side effect of this storage technique is we have need to 249 * lookup the directory in a separate hash table, and if not found 250 * remove the slash from name and perform the lookup again without 251 * the slash. If a match is made, S_ISGITLINK(ce->mode) will be 252 * true. 253 */ 254 if (icase && name[namelen - 1] == '/') { 255 struct dir_entry *dir = find_dir_entry(istate, name, namelen); 256 if (dir && dir->nr) 257 return dir->ce; 258 259 ce = index_name_exists(istate, name, namelen - 1, icase); 260 if (ce && S_ISGITLINK(ce->ce_mode)) 261 return ce; 262 } 263 return NULL; 264} 265 266static int free_dir_entry(void *entry, void *unused) 267{ 268 struct dir_entry *dir = entry; 269 while (dir) { 270 struct dir_entry *next = dir->next; 271 free(dir); 272 dir = next; 273 } 274 return 0; 275} 276 277void free_name_hash(struct index_state *istate) 278{ 279 if (!istate->name_hash_initialized) 280 return; 281 istate->name_hash_initialized = 0; 282 if (ignore_case) 283 /* free directory entries */ 284 for_each_hash(&istate->dir_hash, free_dir_entry, NULL); 285 286 free_hash(&istate->name_hash); 287 free_hash(&istate->dir_hash); 288}