1/* 2 * name-hash.c 3 * 4 * Hashing names in the index state 5 * 6 * Copyright (C) 2008 Linus Torvalds 7 */ 8#define NO_THE_INDEX_COMPATIBILITY_MACROS 9#include"cache.h" 10 11/* 12 * This removes bit 5 if bit 6 is set. 13 * 14 * That will make US-ASCII characters hash to their upper-case 15 * equivalent. We could easily do this one whole word at a time, 16 * but that's for future worries. 17 */ 18staticinlineunsigned charicase_hash(unsigned char c) 19{ 20return c & ~((c &0x40) >>1); 21} 22 23static unsigned inthash_name(const char*name,int namelen) 24{ 25unsigned int hash =0x123; 26 27while(namelen--) { 28unsigned char c = *name++; 29 c =icase_hash(c); 30 hash = hash*101+ c; 31} 32return hash; 33} 34 35struct dir_entry { 36struct dir_entry *next; 37struct dir_entry *parent; 38struct cache_entry *ce; 39int nr; 40unsigned int namelen; 41}; 42 43static struct dir_entry *find_dir_entry(struct index_state *istate, 44const char*name,unsigned int namelen) 45{ 46unsigned int hash =hash_name(name, namelen); 47struct dir_entry *dir; 48 49for(dir =lookup_hash(hash, &istate->dir_hash); dir; dir = dir->next) 50if(dir->namelen == namelen && 51!strncasecmp(dir->ce->name, name, namelen)) 52return dir; 53return NULL; 54} 55 56static struct dir_entry *hash_dir_entry(struct index_state *istate, 57struct cache_entry *ce,int namelen) 58{ 59/* 60 * Throw each directory component in the hash for quick lookup 61 * during a git status. Directory components are stored with their 62 * closing slash. Despite submodules being a directory, they never 63 * reach this point, because they are stored without a closing slash 64 * in index_state.name_hash (as ordinary cache_entries). 65 * 66 * Note that the cache_entry stored with the dir_entry merely 67 * supplies the name of the directory (up to dir_entry.namelen). We 68 * track the number of 'active' files in a directory in dir_entry.nr, 69 * so we can tell if the directory is still relevant, e.g. for git 70 * status. However, if cache_entries are removed, we cannot pinpoint 71 * an exact cache_entry that's still active. It is very possible that 72 * multiple dir_entries point to the same cache_entry. 73 */ 74struct dir_entry *dir; 75 76/* get length of parent directory */ 77while(namelen >0&& !is_dir_sep(ce->name[namelen -1])) 78 namelen--; 79if(namelen <=0) 80return NULL; 81 82/* lookup existing entry for that directory */ 83 dir =find_dir_entry(istate, ce->name, namelen); 84if(!dir) { 85/* not found, create it and add to hash table */ 86void**pdir; 87unsigned int hash =hash_name(ce->name, namelen); 88 89 dir =xcalloc(1,sizeof(struct dir_entry)); 90 dir->namelen = namelen; 91 dir->ce = ce; 92 93 pdir =insert_hash(hash, dir, &istate->dir_hash); 94if(pdir) { 95 dir->next = *pdir; 96*pdir = dir; 97} 98 99/* recursively add missing parent directories */ 100 dir->parent =hash_dir_entry(istate, ce, namelen -1); 101} 102return dir; 103} 104 105static voidadd_dir_entry(struct index_state *istate,struct cache_entry *ce) 106{ 107/* Add reference to the directory entry (and parents if 0). */ 108struct dir_entry *dir =hash_dir_entry(istate, ce,ce_namelen(ce)); 109while(dir && !(dir->nr++)) 110 dir = dir->parent; 111} 112 113static voidremove_dir_entry(struct index_state *istate,struct cache_entry *ce) 114{ 115/* 116 * Release reference to the directory entry (and parents if 0). 117 * 118 * Note: we do not remove / free the entry because there's no 119 * hash.[ch]::remove_hash and dir->next may point to other entries 120 * that are still valid, so we must not free the memory. 121 */ 122struct dir_entry *dir =hash_dir_entry(istate, ce,ce_namelen(ce)); 123while(dir && dir->nr && !(--dir->nr)) 124 dir = dir->parent; 125} 126 127static voidhash_index_entry(struct index_state *istate,struct cache_entry *ce) 128{ 129void**pos; 130unsigned int hash; 131 132if(ce->ce_flags & CE_HASHED) 133return; 134 ce->ce_flags |= CE_HASHED; 135 ce->next = NULL; 136 hash =hash_name(ce->name,ce_namelen(ce)); 137 pos =insert_hash(hash, ce, &istate->name_hash); 138if(pos) { 139 ce->next = *pos; 140*pos = ce; 141} 142 143if(ignore_case && !(ce->ce_flags & CE_UNHASHED)) 144add_dir_entry(istate, ce); 145} 146 147static voidlazy_init_name_hash(struct index_state *istate) 148{ 149int nr; 150 151if(istate->name_hash_initialized) 152return; 153if(istate->cache_nr) 154preallocate_hash(&istate->name_hash, istate->cache_nr); 155for(nr =0; nr < istate->cache_nr; nr++) 156hash_index_entry(istate, istate->cache[nr]); 157 istate->name_hash_initialized =1; 158} 159 160voidadd_name_hash(struct index_state *istate,struct cache_entry *ce) 161{ 162/* if already hashed, add reference to directory entries */ 163if(ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK) 164add_dir_entry(istate, ce); 165 166 ce->ce_flags &= ~CE_UNHASHED; 167if(istate->name_hash_initialized) 168hash_index_entry(istate, ce); 169} 170 171/* 172 * We don't actually *remove* it, we can just mark it invalid so that 173 * we won't find it in lookups. 174 * 175 * Not only would we have to search the lists (simple enough), but 176 * we'd also have to rehash other hash buckets in case this makes the 177 * hash bucket empty (common). So it's much better to just mark 178 * it. 179 */ 180voidremove_name_hash(struct index_state *istate,struct cache_entry *ce) 181{ 182/* if already hashed, release reference to directory entries */ 183if(ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED) 184remove_dir_entry(istate, ce); 185 186 ce->ce_flags |= CE_UNHASHED; 187} 188 189static intslow_same_name(const char*name1,int len1,const char*name2,int len2) 190{ 191if(len1 != len2) 192return0; 193 194while(len1) { 195unsigned char c1 = *name1++; 196unsigned char c2 = *name2++; 197 len1--; 198if(c1 != c2) { 199 c1 =toupper(c1); 200 c2 =toupper(c2); 201if(c1 != c2) 202return0; 203} 204} 205return1; 206} 207 208static intsame_name(const struct cache_entry *ce,const char*name,int namelen,int icase) 209{ 210int len =ce_namelen(ce); 211 212/* 213 * Always do exact compare, even if we want a case-ignoring comparison; 214 * we do the quick exact one first, because it will be the common case. 215 */ 216if(len == namelen && !cache_name_compare(name, namelen, ce->name, len)) 217return1; 218 219if(!icase) 220return0; 221 222returnslow_same_name(name, namelen, ce->name, len); 223} 224 225struct cache_entry *index_name_exists(struct index_state *istate,const char*name,int namelen,int icase) 226{ 227unsigned int hash =hash_name(name, namelen); 228struct cache_entry *ce; 229 230lazy_init_name_hash(istate); 231 ce =lookup_hash(hash, &istate->name_hash); 232 233while(ce) { 234if(!(ce->ce_flags & CE_UNHASHED)) { 235if(same_name(ce, name, namelen, icase)) 236return ce; 237} 238 ce = ce->next; 239} 240 241/* 242 * When looking for a directory (trailing '/'), it might be a 243 * submodule or a directory. Despite submodules being directories, 244 * they are stored in the name hash without a closing slash. 245 * When ignore_case is 1, directories are stored in a separate hash 246 * table *with* their closing slash. 247 * 248 * The side effect of this storage technique is we have need to 249 * lookup the directory in a separate hash table, and if not found 250 * remove the slash from name and perform the lookup again without 251 * the slash. If a match is made, S_ISGITLINK(ce->mode) will be 252 * true. 253 */ 254if(icase && name[namelen -1] =='/') { 255struct dir_entry *dir =find_dir_entry(istate, name, namelen); 256if(dir && dir->nr) 257return dir->ce; 258 259 ce =index_name_exists(istate, name, namelen -1, icase); 260if(ce &&S_ISGITLINK(ce->ce_mode)) 261return ce; 262} 263return NULL; 264} 265 266static intfree_dir_entry(void*entry,void*unused) 267{ 268struct dir_entry *dir = entry; 269while(dir) { 270struct dir_entry *next = dir->next; 271free(dir); 272 dir = next; 273} 274return0; 275} 276 277voidfree_name_hash(struct index_state *istate) 278{ 279if(!istate->name_hash_initialized) 280return; 281 istate->name_hash_initialized =0; 282if(ignore_case) 283/* free directory entries */ 284for_each_hash(&istate->dir_hash, free_dir_entry, NULL); 285 286free_hash(&istate->name_hash); 287free_hash(&istate->dir_hash); 288}