1/* 2 * name-hash.c 3 * 4 * Hashing names in the index state 5 * 6 * Copyright (C) 2008 Linus Torvalds 7 */ 8#define NO_THE_INDEX_COMPATIBILITY_MACROS 9#include"cache.h" 10 11/* 12 * This removes bit 5 if bit 6 is set. 13 * 14 * That will make US-ASCII characters hash to their upper-case 15 * equivalent. We could easily do this one whole word at a time, 16 * but that's for future worries. 17 */ 18staticinlineunsigned charicase_hash(unsigned char c) 19{ 20return c & ~((c &0x40) >>1); 21} 22 23static unsigned inthash_name(const char*name,int namelen) 24{ 25unsigned int hash =0x123; 26 27do{ 28unsigned char c = *name++; 29 c =icase_hash(c); 30 hash = hash*101+ c; 31}while(--namelen); 32return hash; 33} 34 35static voidhash_index_entry_directories(struct index_state *istate,struct cache_entry *ce) 36{ 37/* 38 * Throw each directory component in the hash for quick lookup 39 * during a git status. Directory components are stored with their 40 * closing slash. Despite submodules being a directory, they never 41 * reach this point, because they are stored without a closing slash 42 * in the cache. 43 * 44 * Note that the cache_entry stored with the directory does not 45 * represent the directory itself. It is a pointer to an existing 46 * filename, and its only purpose is to represent existence of the 47 * directory in the cache. It is very possible multiple directory 48 * hash entries may point to the same cache_entry. 49 */ 50unsigned int hash; 51void**pos; 52 53const char*ptr = ce->name; 54while(*ptr) { 55while(*ptr && *ptr !='/') 56++ptr; 57if(*ptr =='/') { 58++ptr; 59 hash =hash_name(ce->name, ptr - ce->name); 60if(!lookup_hash(hash, &istate->name_hash)) { 61 pos =insert_hash(hash, ce, &istate->name_hash); 62if(pos) { 63 ce->next = *pos; 64*pos = ce; 65} 66} 67} 68} 69} 70 71static voidhash_index_entry(struct index_state *istate,struct cache_entry *ce) 72{ 73void**pos; 74unsigned int hash; 75 76if(ce->ce_flags & CE_HASHED) 77return; 78 ce->ce_flags |= CE_HASHED; 79 ce->next = NULL; 80 hash =hash_name(ce->name,ce_namelen(ce)); 81 pos =insert_hash(hash, ce, &istate->name_hash); 82if(pos) { 83 ce->next = *pos; 84*pos = ce; 85} 86 87if(ignore_case) 88hash_index_entry_directories(istate, ce); 89} 90 91static voidlazy_init_name_hash(struct index_state *istate) 92{ 93int nr; 94 95if(istate->name_hash_initialized) 96return; 97for(nr =0; nr < istate->cache_nr; nr++) 98hash_index_entry(istate, istate->cache[nr]); 99 istate->name_hash_initialized =1; 100} 101 102voidadd_name_hash(struct index_state *istate,struct cache_entry *ce) 103{ 104 ce->ce_flags &= ~CE_UNHASHED; 105if(istate->name_hash_initialized) 106hash_index_entry(istate, ce); 107} 108 109static intslow_same_name(const char*name1,int len1,const char*name2,int len2) 110{ 111if(len1 != len2) 112return0; 113 114while(len1) { 115unsigned char c1 = *name1++; 116unsigned char c2 = *name2++; 117 len1--; 118if(c1 != c2) { 119 c1 =toupper(c1); 120 c2 =toupper(c2); 121if(c1 != c2) 122return0; 123} 124} 125return1; 126} 127 128static intsame_name(const struct cache_entry *ce,const char*name,int namelen,int icase) 129{ 130int len =ce_namelen(ce); 131 132/* 133 * Always do exact compare, even if we want a case-ignoring comparison; 134 * we do the quick exact one first, because it will be the common case. 135 */ 136if(len == namelen && !cache_name_compare(name, namelen, ce->name, len)) 137return1; 138 139if(!icase) 140return0; 141 142/* 143 * If the entry we're comparing is a filename (no trailing slash), then compare 144 * the lengths exactly. 145 */ 146if(name[namelen -1] !='/') 147returnslow_same_name(name, namelen, ce->name, len); 148 149/* 150 * For a directory, we point to an arbitrary cache_entry filename. Just 151 * make sure the directory portion matches. 152 */ 153returnslow_same_name(name, namelen, ce->name, namelen < len ? namelen : len); 154} 155 156struct cache_entry *index_name_exists(struct index_state *istate,const char*name,int namelen,int icase) 157{ 158unsigned int hash =hash_name(name, namelen); 159struct cache_entry *ce; 160 161lazy_init_name_hash(istate); 162 ce =lookup_hash(hash, &istate->name_hash); 163 164while(ce) { 165if(!(ce->ce_flags & CE_UNHASHED)) { 166if(same_name(ce, name, namelen, icase)) 167return ce; 168} 169 ce = ce->next; 170} 171 172/* 173 * Might be a submodule. Despite submodules being directories, 174 * they are stored in the name hash without a closing slash. 175 * When ignore_case is 1, directories are stored in the name hash 176 * with their closing slash. 177 * 178 * The side effect of this storage technique is we have need to 179 * remove the slash from name and perform the lookup again without 180 * the slash. If a match is made, S_ISGITLINK(ce->mode) will be 181 * true. 182 */ 183if(icase && name[namelen -1] =='/') { 184 ce =index_name_exists(istate, name, namelen -1, icase); 185if(ce &&S_ISGITLINK(ce->ce_mode)) 186return ce; 187} 188return NULL; 189}