1/* 2 * name-hash.c 3 * 4 * Hashing names in the index state 5 * 6 * Copyright (C) 2008 Linus Torvalds 7 */ 8#define NO_THE_INDEX_COMPATIBILITY_MACROS 9#include "cache.h" 10 11/* 12 * This removes bit 5 if bit 6 is set. 13 * 14 * That will make US-ASCII characters hash to their upper-case 15 * equivalent. We could easily do this one whole word at a time, 16 * but that's for future worries. 17 */ 18static inline unsigned char icase_hash(unsigned char c) 19{ 20 return c & ~((c & 0x40) >> 1); 21} 22 23static unsigned int hash_name(const char *name, int namelen) 24{ 25 unsigned int hash = 0x123; 26 27 do { 28 unsigned char c = *name++; 29 c = icase_hash(c); 30 hash = hash*101 + c; 31 } while (--namelen); 32 return hash; 33} 34 35static void hash_index_entry_directories(struct index_state *istate, struct cache_entry *ce) 36{ 37 /* 38 * Throw each directory component in the hash for quick lookup 39 * during a git status. Directory components are stored with their 40 * closing slash. Despite submodules being a directory, they never 41 * reach this point, because they are stored without a closing slash 42 * in the cache. 43 * 44 * Note that the cache_entry stored with the directory does not 45 * represent the directory itself. It is a pointer to an existing 46 * filename, and its only purpose is to represent existence of the 47 * directory in the cache. It is very possible multiple directory 48 * hash entries may point to the same cache_entry. 49 */ 50 unsigned int hash; 51 void **pos; 52 53 const char *ptr = ce->name; 54 while (*ptr) { 55 while (*ptr && *ptr != '/') 56 ++ptr; 57 if (*ptr == '/') { 58 ++ptr; 59 hash = hash_name(ce->name, ptr - ce->name); 60 pos = insert_hash(hash, ce, &istate->name_hash); 61 if (pos) { 62 ce->dir_next = *pos; 63 *pos = ce; 64 } 65 } 66 } 67} 68 69static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) 70{ 71 void **pos; 72 unsigned int hash; 73 74 if (ce->ce_flags & CE_HASHED) 75 return; 76 ce->ce_flags |= CE_HASHED; 77 ce->next = ce->dir_next = NULL; 78 hash = hash_name(ce->name, ce_namelen(ce)); 79 pos = insert_hash(hash, ce, &istate->name_hash); 80 if (pos) { 81 ce->next = *pos; 82 *pos = ce; 83 } 84 85 if (ignore_case) 86 hash_index_entry_directories(istate, ce); 87} 88 89static void lazy_init_name_hash(struct index_state *istate) 90{ 91 int nr; 92 93 if (istate->name_hash_initialized) 94 return; 95 for (nr = 0; nr < istate->cache_nr; nr++) 96 hash_index_entry(istate, istate->cache[nr]); 97 istate->name_hash_initialized = 1; 98} 99 100void add_name_hash(struct index_state *istate, struct cache_entry *ce) 101{ 102 ce->ce_flags &= ~CE_UNHASHED; 103 if (istate->name_hash_initialized) 104 hash_index_entry(istate, ce); 105} 106 107static int slow_same_name(const char *name1, int len1, const char *name2, int len2) 108{ 109 if (len1 != len2) 110 return 0; 111 112 while (len1) { 113 unsigned char c1 = *name1++; 114 unsigned char c2 = *name2++; 115 len1--; 116 if (c1 != c2) { 117 c1 = toupper(c1); 118 c2 = toupper(c2); 119 if (c1 != c2) 120 return 0; 121 } 122 } 123 return 1; 124} 125 126static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase) 127{ 128 int len = ce_namelen(ce); 129 130 /* 131 * Always do exact compare, even if we want a case-ignoring comparison; 132 * we do the quick exact one first, because it will be the common case. 133 */ 134 if (len == namelen && !cache_name_compare(name, namelen, ce->name, len)) 135 return 1; 136 137 if (!icase) 138 return 0; 139 140 /* 141 * If the entry we're comparing is a filename (no trailing slash), then compare 142 * the lengths exactly. 143 */ 144 if (name[namelen - 1] != '/') 145 return slow_same_name(name, namelen, ce->name, len); 146 147 /* 148 * For a directory, we point to an arbitrary cache_entry filename. Just 149 * make sure the directory portion matches. 150 */ 151 return slow_same_name(name, namelen, ce->name, namelen < len ? namelen : len); 152} 153 154struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase) 155{ 156 unsigned int hash = hash_name(name, namelen); 157 struct cache_entry *ce; 158 159 lazy_init_name_hash(istate); 160 ce = lookup_hash(hash, &istate->name_hash); 161 162 while (ce) { 163 if (!(ce->ce_flags & CE_UNHASHED)) { 164 if (same_name(ce, name, namelen, icase)) 165 return ce; 166 } 167 if (icase && name[namelen - 1] == '/') 168 ce = ce->dir_next; 169 else 170 ce = ce->next; 171 } 172 173 /* 174 * Might be a submodule. Despite submodules being directories, 175 * they are stored in the name hash without a closing slash. 176 * When ignore_case is 1, directories are stored in the name hash 177 * with their closing slash. 178 * 179 * The side effect of this storage technique is we have need to 180 * remove the slash from name and perform the lookup again without 181 * the slash. If a match is made, S_ISGITLINK(ce->mode) will be 182 * true. 183 */ 184 if (icase && name[namelen - 1] == '/') { 185 ce = index_name_exists(istate, name, namelen - 1, icase); 186 if (ce && S_ISGITLINK(ce->ce_mode)) 187 return ce; 188 } 189 return NULL; 190}