c75fadf33b88689b4b35be5b1ae65d1317deec0d
   1/*
   2 * name-hash.c
   3 *
   4 * Hashing names in the index state
   5 *
   6 * Copyright (C) 2008 Linus Torvalds
   7 */
   8#define NO_THE_INDEX_COMPATIBILITY_MACROS
   9#include "cache.h"
  10
  11struct dir_entry {
  12        struct hashmap_entry ent;
  13        struct dir_entry *parent;
  14        struct cache_entry *ce;
  15        int nr;
  16        unsigned int namelen;
  17};
  18
  19static int dir_entry_cmp(const struct dir_entry *e1,
  20                const struct dir_entry *e2, const char *name)
  21{
  22        return e1->namelen != e2->namelen || strncasecmp(e1->ce->name,
  23                        name ? name : e2->ce->name, e1->namelen);
  24}
  25
  26static struct dir_entry *find_dir_entry(struct index_state *istate,
  27                const char *name, unsigned int namelen)
  28{
  29        struct dir_entry key;
  30        hashmap_entry_init(&key, memihash(name, namelen));
  31        key.namelen = namelen;
  32        return hashmap_get(&istate->dir_hash, &key, name);
  33}
  34
  35static struct dir_entry *hash_dir_entry(struct index_state *istate,
  36                struct cache_entry *ce, int namelen)
  37{
  38        /*
  39         * Throw each directory component in the hash for quick lookup
  40         * during a git status. Directory components are stored without their
  41         * closing slash.  Despite submodules being a directory, they never
  42         * reach this point, because they are stored
  43         * in index_state.name_hash (as ordinary cache_entries).
  44         *
  45         * Note that the cache_entry stored with the dir_entry merely
  46         * supplies the name of the directory (up to dir_entry.namelen). We
  47         * track the number of 'active' files in a directory in dir_entry.nr,
  48         * so we can tell if the directory is still relevant, e.g. for git
  49         * status. However, if cache_entries are removed, we cannot pinpoint
  50         * an exact cache_entry that's still active. It is very possible that
  51         * multiple dir_entries point to the same cache_entry.
  52         */
  53        struct dir_entry *dir;
  54
  55        /* get length of parent directory */
  56        while (namelen > 0 && !is_dir_sep(ce->name[namelen - 1]))
  57                namelen--;
  58        if (namelen <= 0)
  59                return NULL;
  60        namelen--;
  61
  62        /* lookup existing entry for that directory */
  63        dir = find_dir_entry(istate, ce->name, namelen);
  64        if (!dir) {
  65                /* not found, create it and add to hash table */
  66                dir = xcalloc(1, sizeof(struct dir_entry));
  67                hashmap_entry_init(dir, memihash(ce->name, namelen));
  68                dir->namelen = namelen;
  69                dir->ce = ce;
  70                hashmap_add(&istate->dir_hash, dir);
  71
  72                /* recursively add missing parent directories */
  73                dir->parent = hash_dir_entry(istate, ce, namelen);
  74        }
  75        return dir;
  76}
  77
  78static void add_dir_entry(struct index_state *istate, struct cache_entry *ce)
  79{
  80        /* Add reference to the directory entry (and parents if 0). */
  81        struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
  82        while (dir && !(dir->nr++))
  83                dir = dir->parent;
  84}
  85
  86static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce)
  87{
  88        /*
  89         * Release reference to the directory entry (and parents if 0).
  90         *
  91         * Note: we do not remove / free the entry because there's no
  92         * hash.[ch]::remove_hash and dir->next may point to other entries
  93         * that are still valid, so we must not free the memory.
  94         */
  95        struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
  96        while (dir && dir->nr && !(--dir->nr))
  97                dir = dir->parent;
  98}
  99
 100static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
 101{
 102        void **pos;
 103        unsigned int hash;
 104
 105        if (ce->ce_flags & CE_HASHED)
 106                return;
 107        ce->ce_flags |= CE_HASHED;
 108        ce->next = NULL;
 109        hash = memihash(ce->name, ce_namelen(ce));
 110        pos = insert_hash(hash, ce, &istate->name_hash);
 111        if (pos) {
 112                ce->next = *pos;
 113                *pos = ce;
 114        }
 115
 116        if (ignore_case && !(ce->ce_flags & CE_UNHASHED))
 117                add_dir_entry(istate, ce);
 118}
 119
 120static void lazy_init_name_hash(struct index_state *istate)
 121{
 122        int nr;
 123
 124        if (istate->name_hash_initialized)
 125                return;
 126        if (istate->cache_nr)
 127                preallocate_hash(&istate->name_hash, istate->cache_nr);
 128        hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0);
 129        for (nr = 0; nr < istate->cache_nr; nr++)
 130                hash_index_entry(istate, istate->cache[nr]);
 131        istate->name_hash_initialized = 1;
 132}
 133
 134void add_name_hash(struct index_state *istate, struct cache_entry *ce)
 135{
 136        /* if already hashed, add reference to directory entries */
 137        if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK)
 138                add_dir_entry(istate, ce);
 139
 140        ce->ce_flags &= ~CE_UNHASHED;
 141        if (istate->name_hash_initialized)
 142                hash_index_entry(istate, ce);
 143}
 144
 145/*
 146 * We don't actually *remove* it, we can just mark it invalid so that
 147 * we won't find it in lookups.
 148 *
 149 * Not only would we have to search the lists (simple enough), but
 150 * we'd also have to rehash other hash buckets in case this makes the
 151 * hash bucket empty (common). So it's much better to just mark
 152 * it.
 153 */
 154void remove_name_hash(struct index_state *istate, struct cache_entry *ce)
 155{
 156        /* if already hashed, release reference to directory entries */
 157        if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED)
 158                remove_dir_entry(istate, ce);
 159
 160        ce->ce_flags |= CE_UNHASHED;
 161}
 162
 163static int slow_same_name(const char *name1, int len1, const char *name2, int len2)
 164{
 165        if (len1 != len2)
 166                return 0;
 167
 168        while (len1) {
 169                unsigned char c1 = *name1++;
 170                unsigned char c2 = *name2++;
 171                len1--;
 172                if (c1 != c2) {
 173                        c1 = toupper(c1);
 174                        c2 = toupper(c2);
 175                        if (c1 != c2)
 176                                return 0;
 177                }
 178        }
 179        return 1;
 180}
 181
 182static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase)
 183{
 184        int len = ce_namelen(ce);
 185
 186        /*
 187         * Always do exact compare, even if we want a case-ignoring comparison;
 188         * we do the quick exact one first, because it will be the common case.
 189         */
 190        if (len == namelen && !cache_name_compare(name, namelen, ce->name, len))
 191                return 1;
 192
 193        if (!icase)
 194                return 0;
 195
 196        return slow_same_name(name, namelen, ce->name, len);
 197}
 198
 199struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen)
 200{
 201        struct cache_entry *ce;
 202        struct dir_entry *dir;
 203
 204        lazy_init_name_hash(istate);
 205        dir = find_dir_entry(istate, name, namelen);
 206        if (dir && dir->nr)
 207                return dir->ce;
 208
 209        /*
 210         * It might be a submodule. Unlike plain directories, which are stored
 211         * in the dir-hash, submodules are stored in the name-hash, so check
 212         * there, as well.
 213         */
 214        ce = index_file_exists(istate, name, namelen, 1);
 215        if (ce && S_ISGITLINK(ce->ce_mode))
 216                return ce;
 217
 218        return NULL;
 219}
 220
 221struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int icase)
 222{
 223        unsigned int hash = memihash(name, namelen);
 224        struct cache_entry *ce;
 225
 226        lazy_init_name_hash(istate);
 227        ce = lookup_hash(hash, &istate->name_hash);
 228
 229        while (ce) {
 230                if (!(ce->ce_flags & CE_UNHASHED)) {
 231                        if (same_name(ce, name, namelen, icase))
 232                                return ce;
 233                }
 234                ce = ce->next;
 235        }
 236        return NULL;
 237}
 238
 239struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase)
 240{
 241        if (namelen > 0 && name[namelen - 1] == '/')
 242                return index_dir_exists(istate, name, namelen - 1);
 243        return index_file_exists(istate, name, namelen, icase);
 244}
 245
 246void free_name_hash(struct index_state *istate)
 247{
 248        if (!istate->name_hash_initialized)
 249                return;
 250        istate->name_hash_initialized = 0;
 251
 252        free_hash(&istate->name_hash);
 253        hashmap_free(&istate->dir_hash, 1);
 254}