name-hash.con commit pack-objects: refactor the packing list (2834bc2)
   1/*
   2 * name-hash.c
   3 *
   4 * Hashing names in the index state
   5 *
   6 * Copyright (C) 2008 Linus Torvalds
   7 */
   8#define NO_THE_INDEX_COMPATIBILITY_MACROS
   9#include "cache.h"
  10
  11/*
  12 * This removes bit 5 if bit 6 is set.
  13 *
  14 * That will make US-ASCII characters hash to their upper-case
  15 * equivalent. We could easily do this one whole word at a time,
  16 * but that's for future worries.
  17 */
  18static inline unsigned char icase_hash(unsigned char c)
  19{
  20        return c & ~((c & 0x40) >> 1);
  21}
  22
  23static unsigned int hash_name(const char *name, int namelen)
  24{
  25        unsigned int hash = 0x123;
  26
  27        while (namelen--) {
  28                unsigned char c = *name++;
  29                c = icase_hash(c);
  30                hash = hash*101 + c;
  31        }
  32        return hash;
  33}
  34
  35struct dir_entry {
  36        struct dir_entry *next;
  37        struct dir_entry *parent;
  38        struct cache_entry *ce;
  39        int nr;
  40        unsigned int namelen;
  41};
  42
  43static struct dir_entry *find_dir_entry(struct index_state *istate,
  44                const char *name, unsigned int namelen)
  45{
  46        unsigned int hash = hash_name(name, namelen);
  47        struct dir_entry *dir;
  48
  49        for (dir = lookup_hash(hash, &istate->dir_hash); dir; dir = dir->next)
  50                if (dir->namelen == namelen &&
  51                    !strncasecmp(dir->ce->name, name, namelen))
  52                        return dir;
  53        return NULL;
  54}
  55
  56static struct dir_entry *hash_dir_entry(struct index_state *istate,
  57                struct cache_entry *ce, int namelen)
  58{
  59        /*
  60         * Throw each directory component in the hash for quick lookup
  61         * during a git status. Directory components are stored without their
  62         * closing slash.  Despite submodules being a directory, they never
  63         * reach this point, because they are stored
  64         * in index_state.name_hash (as ordinary cache_entries).
  65         *
  66         * Note that the cache_entry stored with the dir_entry merely
  67         * supplies the name of the directory (up to dir_entry.namelen). We
  68         * track the number of 'active' files in a directory in dir_entry.nr,
  69         * so we can tell if the directory is still relevant, e.g. for git
  70         * status. However, if cache_entries are removed, we cannot pinpoint
  71         * an exact cache_entry that's still active. It is very possible that
  72         * multiple dir_entries point to the same cache_entry.
  73         */
  74        struct dir_entry *dir;
  75
  76        /* get length of parent directory */
  77        while (namelen > 0 && !is_dir_sep(ce->name[namelen - 1]))
  78                namelen--;
  79        if (namelen <= 0)
  80                return NULL;
  81        namelen--;
  82
  83        /* lookup existing entry for that directory */
  84        dir = find_dir_entry(istate, ce->name, namelen);
  85        if (!dir) {
  86                /* not found, create it and add to hash table */
  87                void **pdir;
  88                unsigned int hash = hash_name(ce->name, namelen);
  89
  90                dir = xcalloc(1, sizeof(struct dir_entry));
  91                dir->namelen = namelen;
  92                dir->ce = ce;
  93
  94                pdir = insert_hash(hash, dir, &istate->dir_hash);
  95                if (pdir) {
  96                        dir->next = *pdir;
  97                        *pdir = dir;
  98                }
  99
 100                /* recursively add missing parent directories */
 101                dir->parent = hash_dir_entry(istate, ce, namelen);
 102        }
 103        return dir;
 104}
 105
 106static void add_dir_entry(struct index_state *istate, struct cache_entry *ce)
 107{
 108        /* Add reference to the directory entry (and parents if 0). */
 109        struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
 110        while (dir && !(dir->nr++))
 111                dir = dir->parent;
 112}
 113
 114static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce)
 115{
 116        /*
 117         * Release reference to the directory entry (and parents if 0).
 118         *
 119         * Note: we do not remove / free the entry because there's no
 120         * hash.[ch]::remove_hash and dir->next may point to other entries
 121         * that are still valid, so we must not free the memory.
 122         */
 123        struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
 124        while (dir && dir->nr && !(--dir->nr))
 125                dir = dir->parent;
 126}
 127
 128static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
 129{
 130        void **pos;
 131        unsigned int hash;
 132
 133        if (ce->ce_flags & CE_HASHED)
 134                return;
 135        ce->ce_flags |= CE_HASHED;
 136        ce->next = NULL;
 137        hash = hash_name(ce->name, ce_namelen(ce));
 138        pos = insert_hash(hash, ce, &istate->name_hash);
 139        if (pos) {
 140                ce->next = *pos;
 141                *pos = ce;
 142        }
 143
 144        if (ignore_case && !(ce->ce_flags & CE_UNHASHED))
 145                add_dir_entry(istate, ce);
 146}
 147
 148static void lazy_init_name_hash(struct index_state *istate)
 149{
 150        int nr;
 151
 152        if (istate->name_hash_initialized)
 153                return;
 154        if (istate->cache_nr)
 155                preallocate_hash(&istate->name_hash, istate->cache_nr);
 156        for (nr = 0; nr < istate->cache_nr; nr++)
 157                hash_index_entry(istate, istate->cache[nr]);
 158        istate->name_hash_initialized = 1;
 159}
 160
 161void add_name_hash(struct index_state *istate, struct cache_entry *ce)
 162{
 163        /* if already hashed, add reference to directory entries */
 164        if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK)
 165                add_dir_entry(istate, ce);
 166
 167        ce->ce_flags &= ~CE_UNHASHED;
 168        if (istate->name_hash_initialized)
 169                hash_index_entry(istate, ce);
 170}
 171
 172/*
 173 * We don't actually *remove* it, we can just mark it invalid so that
 174 * we won't find it in lookups.
 175 *
 176 * Not only would we have to search the lists (simple enough), but
 177 * we'd also have to rehash other hash buckets in case this makes the
 178 * hash bucket empty (common). So it's much better to just mark
 179 * it.
 180 */
 181void remove_name_hash(struct index_state *istate, struct cache_entry *ce)
 182{
 183        /* if already hashed, release reference to directory entries */
 184        if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED)
 185                remove_dir_entry(istate, ce);
 186
 187        ce->ce_flags |= CE_UNHASHED;
 188}
 189
 190static int slow_same_name(const char *name1, int len1, const char *name2, int len2)
 191{
 192        if (len1 != len2)
 193                return 0;
 194
 195        while (len1) {
 196                unsigned char c1 = *name1++;
 197                unsigned char c2 = *name2++;
 198                len1--;
 199                if (c1 != c2) {
 200                        c1 = toupper(c1);
 201                        c2 = toupper(c2);
 202                        if (c1 != c2)
 203                                return 0;
 204                }
 205        }
 206        return 1;
 207}
 208
 209static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase)
 210{
 211        int len = ce_namelen(ce);
 212
 213        /*
 214         * Always do exact compare, even if we want a case-ignoring comparison;
 215         * we do the quick exact one first, because it will be the common case.
 216         */
 217        if (len == namelen && !cache_name_compare(name, namelen, ce->name, len))
 218                return 1;
 219
 220        if (!icase)
 221                return 0;
 222
 223        return slow_same_name(name, namelen, ce->name, len);
 224}
 225
 226struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen)
 227{
 228        struct cache_entry *ce;
 229        struct dir_entry *dir;
 230
 231        lazy_init_name_hash(istate);
 232        dir = find_dir_entry(istate, name, namelen);
 233        if (dir && dir->nr)
 234                return dir->ce;
 235
 236        /*
 237         * It might be a submodule. Unlike plain directories, which are stored
 238         * in the dir-hash, submodules are stored in the name-hash, so check
 239         * there, as well.
 240         */
 241        ce = index_file_exists(istate, name, namelen, 1);
 242        if (ce && S_ISGITLINK(ce->ce_mode))
 243                return ce;
 244
 245        return NULL;
 246}
 247
 248struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int icase)
 249{
 250        unsigned int hash = hash_name(name, namelen);
 251        struct cache_entry *ce;
 252
 253        lazy_init_name_hash(istate);
 254        ce = lookup_hash(hash, &istate->name_hash);
 255
 256        while (ce) {
 257                if (!(ce->ce_flags & CE_UNHASHED)) {
 258                        if (same_name(ce, name, namelen, icase))
 259                                return ce;
 260                }
 261                ce = ce->next;
 262        }
 263        return NULL;
 264}
 265
 266struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase)
 267{
 268        if (namelen > 0 && name[namelen - 1] == '/')
 269                return index_dir_exists(istate, name, namelen - 1);
 270        return index_file_exists(istate, name, namelen, icase);
 271}
 272
 273static int free_dir_entry(void *entry, void *unused)
 274{
 275        struct dir_entry *dir = entry;
 276        while (dir) {
 277                struct dir_entry *next = dir->next;
 278                free(dir);
 279                dir = next;
 280        }
 281        return 0;
 282}
 283
 284void free_name_hash(struct index_state *istate)
 285{
 286        if (!istate->name_hash_initialized)
 287                return;
 288        istate->name_hash_initialized = 0;
 289        if (ignore_case)
 290                /* free directory entries */
 291                for_each_hash(&istate->dir_hash, free_dir_entry, NULL);
 292
 293        free_hash(&istate->name_hash);
 294        free_hash(&istate->dir_hash);
 295}