name-hash.con commit Merge branch 'jh/dirstat' (d98a509)
   1/*
   2 * name-hash.c
   3 *
   4 * Hashing names in the index state
   5 *
   6 * Copyright (C) 2008 Linus Torvalds
   7 */
   8#define NO_THE_INDEX_COMPATIBILITY_MACROS
   9#include "cache.h"
  10
  11/*
  12 * This removes bit 5 if bit 6 is set.
  13 *
  14 * That will make US-ASCII characters hash to their upper-case
  15 * equivalent. We could easily do this one whole word at a time,
  16 * but that's for future worries.
  17 */
  18static inline unsigned char icase_hash(unsigned char c)
  19{
  20        return c & ~((c & 0x40) >> 1);
  21}
  22
  23static unsigned int hash_name(const char *name, int namelen)
  24{
  25        unsigned int hash = 0x123;
  26
  27        do {
  28                unsigned char c = *name++;
  29                c = icase_hash(c);
  30                hash = hash*101 + c;
  31        } while (--namelen);
  32        return hash;
  33}
  34
  35static void hash_index_entry_directories(struct index_state *istate, struct cache_entry *ce)
  36{
  37        /*
  38         * Throw each directory component in the hash for quick lookup
  39         * during a git status. Directory components are stored with their
  40         * closing slash.  Despite submodules being a directory, they never
  41         * reach this point, because they are stored without a closing slash
  42         * in the cache.
  43         *
  44         * Note that the cache_entry stored with the directory does not
  45         * represent the directory itself.  It is a pointer to an existing
  46         * filename, and its only purpose is to represent existence of the
  47         * directory in the cache.  It is very possible multiple directory
  48         * hash entries may point to the same cache_entry.
  49         */
  50        unsigned int hash;
  51        void **pos;
  52
  53        const char *ptr = ce->name;
  54        while (*ptr) {
  55                while (*ptr && *ptr != '/')
  56                        ++ptr;
  57                if (*ptr == '/') {
  58                        ++ptr;
  59                        hash = hash_name(ce->name, ptr - ce->name);
  60                        if (!lookup_hash(hash, &istate->name_hash)) {
  61                                pos = insert_hash(hash, ce, &istate->name_hash);
  62                                if (pos) {
  63                                        ce->next = *pos;
  64                                        *pos = ce;
  65                                }
  66                        }
  67                }
  68        }
  69}
  70
  71static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
  72{
  73        void **pos;
  74        unsigned int hash;
  75
  76        if (ce->ce_flags & CE_HASHED)
  77                return;
  78        ce->ce_flags |= CE_HASHED;
  79        ce->next = NULL;
  80        hash = hash_name(ce->name, ce_namelen(ce));
  81        pos = insert_hash(hash, ce, &istate->name_hash);
  82        if (pos) {
  83                ce->next = *pos;
  84                *pos = ce;
  85        }
  86
  87        if (ignore_case)
  88                hash_index_entry_directories(istate, ce);
  89}
  90
  91static void lazy_init_name_hash(struct index_state *istate)
  92{
  93        int nr;
  94
  95        if (istate->name_hash_initialized)
  96                return;
  97        for (nr = 0; nr < istate->cache_nr; nr++)
  98                hash_index_entry(istate, istate->cache[nr]);
  99        istate->name_hash_initialized = 1;
 100}
 101
 102void add_name_hash(struct index_state *istate, struct cache_entry *ce)
 103{
 104        ce->ce_flags &= ~CE_UNHASHED;
 105        if (istate->name_hash_initialized)
 106                hash_index_entry(istate, ce);
 107}
 108
 109static int slow_same_name(const char *name1, int len1, const char *name2, int len2)
 110{
 111        if (len1 != len2)
 112                return 0;
 113
 114        while (len1) {
 115                unsigned char c1 = *name1++;
 116                unsigned char c2 = *name2++;
 117                len1--;
 118                if (c1 != c2) {
 119                        c1 = toupper(c1);
 120                        c2 = toupper(c2);
 121                        if (c1 != c2)
 122                                return 0;
 123                }
 124        }
 125        return 1;
 126}
 127
 128static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase)
 129{
 130        int len = ce_namelen(ce);
 131
 132        /*
 133         * Always do exact compare, even if we want a case-ignoring comparison;
 134         * we do the quick exact one first, because it will be the common case.
 135         */
 136        if (len == namelen && !cache_name_compare(name, namelen, ce->name, len))
 137                return 1;
 138
 139        if (!icase)
 140                return 0;
 141
 142        /*
 143         * If the entry we're comparing is a filename (no trailing slash), then compare
 144         * the lengths exactly.
 145         */
 146        if (name[namelen - 1] != '/')
 147                return slow_same_name(name, namelen, ce->name, len);
 148
 149        /*
 150         * For a directory, we point to an arbitrary cache_entry filename.  Just
 151         * make sure the directory portion matches.
 152         */
 153        return slow_same_name(name, namelen, ce->name, namelen < len ? namelen : len);
 154}
 155
 156struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase)
 157{
 158        unsigned int hash = hash_name(name, namelen);
 159        struct cache_entry *ce;
 160
 161        lazy_init_name_hash(istate);
 162        ce = lookup_hash(hash, &istate->name_hash);
 163
 164        while (ce) {
 165                if (!(ce->ce_flags & CE_UNHASHED)) {
 166                        if (same_name(ce, name, namelen, icase))
 167                                return ce;
 168                }
 169                ce = ce->next;
 170        }
 171
 172        /*
 173         * Might be a submodule.  Despite submodules being directories,
 174         * they are stored in the name hash without a closing slash.
 175         * When ignore_case is 1, directories are stored in the name hash
 176         * with their closing slash.
 177         *
 178         * The side effect of this storage technique is we have need to
 179         * remove the slash from name and perform the lookup again without
 180         * the slash.  If a match is made, S_ISGITLINK(ce->mode) will be
 181         * true.
 182         */
 183        if (icase && name[namelen - 1] == '/') {
 184                ce = index_name_exists(istate, name, namelen - 1, icase);
 185                if (ce && S_ISGITLINK(ce->ce_mode))
 186                        return ce;
 187        }
 188        return NULL;
 189}