name-hash.con commit Merge branch 'dl/am-hg-locale' (738b314)
   1/*
   2 * name-hash.c
   3 *
   4 * Hashing names in the index state
   5 *
   6 * Copyright (C) 2008 Linus Torvalds
   7 */
   8#define NO_THE_INDEX_COMPATIBILITY_MACROS
   9#include "cache.h"
  10
  11/*
  12 * This removes bit 5 if bit 6 is set.
  13 *
  14 * That will make US-ASCII characters hash to their upper-case
  15 * equivalent. We could easily do this one whole word at a time,
  16 * but that's for future worries.
  17 */
  18static inline unsigned char icase_hash(unsigned char c)
  19{
  20        return c & ~((c & 0x40) >> 1);
  21}
  22
  23static unsigned int hash_name(const char *name, int namelen)
  24{
  25        unsigned int hash = 0x123;
  26
  27        do {
  28                unsigned char c = *name++;
  29                c = icase_hash(c);
  30                hash = hash*101 + c;
  31        } while (--namelen);
  32        return hash;
  33}
  34
  35static void hash_index_entry_directories(struct index_state *istate, struct cache_entry *ce)
  36{
  37        /*
  38         * Throw each directory component in the hash for quick lookup
  39         * during a git status. Directory components are stored with their
  40         * closing slash.  Despite submodules being a directory, they never
  41         * reach this point, because they are stored without a closing slash
  42         * in the cache.
  43         *
  44         * Note that the cache_entry stored with the directory does not
  45         * represent the directory itself.  It is a pointer to an existing
  46         * filename, and its only purpose is to represent existence of the
  47         * directory in the cache.  It is very possible multiple directory
  48         * hash entries may point to the same cache_entry.
  49         */
  50        unsigned int hash;
  51        void **pos;
  52
  53        const char *ptr = ce->name;
  54        while (*ptr) {
  55                while (*ptr && *ptr != '/')
  56                        ++ptr;
  57                if (*ptr == '/') {
  58                        ++ptr;
  59                        hash = hash_name(ce->name, ptr - ce->name);
  60                        pos = insert_hash(hash, ce, &istate->name_hash);
  61                        if (pos) {
  62                                ce->dir_next = *pos;
  63                                *pos = ce;
  64                        }
  65                }
  66        }
  67}
  68
  69static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
  70{
  71        void **pos;
  72        unsigned int hash;
  73
  74        if (ce->ce_flags & CE_HASHED)
  75                return;
  76        ce->ce_flags |= CE_HASHED;
  77        ce->next = ce->dir_next = NULL;
  78        hash = hash_name(ce->name, ce_namelen(ce));
  79        pos = insert_hash(hash, ce, &istate->name_hash);
  80        if (pos) {
  81                ce->next = *pos;
  82                *pos = ce;
  83        }
  84
  85        if (ignore_case)
  86                hash_index_entry_directories(istate, ce);
  87}
  88
  89static void lazy_init_name_hash(struct index_state *istate)
  90{
  91        int nr;
  92
  93        if (istate->name_hash_initialized)
  94                return;
  95        for (nr = 0; nr < istate->cache_nr; nr++)
  96                hash_index_entry(istate, istate->cache[nr]);
  97        istate->name_hash_initialized = 1;
  98}
  99
 100void add_name_hash(struct index_state *istate, struct cache_entry *ce)
 101{
 102        ce->ce_flags &= ~CE_UNHASHED;
 103        if (istate->name_hash_initialized)
 104                hash_index_entry(istate, ce);
 105}
 106
 107static int slow_same_name(const char *name1, int len1, const char *name2, int len2)
 108{
 109        if (len1 != len2)
 110                return 0;
 111
 112        while (len1) {
 113                unsigned char c1 = *name1++;
 114                unsigned char c2 = *name2++;
 115                len1--;
 116                if (c1 != c2) {
 117                        c1 = toupper(c1);
 118                        c2 = toupper(c2);
 119                        if (c1 != c2)
 120                                return 0;
 121                }
 122        }
 123        return 1;
 124}
 125
 126static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase)
 127{
 128        int len = ce_namelen(ce);
 129
 130        /*
 131         * Always do exact compare, even if we want a case-ignoring comparison;
 132         * we do the quick exact one first, because it will be the common case.
 133         */
 134        if (len == namelen && !cache_name_compare(name, namelen, ce->name, len))
 135                return 1;
 136
 137        if (!icase)
 138                return 0;
 139
 140        /*
 141         * If the entry we're comparing is a filename (no trailing slash), then compare
 142         * the lengths exactly.
 143         */
 144        if (name[namelen - 1] != '/')
 145                return slow_same_name(name, namelen, ce->name, len);
 146
 147        /*
 148         * For a directory, we point to an arbitrary cache_entry filename.  Just
 149         * make sure the directory portion matches.
 150         */
 151        return slow_same_name(name, namelen, ce->name, namelen < len ? namelen : len);
 152}
 153
 154struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase)
 155{
 156        unsigned int hash = hash_name(name, namelen);
 157        struct cache_entry *ce;
 158
 159        lazy_init_name_hash(istate);
 160        ce = lookup_hash(hash, &istate->name_hash);
 161
 162        while (ce) {
 163                if (!(ce->ce_flags & CE_UNHASHED)) {
 164                        if (same_name(ce, name, namelen, icase))
 165                                return ce;
 166                }
 167                if (icase && name[namelen - 1] == '/')
 168                        ce = ce->dir_next;
 169                else
 170                        ce = ce->next;
 171        }
 172
 173        /*
 174         * Might be a submodule.  Despite submodules being directories,
 175         * they are stored in the name hash without a closing slash.
 176         * When ignore_case is 1, directories are stored in the name hash
 177         * with their closing slash.
 178         *
 179         * The side effect of this storage technique is we have need to
 180         * remove the slash from name and perform the lookup again without
 181         * the slash.  If a match is made, S_ISGITLINK(ce->mode) will be
 182         * true.
 183         */
 184        if (icase && name[namelen - 1] == '/') {
 185                ce = index_name_exists(istate, name, namelen - 1, icase);
 186                if (ce && S_ISGITLINK(ce->ce_mode))
 187                        return ce;
 188        }
 189        return NULL;
 190}