/* hashmap.c on commit 37e9c7f: Merge branch 'mh/blame-worktree' */
   1/*
   2 * Generic implementation of hash-based key value mappings.
   3 */
   4#include "cache.h"
   5#include "hashmap.h"
   6
   7#define FNV32_BASE ((unsigned int) 0x811c9dc5)
   8#define FNV32_PRIME ((unsigned int) 0x01000193)
   9
  10unsigned int strhash(const char *str)
  11{
  12        unsigned int c, hash = FNV32_BASE;
  13        while ((c = (unsigned char) *str++))
  14                hash = (hash * FNV32_PRIME) ^ c;
  15        return hash;
  16}
  17
  18unsigned int strihash(const char *str)
  19{
  20        unsigned int c, hash = FNV32_BASE;
  21        while ((c = (unsigned char) *str++)) {
  22                if (c >= 'a' && c <= 'z')
  23                        c -= 'a' - 'A';
  24                hash = (hash * FNV32_PRIME) ^ c;
  25        }
  26        return hash;
  27}
  28
  29unsigned int memhash(const void *buf, size_t len)
  30{
  31        unsigned int hash = FNV32_BASE;
  32        unsigned char *ucbuf = (unsigned char *) buf;
  33        while (len--) {
  34                unsigned int c = *ucbuf++;
  35                hash = (hash * FNV32_PRIME) ^ c;
  36        }
  37        return hash;
  38}
  39
  40unsigned int memihash(const void *buf, size_t len)
  41{
  42        unsigned int hash = FNV32_BASE;
  43        unsigned char *ucbuf = (unsigned char *) buf;
  44        while (len--) {
  45                unsigned int c = *ucbuf++;
  46                if (c >= 'a' && c <= 'z')
  47                        c -= 'a' - 'A';
  48                hash = (hash * FNV32_PRIME) ^ c;
  49        }
  50        return hash;
  51}
  52
  53#define HASHMAP_INITIAL_SIZE 64
  54/* grow / shrink by 2^2 */
  55#define HASHMAP_RESIZE_BITS 2
  56/* load factor in percent */
  57#define HASHMAP_LOAD_FACTOR 80
  58
  59static void alloc_table(struct hashmap *map, unsigned int size)
  60{
  61        map->tablesize = size;
  62        map->table = xcalloc(size, sizeof(struct hashmap_entry *));
  63
  64        /* calculate resize thresholds for new size */
  65        map->grow_at = (unsigned int) ((uint64_t) size * HASHMAP_LOAD_FACTOR / 100);
  66        if (size <= HASHMAP_INITIAL_SIZE)
  67                map->shrink_at = 0;
  68        else
  69                /*
  70                 * The shrink-threshold must be slightly smaller than
  71                 * (grow-threshold / resize-factor) to prevent erratic resizing,
  72                 * thus we divide by (resize-factor + 1).
  73                 */
  74                map->shrink_at = map->grow_at / ((1 << HASHMAP_RESIZE_BITS) + 1);
  75}
  76
  77static inline int entry_equals(const struct hashmap *map,
  78                const struct hashmap_entry *e1, const struct hashmap_entry *e2,
  79                const void *keydata)
  80{
  81        return (e1 == e2) || (e1->hash == e2->hash && !map->cmpfn(e1, e2, keydata));
  82}
  83
  84static inline unsigned int bucket(const struct hashmap *map,
  85                const struct hashmap_entry *key)
  86{
  87        return key->hash & (map->tablesize - 1);
  88}
  89
  90static void rehash(struct hashmap *map, unsigned int newsize)
  91{
  92        unsigned int i, oldsize = map->tablesize;
  93        struct hashmap_entry **oldtable = map->table;
  94
  95        alloc_table(map, newsize);
  96        for (i = 0; i < oldsize; i++) {
  97                struct hashmap_entry *e = oldtable[i];
  98                while (e) {
  99                        struct hashmap_entry *next = e->next;
 100                        unsigned int b = bucket(map, e);
 101                        e->next = map->table[b];
 102                        map->table[b] = e;
 103                        e = next;
 104                }
 105        }
 106        free(oldtable);
 107}
 108
 109static inline struct hashmap_entry **find_entry_ptr(const struct hashmap *map,
 110                const struct hashmap_entry *key, const void *keydata)
 111{
 112        struct hashmap_entry **e = &map->table[bucket(map, key)];
 113        while (*e && !entry_equals(map, *e, key, keydata))
 114                e = &(*e)->next;
 115        return e;
 116}
 117
 118static int always_equal(const void *unused1, const void *unused2, const void *unused3)
 119{
 120        return 0;
 121}
 122
 123void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
 124                size_t initial_size)
 125{
 126        unsigned int size = HASHMAP_INITIAL_SIZE;
 127        map->size = 0;
 128        map->cmpfn = equals_function ? equals_function : always_equal;
 129
 130        /* calculate initial table size and allocate the table */
 131        initial_size = (unsigned int) ((uint64_t) initial_size * 100
 132                        / HASHMAP_LOAD_FACTOR);
 133        while (initial_size > size)
 134                size <<= HASHMAP_RESIZE_BITS;
 135        alloc_table(map, size);
 136}
 137
 138void hashmap_free(struct hashmap *map, int free_entries)
 139{
 140        if (!map || !map->table)
 141                return;
 142        if (free_entries) {
 143                struct hashmap_iter iter;
 144                struct hashmap_entry *e;
 145                hashmap_iter_init(map, &iter);
 146                while ((e = hashmap_iter_next(&iter)))
 147                        free(e);
 148        }
 149        free(map->table);
 150        memset(map, 0, sizeof(*map));
 151}
 152
 153void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata)
 154{
 155        return *find_entry_ptr(map, key, keydata);
 156}
 157
 158void *hashmap_get_next(const struct hashmap *map, const void *entry)
 159{
 160        struct hashmap_entry *e = ((struct hashmap_entry *) entry)->next;
 161        for (; e; e = e->next)
 162                if (entry_equals(map, entry, e, NULL))
 163                        return e;
 164        return NULL;
 165}
 166
 167void hashmap_add(struct hashmap *map, void *entry)
 168{
 169        unsigned int b = bucket(map, entry);
 170
 171        /* add entry */
 172        ((struct hashmap_entry *) entry)->next = map->table[b];
 173        map->table[b] = entry;
 174
 175        /* fix size and rehash if appropriate */
 176        map->size++;
 177        if (map->size > map->grow_at)
 178                rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
 179}
 180
 181void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)
 182{
 183        struct hashmap_entry *old;
 184        struct hashmap_entry **e = find_entry_ptr(map, key, keydata);
 185        if (!*e)
 186                return NULL;
 187
 188        /* remove existing entry */
 189        old = *e;
 190        *e = old->next;
 191        old->next = NULL;
 192
 193        /* fix size and rehash if appropriate */
 194        map->size--;
 195        if (map->size < map->shrink_at)
 196                rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS);
 197        return old;
 198}
 199
 200void *hashmap_put(struct hashmap *map, void *entry)
 201{
 202        struct hashmap_entry *old = hashmap_remove(map, entry, NULL);
 203        hashmap_add(map, entry);
 204        return old;
 205}
 206
 207void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)
 208{
 209        iter->map = map;
 210        iter->tablepos = 0;
 211        iter->next = NULL;
 212}
 213
 214void *hashmap_iter_next(struct hashmap_iter *iter)
 215{
 216        struct hashmap_entry *current = iter->next;
 217        for (;;) {
 218                if (current) {
 219                        iter->next = current->next;
 220                        return current;
 221                }
 222
 223                if (iter->tablepos >= iter->map->tablesize)
 224                        return NULL;
 225
 226                current = iter->map->table[iter->tablepos++];
 227        }
 228}
 229
 230struct pool_entry {
 231        struct hashmap_entry ent;
 232        size_t len;
 233        unsigned char data[FLEX_ARRAY];
 234};
 235
 236static int pool_entry_cmp(const struct pool_entry *e1,
 237                          const struct pool_entry *e2,
 238                          const unsigned char *keydata)
 239{
 240        return e1->data != keydata &&
 241               (e1->len != e2->len || memcmp(e1->data, keydata, e1->len));
 242}
 243
 244const void *memintern(const void *data, size_t len)
 245{
 246        static struct hashmap map;
 247        struct pool_entry key, *e;
 248
 249        /* initialize string pool hashmap */
 250        if (!map.tablesize)
 251                hashmap_init(&map, (hashmap_cmp_fn) pool_entry_cmp, 0);
 252
 253        /* lookup interned string in pool */
 254        hashmap_entry_init(&key, memhash(data, len));
 255        key.len = len;
 256        e = hashmap_get(&map, &key, data);
 257        if (!e) {
 258                /* not found: create it */
 259                FLEX_ALLOC_MEM(e, data, data, len);
 260                hashmap_entry_init(e, key.ent.hash);
 261                e->len = len;
 262                hashmap_add(&map, e);
 263        }
 264        return e->data;
 265}