This is to address concerns raised by ThreadSanitizer on the mailing list
about threaded unprotected R/W access to map.size with my previous "disallow
rehash" change (
0607e10009ee4e37cb49b4cec8d28a9dda1656a4).
See:
https://public-inbox.org/git/
adb37b70139fd1e2bac18bfd22c8b96683ae18eb.
1502780344.git.martin.agren@gmail.com/
Add API to hashmap to disable item counting and thus automatic rehashing.
Also include API to later re-enable them.
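For illustration, the intended call pattern looks roughly like the sketch
below (my_cmp_fn, expected_nr and threaded_fill are hypothetical stand-ins
for the caller's own code, analogous to what lazy_init_name_hash() does):

    struct hashmap map;

    /* size the table up front; it will not grow while counting is disabled */
    hashmap_init(&map, my_cmp_fn, NULL, expected_nr);

    hashmap_disable_item_counting(&map);  /* no count updates, no auto-rehash */
    threaded_fill(&map);                  /* caller provides its own locking */
    hashmap_enable_item_counting(&map);   /* recount items, resume auto-resize */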
When item counting is disabled, the map.size field is invalid. To
prevent accidents, the field has been renamed to map.private_size and
an accessor function hashmap_get_size() has been added. All direct
references to the field have been updated.
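So, for example, a direct read of the count now goes through the accessor
(illustrative only; "map" and "nr" are placeholders):

    unsigned int nr = hashmap_get_size(&map);  /* was: nr = map.size; */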
Here is the relevant output from ThreadSanitizer showing the problem:
WARNING: ThreadSanitizer: data race (pid=10554)
  Read of size 4 at 0x00000082d488 by thread T2 (mutexes: write M16):
    #0 hashmap_add hashmap.c:209
    #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302
    #2 handle_range_dir name-hash.c:347
    #3 handle_range_1 name-hash.c:415
    #4 lazy_dir_thread_proc name-hash.c:471
    #5 <null> <null>

  Previous write of size 4 at 0x00000082d488 by thread T1 (mutexes: write M31):
    #0 hashmap_add hashmap.c:209
    #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302
    #2 handle_range_dir name-hash.c:347
    #3 handle_range_1 name-hash.c:415
    #4 handle_range_dir name-hash.c:380
    #5 handle_range_1 name-hash.c:415
    #6 lazy_dir_thread_proc name-hash.c:471
    #7 <null> <null>
Martin gives instructions for running TSan on test t3008 in this post:
https://public-inbox.org/git/CAN0heSoJDL9pWELD6ciLTmWf-a=oyxe4EXXOmCKvsG5MSuzxsA@mail.gmail.com/
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
static void all_attrs_init(struct attr_hashmap *map, struct attr_check *check)
{
int i;
+ unsigned int size;
hashmap_lock(map);
- if (map->map.size < check->all_attrs_nr)
+ size = hashmap_get_size(&map->map);
+ if (size < check->all_attrs_nr)
die("BUG: interned attributes shouldn't be deleted");
/*
* field), reallocate the provided attr_check instance's all_attrs
* field and fill each entry with its corresponding git_attr.
*/
- if (map->map.size != check->all_attrs_nr) {
+ if (size != check->all_attrs_nr) {
struct attr_hash_entry *e;
struct hashmap_iter iter;
hashmap_iter_init(&map->map, &iter);
- REALLOC_ARRAY(check->all_attrs, map->map.size);
- check->all_attrs_nr = map->map.size;
+ REALLOC_ARRAY(check->all_attrs, size);
+ check->all_attrs_nr = size;
while ((e = hashmap_iter_next(&iter))) {
const struct git_attr *a = e->value;
if (!a) {
FLEX_ALLOC_MEM(a, name, name, namelen);
- a->attr_nr = g_attr_hashmap.map.size;
+ a->attr_nr = hashmap_get_size(&g_attr_hashmap.map);
attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a);
- assert(a->attr_nr == (g_attr_hashmap.map.size - 1));
+ assert(a->attr_nr ==
+ (hashmap_get_size(&g_attr_hashmap.map) - 1));
}
hashmap_unlock(&g_attr_hashmap);
hashmap_init(&names, commit_name_cmp, NULL, 0);
for_each_rawref(get_name, NULL);
- if (!names.size && !always)
+ if (!hashmap_get_size(&names) && !always)
die(_("No names found, cannot describe anything."));
if (argc == 0) {
unsigned int i, oldsize = map->tablesize;
struct hashmap_entry **oldtable = map->table;
- if (map->disallow_rehash)
- return;
-
alloc_table(map, newsize);
for (i = 0; i < oldsize; i++) {
struct hashmap_entry *e = oldtable[i];
while (initial_size > size)
size <<= HASHMAP_RESIZE_BITS;
alloc_table(map, size);
+
+ /*
+ * Keep track of the number of items in the map and
+ * allow the map to automatically grow as necessary.
+ */
+ map->do_count_items = 1;
}
void hashmap_free(struct hashmap *map, int free_entries)
map->table[b] = entry;
/* fix size and rehash if appropriate */
- map->size++;
- if (map->size > map->grow_at)
- rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
+ if (map->do_count_items) {
+ map->private_size++;
+ if (map->private_size > map->grow_at)
+ rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
+ }
}
void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)
old->next = NULL;
/* fix size and rehash if appropriate */
- map->size--;
- if (map->size < map->shrink_at)
- rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS);
+ if (map->do_count_items) {
+ map->private_size--;
+ if (map->private_size < map->shrink_at)
+ rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS);
+ }
+
return old;
}
const void *cmpfn_data;
/* total number of entries (0 means the hashmap is empty) */
- unsigned int size;
+ unsigned int private_size; /* use hashmap_get_size() */
/*
* tablesize is the allocated size of the hash table. A non-0 value
unsigned int grow_at;
unsigned int shrink_at;
- /* See `hashmap_disallow_rehash`. */
- unsigned disallow_rehash : 1;
+ unsigned int do_count_items : 1;
};
/* hashmap functions */
e->next = NULL;
}
+/*
+ * Return the number of items in the map.
+ */
+static inline unsigned int hashmap_get_size(struct hashmap *map)
+{
+ if (map->do_count_items)
+ return map->private_size;
+
+ BUG("hashmap_get_size: size not set");
+ return 0;
+}
+
/*
* Returns the hashmap entry for the specified key, or NULL if not found.
*
*/
int hashmap_bucket(const struct hashmap *map, unsigned int hash);
-/*
- * Disallow/allow rehashing of the hashmap.
- * This is useful if the caller knows that the hashmap needs multi-threaded
- * access. The caller is still required to guard/lock searches and inserts
- * in a manner appropriate to their usage. This simply prevents the table
- * from being unexpectedly re-mapped.
- *
- * It is up to the caller to ensure that the hashmap is initialized to a
- * reasonable size to prevent poor performance.
- *
- * A call to allow rehashing does not force a rehash; that might happen
- * with the next insert or delete.
- */
-static inline void hashmap_disallow_rehash(struct hashmap *map, unsigned value)
-{
- map->disallow_rehash = value;
-}
-
/*
* Used to iterate over all entries of a hashmap. Note that it is
* not safe to add or remove entries to the hashmap while
return hashmap_iter_next(iter);
}
+/*
+ * Disable item counting and automatic rehashing when adding/removing items.
+ *
+ * Normally, the hashmap keeps track of the number of items in the map
+ * and uses that count to dynamically resize the table. Both the counting
+ * and the resizing can cause problems when the map is used by threaded
+ * callers, because the hashmap code does not know about the locking
+ * strategy used by those callers and therefore does not know how to
+ * protect the "private_size" counter.
+ */
+static inline void hashmap_disable_item_counting(struct hashmap *map)
+{
+ map->do_count_items = 0;
+}
+
+/*
+ * Re-enable item counting when adding/removing items.
+ * If counting is currently disabled, this will force a recount of the
+ * existing items; it WILL NOT automatically rehash the table.
+ */
+static inline void hashmap_enable_item_counting(struct hashmap *map)
+{
+ void *item;
+ unsigned int n = 0;
+ struct hashmap_iter iter;
+
+ if (map->do_count_items)
+ return;
+
+ hashmap_iter_init(map, &iter);
+ while ((item = hashmap_iter_next(&iter)))
+ n++;
+
+ map->do_count_items = 1;
+ map->private_size = n;
+}
+
/* String interning */
/*
hashmap_init(&istate->dir_hash, dir_entry_cmp, NULL, istate->cache_nr);
if (lookup_lazy_params(istate)) {
- hashmap_disallow_rehash(&istate->dir_hash, 1);
+ /*
+ * Disable item counting and automatic rehashing because
+ * we do per-chain (mod n) locking rather than whole hashmap
+ * locking and we need to prevent the table-size from changing
+ * and bucket items from being redistributed.
+ */
+ hashmap_disable_item_counting(&istate->dir_hash);
threaded_lazy_init_name_hash(istate);
- hashmap_disallow_rehash(&istate->dir_hash, 0);
+ hashmap_enable_item_counting(&istate->dir_hash);
} else {
int nr;
for (nr = 0; nr < istate->cache_nr; nr++)
} else if (!strcmp("size", cmd)) {
/* print table sizes */
- printf("%u %u\n", map.tablesize, map.size);
+ printf("%u %u\n", map.tablesize,
+ hashmap_get_size(&map));
} else if (!strcmp("intern", cmd) && l1) {