/* Wild guess at the initial hash size */
#define INITIAL_HASH_SIZE 9
-#define HASHBASE 65537 /* next_prime(2^16) */
+
/* We leave more room in smaller hash but do not let it
* grow to have unused hole too much.
*/
#define INITIAL_FREE(sz_log2) ((1<<(sz_log2))*(sz_log2-3)/(sz_log2))
+/* A prime rather carefully chosen between 2^16..2^17, so that
+ * HASHBASE < INITIAL_FREE(17). We want to keep the maximum hashtable
+ * size within the current 1<<17 maximum, which can hold this many
+ * different values before overflowing to a hashtable of size 1<<18.
+ */
+#define HASHBASE 107927
+
struct spanhash {
- unsigned long hashval;
- unsigned long cnt;
+ unsigned int hashval;
+ unsigned int cnt;
};
struct spanhash_top {
int alloc_log2;
};
static struct spanhash *spanhash_find(struct spanhash_top *top,
- unsigned long hashval)
+ unsigned int hashval)
{
int sz = 1 << top->alloc_log2;
int bucket = hashval & (sz - 1);
}
static struct spanhash_top *add_spanhash(struct spanhash_top *top,
- unsigned long hashval)
+ unsigned int hashval, int cnt)
{
int bucket, lim;
struct spanhash *h;
h = &(top->data[bucket++]);
if (!h->cnt) {
h->hashval = hashval;
- h->cnt = 1;
+ h->cnt = cnt;
top->free--;
if (top->free < 0)
return spanhash_rehash(top);
return top;
}
if (h->hashval == hashval) {
- h->cnt++;
+ h->cnt += cnt;
return top;
}
if (lim <= bucket)
}
}
-static struct spanhash_top *hash_chars(unsigned char *buf, unsigned long sz)
+static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz)
{
- int i;
- unsigned long accum1, accum2, hashval;
+ int i, n;
+ unsigned int accum1, accum2, hashval;
struct spanhash_top *hash;
i = INITIAL_HASH_SIZE;
hash->free = INITIAL_FREE(i);
memset(hash->data, 0, sizeof(struct spanhash) * (1<<i));
- /* an 8-byte shift register made of accum1 and accum2. New
- * bytes come at LSB of accum2, and shifted up to accum1
- */
- for (i = accum1 = accum2 = 0; i < 7; i++, sz--) {
- accum1 = (accum1 << 8) | (accum2 >> 24);
- accum2 = (accum2 << 8) | *buf++;
- }
+ n = 0;
+ accum1 = accum2 = 0;
while (sz) {
- accum1 = (accum1 << 8) | (accum2 >> 24);
- accum2 = (accum2 << 8) | *buf++;
- hashval = (accum1 + accum2 * 0x61) % HASHBASE;
- hash = add_spanhash(hash, hashval);
+ unsigned int c = *buf++;
+ unsigned int old_1 = accum1;
sz--;
+ accum1 = (accum1 << 7) ^ (accum2 >> 25);
+ accum2 = (accum2 << 7) ^ (old_1 >> 25);
+ accum1 += c;
+ if (++n < 64 && c != '\n')
+ continue;
+ hashval = (accum1 + accum2 * 0x61) % HASHBASE;
+ hash = add_spanhash(hash, hashval, n);
+ n = 0;
+ accum1 = accum2 = 0;
}
return hash;
}
struct spanhash_top *src_count, *dst_count;
unsigned long sc, la;
- if (src_size < 8 || dst_size < 8)
- return -1;
-
src_count = dst_count = NULL;
if (src_count_p)
src_count = *src_count_p;