Check and document the options to prevent mistakes.
[gitweb.git] / diffcore-delta.c
index f8a751837e3e67fef57e3de9670e5a2defc6652a..7338a40c5964ae6ddfb855465249fc1a2fa5a2a3 100644 (file)
 
 /* Wild guess at the initial hash size */
 #define INITIAL_HASH_SIZE 9
-#define HASHBASE 65537 /* next_prime(2^16) */
+
 /* We leave proportionally more slack in a smaller hash table, but do
  * not let a table grow while it still has too many unused slots.
  */
 #define INITIAL_FREE(sz_log2) ((1<<(sz_log2))*(sz_log2-3)/(sz_log2))
 
+/* A prime rather carefully chosen between 2^16..2^17, so that
+ * HASHBASE < INITIAL_FREE(17).  We want to keep the maximum hashtable
+ * size at the current 1<<17 maximum, which can hold this many
+ * different values before overflowing into a hashtable of size 1<<18.
+ */
+#define HASHBASE 107927
+
 struct spanhash {
-       unsigned long hashval;
-       unsigned long cnt;
+       unsigned int hashval;
+       unsigned int cnt;
 };
 struct spanhash_top {
        int alloc_log2;
@@ -43,7 +50,7 @@ struct spanhash_top {
 };
 
 static struct spanhash *spanhash_find(struct spanhash_top *top,
-                                     unsigned long hashval)
+                                     unsigned int hashval)
 {
        int sz = 1 << top->alloc_log2;
        int bucket = hashval & (sz - 1);
@@ -92,7 +99,7 @@ static struct spanhash_top *spanhash_rehash(struct spanhash_top *orig)
 }
 
 static struct spanhash_top *add_spanhash(struct spanhash_top *top,
-                                        unsigned long hashval)
+                                        unsigned int hashval, int cnt)
 {
        int bucket, lim;
        struct spanhash *h;
@@ -103,14 +110,14 @@ static struct spanhash_top *add_spanhash(struct spanhash_top *top,
                h = &(top->data[bucket++]);
                if (!h->cnt) {
                        h->hashval = hashval;
-                       h->cnt = 1;
+                       h->cnt = cnt;
                        top->free--;
                        if (top->free < 0)
                                return spanhash_rehash(top);
                        return top;
                }
                if (h->hashval == hashval) {
-                       h->cnt++;
+                       h->cnt += cnt;
                        return top;
                }
                if (lim <= bucket)
@@ -118,10 +125,10 @@ static struct spanhash_top *add_spanhash(struct spanhash_top *top,
        }
 }
 
-static struct spanhash_top *hash_chars(unsigned char *buf, unsigned long sz)
+static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz)
 {
-       int i;
-       unsigned long accum1, accum2, hashval;
+       int i, n;
+       unsigned int accum1, accum2, hashval;
        struct spanhash_top *hash;
 
        i = INITIAL_HASH_SIZE;
@@ -130,19 +137,21 @@ static struct spanhash_top *hash_chars(unsigned char *buf, unsigned long sz)
        hash->free = INITIAL_FREE(i);
        memset(hash->data, 0, sizeof(struct spanhash) * (1<<i));
 
-       /* an 8-byte shift register made of accum1 and accum2.  New
-        * bytes come at LSB of accum2, and shifted up to accum1
-        */
-       for (i = accum1 = accum2 = 0; i < 7; i++, sz--) {
-               accum1 = (accum1 << 8) | (accum2 >> 24);
-               accum2 = (accum2 << 8) | *buf++;
-       }
+       n = 0;
+       accum1 = accum2 = 0;
        while (sz) {
-               accum1 = (accum1 << 8) | (accum2 >> 24);
-               accum2 = (accum2 << 8) | *buf++;
-               hashval = (accum1 + accum2 * 0x61) % HASHBASE;
-               hash = add_spanhash(hash, hashval);
+               unsigned int c = *buf++;
+               unsigned int old_1 = accum1;
                sz--;
+               accum1 = (accum1 << 7) ^ (accum2 >> 25);
+               accum2 = (accum2 << 7) ^ (old_1 >> 25);
+               accum1 += c;
+               if (++n < 64 && c != '\n')
+                       continue;
+               hashval = (accum1 + accum2 * 0x61) % HASHBASE;
+               hash = add_spanhash(hash, hashval, n);
+               n = 0;
+               accum1 = accum2 = 0;
        }
        return hash;
 }
@@ -159,9 +168,6 @@ int diffcore_count_changes(void *src, unsigned long src_size,
        struct spanhash_top *src_count, *dst_count;
        unsigned long sc, la;
 
-       if (src_size < 8 || dst_size < 8)
-               return -1;
-
        src_count = dst_count = NULL;
        if (src_count_p)
                src_count = *src_count_p;