Merge branch 'master' into jc/fsck-reflog

diff --git a/diff-delta.c b/diff-delta.c

index 45df786b0a099e03bcca57f40047be04635e130d..9f998d0a73e0127d3a68a7caecb3727569149871 100644 (file)
--- a/diff-delta.c
+++ b/diff-delta.c
@@ -18,11 +18,9 @@
   *  licensing gets turned into GPLv2 within this project.
   */
  
-#include <stdlib.h>
-#include <string.h>
+#include "git-compat-util.h"
  #include "delta.h"
  
-
  /* maximum hash entry list for the same hash bucket */
  #define HASH_LIMIT 64
  
@@ -131,33 +129,35 @@ struct delta_index {
         const void *src_buf;
         unsigned long src_size;
         unsigned int hash_mask;
-       struct index_entry *hash[0];
+       struct index_entry *hash[FLEX_ARRAY];
  };
  
  struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
  {
-       unsigned int i, hsize, hmask, entries, *hash_count;
+       unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
         const unsigned char *data, *buffer = buf;
         struct delta_index *index;
         struct index_entry *entry, **hash;
         void *mem;
+       unsigned long memsize;
  
         if (!buf || !bufsize)
                 return NULL;
  
         /* Determine index hash size.  Note that indexing skips the
-          first byte to allow for optimizing the rabin polynomial
+          first byte to allow for optimizing the Rabin's polynomial
            initialization in create_delta(). */
         entries = (bufsize - 1)  / RABIN_WINDOW;
         hsize = entries / 4;
-       for (i = 4; (1 << i) < hsize && i < 31; i++);
+       for (i = 4; (1u << i) < hsize && i < 31; i++);
         hsize = 1 << i;
         hmask = hsize - 1;
  
         /* allocate lookup index */
-       mem = malloc(sizeof(*index) +
-                    sizeof(*hash) * hsize +
-                    sizeof(*entry) * entries);
+       memsize = sizeof(*index) +
+                 sizeof(*hash) * hsize +
+                 sizeof(*entry) * entries;
+       mem = malloc(memsize);
         if (!mem)
                 return NULL;
         index = mem;
@@ -179,23 +179,30 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
         }
  
         /* then populate the index */
-       data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
-       while (data >= buffer) {
+       prev_val = ~0;
+       for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
+            data >= buffer;
+            data -= RABIN_WINDOW) {
                 unsigned int val = 0;
                 for (i = 1; i <= RABIN_WINDOW; i++)
                         val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
-               i = val & hmask;
-               entry->ptr = data + RABIN_WINDOW;
-               entry->val = val;
-               entry->next = hash[i];
-               hash[i] = entry++;
-               hash_count[i]++;
-               data -= RABIN_WINDOW;
+               if (val == prev_val) {
+                       /* keep the lowest of consecutive identical blocks */
+                       entry[-1].ptr = data + RABIN_WINDOW;
+               } else {
+                       prev_val = val;
+                       i = val & hmask;
+                       entry->ptr = data + RABIN_WINDOW;
+                       entry->val = val;
+                       entry->next = hash[i];
+                       hash[i] = entry++;
+                       hash_count[i]++;
+               }
         }
  
         /*
          * Determine a limit on the number of entries in the same hash
-        * bucket.  This guard us against patological data sets causing
+        * bucket.  This guards us against pathological data sets causing
          * really bad hash distribution with most entries in the same hash
          * bucket that would bring us to O(m*n) computing costs (m and n
          * corresponding to reference and target buffer sizes).
@@ -230,7 +237,7 @@ void free_delta_index(struct delta_index *index)
  
  /*
   * The maximum size for any opcode sequence, including the initial header
- * plus rabin window plus biggest copy.
+ * plus Rabin window plus biggest copy.
   */
  #define MAX_OP_SIZE    (5 + 5 + 1 + RABIN_WINDOW + 7)
  
@@ -274,7 +281,7 @@ create_delta(const struct delta_index *index,
         ref_data = index->src_buf;
         ref_top = ref_data + index->src_size;
         data = trg_buf;
-       top = trg_buf + trg_size;
+       top = (const unsigned char *) trg_buf + trg_size;
  
         outpos++;
         val = 0;
@@ -382,7 +389,7 @@ create_delta(const struct delta_index *index,
                                 outsize = max_size + MAX_OP_SIZE + 1;
                         if (max_size && outpos > max_size)
                                 break;
-                       out = realloc(out, outsize);
+                       out = xrealloc(out, outsize);
                         if (!out) {
                                 free(tmp);
                                 return NULL;