static unsigned char null_sha1[20] = { 0, };
 
 static int reverse_diff;
+static int use_size_cache;
 
 static const char *external_diff(void)
 {
        return 1;
 }
 
+static struct sha1_size_cache {
+       unsigned char sha1[20]; /* 20-byte object name; the key compared by locate_size_cache() */
+       unsigned long size;     /* size value stored alongside that object name */
+} **sha1_size_cache;            /* table of entry pointers, searched by bisection on sha1 */
+static int sha1_size_cache_nr, sha1_size_cache_alloc; /* entries in use / pointer slots allocated */
+
+/*
+ * Find the size-cache entry whose object name equals "sha1".
+ *
+ * The table is bisected with memcmp() on the 20-byte name; entries that
+ * compare greater are kept at lower indexes.
+ *
+ * On a hit the existing entry is returned and "size" is ignored.
+ * On a miss, passing UINT_MAX as "size" means "look up only" and NULL
+ * is returned; any other value makes the function insert a new entry
+ * recording that size at the position that keeps the table ordered,
+ * and return it.
+ */
+static struct sha1_size_cache *locate_size_cache(unsigned char *sha1,
+                                                unsigned long size)
+{
+       struct sha1_size_cache *entry;
+       int lo = 0;
+       int hi = sha1_size_cache_nr;
+       int tail;
+
+       /* Bisect the half-open range [lo, hi). */
+       while (lo < hi) {
+               int mid = (lo + hi) >> 1;
+               int cmp;
+
+               entry = sha1_size_cache[mid];
+               cmp = memcmp(entry->sha1, sha1, 20);
+               if (cmp == 0)
+                       return entry;
+               if (cmp < 0)
+                       hi = mid;
+               else
+                       lo = mid + 1;
+       }
+
+       /* Miss: the UINT_MAX sentinel asks for a lookup without insertion. */
+       if (size == UINT_MAX)
+               return NULL;
+
+       /* Grow the pointer table when every slot is taken. */
+       if (sha1_size_cache_nr >= sha1_size_cache_alloc) {
+               sha1_size_cache_alloc = alloc_nr(sha1_size_cache_alloc);
+               sha1_size_cache = xrealloc(sha1_size_cache,
+                                          sizeof(*sha1_size_cache) *
+                                          sha1_size_cache_alloc);
+       }
+
+       /*
+        * "lo" is the insertion point.  Shift everything from there up
+        * by one slot; when appending, this moves zero elements.
+        */
+       tail = sha1_size_cache_nr - lo;
+       sha1_size_cache_nr++;
+       memmove(sha1_size_cache + lo + 1, sha1_size_cache + lo,
+               tail * sizeof(*sha1_size_cache));
+
+       entry = xmalloc(sizeof(*entry));
+       memcpy(entry->sha1, sha1, 20);
+       entry->size = size;
+       sha1_size_cache[lo] = entry;
+       return entry;
+}
+
 /*
  * While doing rename detection and pickaxe operation, we may need to
  * grab the data for the blob (or file) for our own in-core comparison.
  * diff_filespec has data and size fields for this purpose.
  */
-int diff_populate_filespec(struct diff_filespec *s)
+int diff_populate_filespec(struct diff_filespec *s, int size_only)
 {
        int err = 0;
        if (!DIFF_FILE_VALID(s))
        if (S_ISDIR(s->mode))
                return -1;
 
+       if (!use_size_cache)
+               size_only = 0;
+
        if (s->data)
                return err;
        if (!s->sha1_valid ||
                s->size = st.st_size;
                if (!s->size)
                        goto empty;
+               if (size_only)
+                       return 0;
                if (S_ISLNK(st.st_mode)) {
                        int ret;
                        s->data = xmalloc(s->size);
                close(fd);
        }
        else {
+               /* We cannot do size only for SHA1 blobs */
                char type[20];
+               struct sha1_size_cache *e;
+
+               if (size_only) {
+                       e = locate_size_cache(s->sha1, UINT_MAX);
+                       if (e) {
+                               s->size = e->size;
+                               return 0;
+                       }
+               }
                s->data = read_sha1_file(s->sha1, type, &s->size);
                s->should_free = 1;
+               if (s->data && size_only)
+                       locate_size_cache(s->sha1, s->size);
        }
        return 0;
 }
                return;
        }
        else {
-               if (diff_populate_filespec(one))
+               if (diff_populate_filespec(one, 0))
                        die("cannot read data blob for %s", one->path);
                prep_temp_blob(temp, one->data, one->size,
                               one->sha1, one->mode);
 {
        if (flags & DIFF_SETUP_REVERSE)
                reverse_diff = 1;
+       if (flags & DIFF_SETUP_USE_CACHE) {
+               if (!active_cache)
+                       /* read-cache does not die even when it fails
+                        * so it is safe for us to do this here.  Also
+                        * it does not smudge active_cache or active_nr
+                        * when it fails, so we do not have to worry about
+                        * cleaning it up ourselves either.
+                        */
+                       read_cache();
+       }
+       if (flags & DIFF_SETUP_USE_SIZE_CACHE)
+               use_size_cache = 1;
+       
 }
 
 struct diff_queue_struct diff_queued_diff;
 
        if (src->sha1_valid && dst->sha1_valid &&
            !memcmp(src->sha1, dst->sha1, 20))
                return 1;
-       if (diff_populate_filespec(src) || diff_populate_filespec(dst))
-               /* this is an error but will be caught downstream */
+       if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1))
+               return 0;
+       if (src->size != dst->size)
+               return 0;
+       if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
                return 0;
        if (src->size == dst->size &&
            !memcmp(src->data, dst->data, src->size))
         * dst, and then some edit has been applied to dst.
         *
         * Compare them and return how similar they are, representing
-        * the score as an integer between 0 and 10000, except
-        * where they match exactly it is considered better than anything
-        * else.
+        * the score as an integer between 0 and MAX_SCORE.
+        *
+        * When there is an exact match, it is considered a better
+        * match than anything else; the destination does not even
+        * call into this function in that case.
         */
        void *delta;
        unsigned long delta_size, base_size;
        /* We would not consider edits that change the file size so
         * drastically.  delta_size must be smaller than
         * (MAX_SCORE-minimum_score)/MAX_SCORE * min(src->size, dst->size).
+        *
         * Note that base_size == 0 case is handled here already
         * and the final score computation below would not have a
         * divide-by-zero issue.
        if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
                return 0;
 
+       if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
+               return 0; /* error but caught downstream */
+
        delta = diff_delta(src->data, src->size,
                           dst->data, dst->size,
                           &delta_size);