Merge branch 'jc/maint-1.6.0-keep-pack' into maint
author Junio C Hamano <gitster@pobox.com>
Thu, 9 Apr 2009 06:21:10 +0000 (23:21 -0700)
committer Junio C Hamano <gitster@pobox.com>
Thu, 9 Apr 2009 06:21:10 +0000 (23:21 -0700)
* jc/maint-1.6.0-keep-pack:
pack-objects: don't loosen objects available in alternate or kept packs
t7700: demonstrate repack flaw which may loosen objects unnecessarily
Remove --kept-pack-only option and associated infrastructure
pack-objects: only repack or loosen objects residing in "local" packs
git-repack.sh: don't use --kept-pack-only option to pack-objects
t7700-repack: add two new tests demonstrating repacking flaws
is_kept_pack(): final clean-up
Simplify is_kept_pack()
Consolidate ignore_packed logic more
has_sha1_kept_pack(): take "struct rev_info"
has_sha1_pack(): refactor "pretend these packs do not exist" interface
git-repack: resist stray environment variable

Conflicts:
t/t7700-repack.sh

1  2 
builtin-count-objects.c
builtin-fsck.c
builtin-pack-objects.c
cache.h
diff.c
git-repack.sh
revision.c
revision.h
sha1_file.c
t/t7700-repack.sh
diff --combined builtin-count-objects.c
index 62fd1f0961a29cc81032d20b3fe37fcc91293407,c095e8dd2bc6ce93df39e9f6dbffb73ae8534491..b814fe5070873f5c87fc6bbfde480e3b0a83e397
@@@ -5,7 -5,6 +5,7 @@@
   */
  
  #include "cache.h"
 +#include "dir.h"
  #include "builtin.h"
  #include "parse-options.h"
  
@@@ -22,7 -21,9 +22,7 @@@ static void count_objects(DIR *d, char 
                const char *cp;
                int bad = 0;
  
 -              if ((ent->d_name[0] == '.') &&
 -                  (ent->d_name[1] == 0 ||
 -                   ((ent->d_name[1] == '.') && (ent->d_name[2] == 0))))
 +              if (is_dot_or_dotdot(ent->d_name))
                        continue;
                for (cp = ent->d_name; *cp; cp++) {
                        int ch = *cp;
@@@ -42,7 -43,7 +42,7 @@@
                        if (lstat(path, &st) || !S_ISREG(st.st_mode))
                                bad = 1;
                        else
 -                              (*loose_size) += xsize_t(st.st_blocks);
 +                              (*loose_size) += xsize_t(on_disk_bytes(st));
                }
                if (bad) {
                        if (verbose) {
@@@ -60,7 -61,7 +60,7 @@@
                hex[40] = 0;
                if (get_sha1_hex(hex, sha1))
                        die("internal error");
-               if (has_sha1_pack(sha1, NULL))
+               if (has_sha1_pack(sha1))
                        (*packed_loose)++;
        }
  }
@@@ -103,7 -104,6 +103,7 @@@ int cmd_count_objects(int argc, const c
        if (verbose) {
                struct packed_git *p;
                unsigned long num_pack = 0;
 +              unsigned long size_pack = 0;
                if (!packed_git)
                        prepare_packed_git();
                for (p = packed_git; p; p = p->next) {
                        if (open_pack_index(p))
                                continue;
                        packed += p->num_objects;
 +                      size_pack += p->pack_size + p->index_size;
                        num_pack++;
                }
                printf("count: %lu\n", loose);
 -              printf("size: %lu\n", loose_size / 2);
 +              printf("size: %lu\n", loose_size / 1024);
                printf("in-pack: %lu\n", packed);
                printf("packs: %lu\n", num_pack);
 +              printf("size-pack: %lu\n", size_pack / 1024);
                printf("prune-packable: %lu\n", packed_loose);
                printf("garbage: %lu\n", garbage);
        }
        else
                printf("%lu objects, %lu kilobytes\n",
 -                     loose, loose_size / 2);
 +                     loose, loose_size / 1024);
        return 0;
  }
diff --combined builtin-fsck.c
index 64dffa542170fcceedc766ae6551134f61779893,491375dc59310ad996f280cb1b0c398154f508a4..6436bc224840f11af2f7fa26c61b62c25d78d865
@@@ -10,7 -10,6 +10,7 @@@
  #include "tree-walk.h"
  #include "fsck.h"
  #include "parse-options.h"
 +#include "dir.h"
  
  #define REACHABLE 0x0001
  #define SEEN      0x0002
@@@ -23,7 -22,6 +23,7 @@@ static int check_full
  static int check_strict;
  static int keep_cache_objects;
  static unsigned char head_sha1[20];
 +static const char *head_points_at;
  static int errors_found;
  static int write_lost_and_found;
  static int verbose;
@@@ -160,7 -158,7 +160,7 @@@ static void check_reachable_object(stru
         * do a full fsck
         */
        if (!obj->parsed) {
-               if (has_sha1_pack(obj->sha1, NULL))
+               if (has_sha1_pack(obj->sha1))
                        return; /* it is in pack - forget about it */
                printf("missing %s %s\n", typename(obj->type), sha1_to_hex(obj->sha1));
                errors_found |= ERROR_REACHABLE;
@@@ -224,16 -222,12 +224,16 @@@ static void check_unreachable_object(st
                                char *buf = read_sha1_file(obj->sha1,
                                                &type, &size);
                                if (buf) {
 -                                      fwrite(buf, size, 1, f);
 +                                      if (fwrite(buf, size, 1, f) != 1)
 +                                              die("Could not write %s: %s",
 +                                                  filename, strerror(errno));
                                        free(buf);
                                }
                        } else
                                fprintf(f, "%s\n", sha1_to_hex(obj->sha1));
 -                      fclose(f);
 +                      if (fclose(f))
 +                              die("Could not finish %s: %s",
 +                                  filename, strerror(errno));
                }
                return;
        }
@@@ -397,12 -391,19 +397,12 @@@ static void fsck_dir(int i, char *path
        while ((de = readdir(dir)) != NULL) {
                char name[100];
                unsigned char sha1[20];
 -              int len = strlen(de->d_name);
  
 -              switch (len) {
 -              case 2:
 -                      if (de->d_name[1] != '.')
 -                              break;
 -              case 1:
 -                      if (de->d_name[0] != '.')
 -                              break;
 +              if (is_dot_or_dotdot(de->d_name))
                        continue;
 -              case 38:
 +              if (strlen(de->d_name) == 38) {
                        sprintf(name, "%02x", i);
 -                      memcpy(name+2, de->d_name, len+1);
 +                      memcpy(name+2, de->d_name, 39);
                        if (get_sha1_hex(name, sha1) < 0)
                                break;
                        add_sha1_list(sha1, DIRENT_SORT_HINT(de));
@@@ -474,8 -475,6 +474,8 @@@ static int fsck_handle_ref(const char *
  
  static void get_default_heads(void)
  {
 +      if (head_points_at && !is_null_sha1(head_sha1))
 +              fsck_handle_ref("HEAD", head_sha1, 0, NULL);
        for_each_ref(fsck_handle_ref, NULL);
        if (include_reflogs)
                for_each_reflog(fsck_handle_reflog, NULL);
@@@ -515,13 -514,14 +515,13 @@@ static void fsck_object_dir(const char 
  
  static int fsck_head_link(void)
  {
 -      unsigned char sha1[20];
        int flag;
        int null_is_error = 0;
 -      const char *head_points_at = resolve_ref("HEAD", sha1, 0, &flag);
  
        if (verbose)
                fprintf(stderr, "Checking HEAD link\n");
  
 +      head_points_at = resolve_ref("HEAD", head_sha1, 0, &flag);
        if (!head_points_at)
                return error("Invalid HEAD");
        if (!strcmp(head_points_at, "HEAD"))
        else if (prefixcmp(head_points_at, "refs/heads/"))
                return error("HEAD points to something strange (%s)",
                             head_points_at);
 -      if (is_null_sha1(sha1)) {
 +      if (is_null_sha1(head_sha1)) {
                if (null_is_error)
                        return error("HEAD: detached HEAD points at nothing");
                fprintf(stderr, "notice: HEAD points to an unborn branch (%s)\n",
@@@ -586,7 -586,6 +586,7 @@@ static struct option fsck_opts[] = 
  int cmd_fsck(int argc, const char **argv, const char *prefix)
  {
        int i, heads;
 +      struct alternate_object_database *alt;
  
        errors_found = 0;
  
  
        fsck_head_link();
        fsck_object_dir(get_object_directory());
 +
 +      prepare_alt_odb();
 +      for (alt = alt_odb_list; alt; alt = alt->next) {
 +              char namebuf[PATH_MAX];
 +              int namelen = alt->name - alt->base;
 +              memcpy(namebuf, alt->base, namelen);
 +              namebuf[namelen - 1] = 0;
 +              fsck_object_dir(namebuf);
 +      }
 +
        if (check_full) {
 -              struct alternate_object_database *alt;
                struct packed_git *p;
 -              prepare_alt_odb();
 -              for (alt = alt_odb_list; alt; alt = alt->next) {
 -                      char namebuf[PATH_MAX];
 -                      int namelen = alt->name - alt->base;
 -                      memcpy(namebuf, alt->base, namelen);
 -                      namebuf[namelen - 1] = 0;
 -                      fsck_object_dir(namebuf);
 -              }
 +
                prepare_packed_git();
                for (p = packed_git; p; p = p->next)
                        /* verify gives error messages itself */
        }
  
        heads = 0;
 -      for (i = 1; i < argc; i++) {
 +      for (i = 0; i < argc; i++) {
                const char *arg = argv[i];
 -              if (!get_sha1(arg, head_sha1)) {
 -                      struct object *obj = lookup_object(head_sha1);
 +              unsigned char sha1[20];
 +              if (!get_sha1(arg, sha1)) {
 +                      struct object *obj = lookup_object(sha1);
  
                        /* Error is printed by lookup_object(). */
                        if (!obj)
diff --combined builtin-pack-objects.c
index 8ca46c8deb09a4c7d32947033eab804b108e0a34,ad3f8e7751a8de2697288966a39ce54abd7b3242..9fc3b355470466bd5663e1fca1fe759e18869ee2
@@@ -78,7 -78,7 +78,7 @@@ static int progress = 1
  static int window = 10;
  static uint32_t pack_size_limit, pack_size_limit_cfg;
  static int depth = 50;
 -static int delta_search_threads = 1;
 +static int delta_search_threads;
  static int pack_to_stdout;
  static int num_preferred_base;
  static struct progress *progress_state;
@@@ -195,16 -195,16 +195,16 @@@ static int check_pack_inflate(struct pa
        int st;
  
        memset(&stream, 0, sizeof(stream));
 -      inflateInit(&stream);
 +      git_inflate_init(&stream);
        do {
                in = use_pack(p, w_curs, offset, &stream.avail_in);
                stream.next_in = in;
                stream.next_out = fakebuf;
                stream.avail_out = sizeof(fakebuf);
 -              st = inflate(&stream, Z_FINISH);
 +              st = git_inflate(&stream, Z_FINISH);
                offset += stream.next_in - in;
        } while (st == Z_OK || st == Z_BUF_ERROR);
 -      inflateEnd(&stream);
 +      git_inflate_end(&stream);
        return (st == Z_STREAM_END &&
                stream.total_out == expect &&
                stream.total_in == len) ? 0 : -1;
@@@ -286,7 -286,6 +286,7 @@@ static unsigned long write_object(struc
                                 */
  
        if (!to_reuse) {
 +              no_reuse:
                if (!usable_delta) {
                        buf = read_sha1_file(entry->idx.sha1, &type, &size);
                        if (!buf)
                struct revindex_entry *revidx;
                off_t offset;
  
 -              if (entry->delta) {
 +              if (entry->delta)
                        type = (allow_ofs_delta && entry->delta->idx.offset) ?
                                OBJ_OFS_DELTA : OBJ_REF_DELTA;
 -                      reused_delta++;
 -              }
                hdrlen = encode_header(type, entry->size, header);
 +
                offset = entry->in_pack_offset;
                revidx = find_pack_revindex(p, offset);
                datalen = revidx[1].offset - offset;
                if (!pack_to_stdout && p->index_version > 1 &&
 -                  check_pack_crc(p, &w_curs, offset, datalen, revidx->nr))
 -                      die("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1));
 +                  check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) {
 +                      error("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1));
 +                      unuse_pack(&w_curs);
 +                      goto no_reuse;
 +              }
 +
                offset += entry->in_pack_header_size;
                datalen -= entry->in_pack_header_size;
 +              if (!pack_to_stdout && p->index_version == 1 &&
 +                  check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) {
 +                      error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1));
 +                      unuse_pack(&w_curs);
 +                      goto no_reuse;
 +              }
 +
                if (type == OBJ_OFS_DELTA) {
                        off_t ofs = entry->idx.offset - entry->delta->idx.offset;
                        unsigned pos = sizeof(dheader) - 1;
                        dheader[pos] = ofs & 127;
                        while (ofs >>= 7)
                                dheader[--pos] = 128 | (--ofs & 127);
 -                      if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit)
 +                      if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
 +                              unuse_pack(&w_curs);
                                return 0;
 +                      }
                        sha1write(f, header, hdrlen);
                        sha1write(f, dheader + pos, sizeof(dheader) - pos);
                        hdrlen += sizeof(dheader) - pos;
 +                      reused_delta++;
                } else if (type == OBJ_REF_DELTA) {
 -                      if (limit && hdrlen + 20 + datalen + 20 >= limit)
 +                      if (limit && hdrlen + 20 + datalen + 20 >= limit) {
 +                              unuse_pack(&w_curs);
                                return 0;
 +                      }
                        sha1write(f, header, hdrlen);
                        sha1write(f, entry->delta->idx.sha1, 20);
                        hdrlen += 20;
 +                      reused_delta++;
                } else {
 -                      if (limit && hdrlen + datalen + 20 >= limit)
 +                      if (limit && hdrlen + datalen + 20 >= limit) {
 +                              unuse_pack(&w_curs);
                                return 0;
 +                      }
                        sha1write(f, header, hdrlen);
                }
 -
 -              if (!pack_to_stdout && p->index_version == 1 &&
 -                  check_pack_inflate(p, &w_curs, offset, datalen, entry->size))
 -                      die("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1));
                copy_pack_data(f, p, &w_curs, offset, datalen);
                unuse_pack(&w_curs);
                reused++;
@@@ -488,8 -473,9 +488,8 @@@ static void write_pack_file(void
                } else {
                        char tmpname[PATH_MAX];
                        int fd;
 -                      snprintf(tmpname, sizeof(tmpname),
 -                               "%s/pack/tmp_pack_XXXXXX", get_object_directory());
 -                      fd = xmkstemp(tmpname);
 +                      fd = odb_mkstemp(tmpname, sizeof(tmpname),
 +                                       "pack/tmp_pack_XXXXXX");
                        pack_tmp_name = xstrdup(tmpname);
                        f = sha1fd(fd, pack_tmp_name);
                }
@@@ -1031,11 -1017,9 +1031,11 @@@ static void check_object(struct object_
                 * We want in_pack_type even if we do not reuse delta
                 * since non-delta representations could still be reused.
                 */
 -              used = unpack_object_header_gently(buf, avail,
 +              used = unpack_object_header_buffer(buf, avail,
                                                   &entry->in_pack_type,
                                                   &entry->size);
 +              if (used == 0)
 +                      goto give_up;
  
                /*
                 * Determine if this is a delta and if so whether we can
                        /* Not a delta hence we've already got all we need. */
                        entry->type = entry->in_pack_type;
                        entry->in_pack_header_size = used;
 +                      if (entry->type < OBJ_COMMIT || entry->type > OBJ_BLOB)
 +                              goto give_up;
                        unuse_pack(&w_curs);
                        return;
                case OBJ_REF_DELTA:
                        ofs = c & 127;
                        while (c & 128) {
                                ofs += 1;
 -                              if (!ofs || MSB(ofs, 7))
 -                                      die("delta base offset overflow in pack for %s",
 -                                          sha1_to_hex(entry->idx.sha1));
 +                              if (!ofs || MSB(ofs, 7)) {
 +                                      error("delta base offset overflow in pack for %s",
 +                                            sha1_to_hex(entry->idx.sha1));
 +                                      goto give_up;
 +                              }
                                c = buf[used_0++];
                                ofs = (ofs << 7) + (c & 127);
                        }
 -                      if (ofs >= entry->in_pack_offset)
 -                              die("delta base offset out of bound for %s",
 -                                  sha1_to_hex(entry->idx.sha1));
                        ofs = entry->in_pack_offset - ofs;
 +                      if (ofs <= 0 || ofs >= entry->in_pack_offset) {
 +                              error("delta base offset out of bound for %s",
 +                                    sha1_to_hex(entry->idx.sha1));
 +                              goto give_up;
 +                      }
                        if (reuse_delta && !entry->preferred_base) {
                                struct revindex_entry *revidx;
                                revidx = find_pack_revindex(p, ofs);
 +                              if (!revidx)
 +                                      goto give_up;
                                base_ref = nth_packed_object_sha1(p, revidx->nr);
                        }
                        entry->in_pack_header_size = used + used_0;
                         */
                        entry->type = entry->in_pack_type;
                        entry->delta = base_entry;
 +                      entry->delta_size = entry->size;
                        entry->delta_sibling = base_entry->delta_child;
                        base_entry->delta_child = entry;
                        unuse_pack(&w_curs);
                         */
                        entry->size = get_size_from_delta(p, &w_curs,
                                        entry->in_pack_offset + entry->in_pack_header_size);
 +                      if (entry->size == 0)
 +                              goto give_up;
                        unuse_pack(&w_curs);
                        return;
                }
                 * with sha1_object_info() to find about the object type
                 * at this point...
                 */
 +              give_up:
                unuse_pack(&w_curs);
        }
  
@@@ -1293,7 -1265,7 +1293,7 @@@ static int try_delta(struct unpacked *t
                max_size = trg_entry->delta_size;
                ref_depth = trg->depth;
        }
 -      max_size = max_size * (max_depth - src->depth) /
 +      max_size = (uint64_t)max_size * (max_depth - src->depth) /
                                                (max_depth - ref_depth + 1);
        if (max_size == 0)
                return 0;
@@@ -1412,10 -1384,12 +1412,10 @@@ static void find_deltas(struct object_e
                        int window, int depth, unsigned *processed)
  {
        uint32_t i, idx = 0, count = 0;
 -      unsigned int array_size = window * sizeof(struct unpacked);
        struct unpacked *array;
        unsigned long mem_usage = 0;
  
 -      array = xmalloc(array_size);
 -      memset(array, 0, array_size);
 +      array = xcalloc(window, sizeof(struct unpacked));
  
        for (;;) {
                struct object_entry *entry;
@@@ -1611,18 -1585,11 +1611,18 @@@ static void ll_find_deltas(struct objec
                find_deltas(list, &list_size, window, depth, processed);
                return;
        }
 +      if (progress > pack_to_stdout)
 +              fprintf(stderr, "Delta compression using %d threads.\n",
 +                              delta_search_threads);
  
        /* Partition the work amongst work threads. */
        for (i = 0; i < delta_search_threads; i++) {
                unsigned sub_size = list_size / (delta_search_threads - i);
  
 +              /* don't use too small segments or no deltas will be found */
 +              if (sub_size < 2*window && i+1 < delta_search_threads)
 +                      sub_size = 0;
 +
                p[i].window = window;
                p[i].depth = depth;
                p[i].processed = processed;
@@@ -1748,16 -1715,6 +1748,16 @@@ static void prepare_pack(int window, in
  
        get_object_details();
  
 +      /*
 +       * If we're locally repacking then we need to be doubly careful
 +       * from now on in order to make sure no stealth corruption gets
 +       * propagated to the new pack.  Clients receiving streamed packs
 +       * should validate everything they get anyway so no need to incur
 +       * the additional cost here in that case.
 +       */
 +      if (!pack_to_stdout)
 +              do_check_packed_object_crc = 1;
 +
        if (!nr_objects || !window || !depth)
                return;
  
                        if (entry->type < 0)
                                die("unable to get type of object %s",
                                    sha1_to_hex(entry->idx.sha1));
 +              } else {
 +                      if (entry->type < 0) {
 +                              /*
 +                               * This object is not found, but we
 +                               * don't have to include it anyway.
 +                               */
 +                              continue;
 +                      }
                }
  
                delta_list[n++] = entry;
@@@ -1966,11 -1915,7 +1966,7 @@@ static void add_objects_in_unpacked_pac
                const unsigned char *sha1;
                struct object *o;
  
-               for (i = 0; i < revs->num_ignore_packed; i++) {
-                       if (matches_pack_name(p, revs->ignore_packed[i]))
-                               break;
-               }
-               if (revs->num_ignore_packed <= i)
+               if (!p->pack_local || p->pack_keep)
                        continue;
                if (open_pack_index(p))
                        die("cannot open pack index");
        free(in_pack.array);
  }
  
+ static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1)
+ {
+       static struct packed_git *last_found = (void *)1;
+       struct packed_git *p;
+       p = (last_found != (void *)1) ? last_found : packed_git;
+       while (p) {
+               if ((!p->pack_local || p->pack_keep) &&
+                       find_pack_entry_one(sha1, p)) {
+                       last_found = p;
+                       return 1;
+               }
+               if (p == last_found)
+                       p = packed_git;
+               else
+                       p = p->next;
+               if (p == last_found)
+                       p = p->next;
+       }
+       return 0;
+ }
  static void loosen_unused_packed_objects(struct rev_info *revs)
  {
        struct packed_git *p;
        const unsigned char *sha1;
  
        for (p = packed_git; p; p = p->next) {
-               for (i = 0; i < revs->num_ignore_packed; i++) {
-                       if (matches_pack_name(p, revs->ignore_packed[i]))
-                               break;
-               }
-               if (revs->num_ignore_packed <= i)
+               if (!p->pack_local || p->pack_keep)
                        continue;
  
                if (open_pack_index(p))
  
                for (i = 0; i < p->num_objects; i++) {
                        sha1 = nth_packed_object_sha1(p, i);
-                       if (!locate_object_entry(sha1))
+                       if (!locate_object_entry(sha1) &&
+                               !has_sha1_pack_kept_or_nonlocal(sha1))
                                if (force_object_loose(sha1, p->mtime))
                                        die("unable to force loose object");
                }
@@@ -2208,7 -2173,6 +2224,6 @@@ int cmd_pack_objects(int argc, const ch
                        continue;
                }
                if (!strcmp("--unpacked", arg) ||
-                   !prefixcmp(arg, "--unpacked=") ||
                    !strcmp("--reflog", arg) ||
                    !strcmp("--all", arg)) {
                        use_internal_rev_list = 1;
diff --combined cache.h
index 189151de25ffd1a6671b7a70f359fa9b94b82173,23c16d0d99777cc9a6f23c6b23fb16d837b1742c..50179687b5fc9ed37933d06bb4cbfe988ce8b6c9
+++ b/cache.h
@@@ -6,22 -6,12 +6,22 @@@
  #include "hash.h"
  
  #include SHA1_HEADER
 -#include <zlib.h>
 +#ifndef git_SHA_CTX
 +#define git_SHA_CTX   SHA_CTX
 +#define git_SHA1_Init SHA1_Init
 +#define git_SHA1_Update       SHA1_Update
 +#define git_SHA1_Final        SHA1_Final
 +#endif
  
 +#include <zlib.h>
  #if defined(NO_DEFLATE_BOUND) || ZLIB_VERNUM < 0x1200
  #define deflateBound(c,s)  ((s) + (((s) + 7) >> 3) + (((s) + 63) >> 6) + 11)
  #endif
  
 +void git_inflate_init(z_streamp strm);
 +void git_inflate_end(z_streamp strm);
 +int git_inflate(z_streamp strm, int flush);
 +
  #if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
  #define DTYPE(de)     ((de)->d_type)
  #else
@@@ -119,26 -109,6 +119,26 @@@ struct ondisk_cache_entry 
        char name[FLEX_ARRAY]; /* more */
  };
  
 +/*
 + * This struct is used when CE_EXTENDED bit is 1
 + * The struct must match ondisk_cache_entry exactly from
 + * ctime till flags
 + */
 +struct ondisk_cache_entry_extended {
 +      struct cache_time ctime;
 +      struct cache_time mtime;
 +      unsigned int dev;
 +      unsigned int ino;
 +      unsigned int mode;
 +      unsigned int uid;
 +      unsigned int gid;
 +      unsigned int size;
 +      unsigned char sha1[20];
 +      unsigned short flags;
 +      unsigned short flags2;
 +      char name[FLEX_ARRAY]; /* more */
 +};
 +
  struct cache_entry {
        unsigned int ce_ctime;
        unsigned int ce_mtime;
  
  #define CE_NAMEMASK  (0x0fff)
  #define CE_STAGEMASK (0x3000)
 +#define CE_EXTENDED  (0x4000)
  #define CE_VALID     (0x8000)
  #define CE_STAGESHIFT 12
  
 -/* In-memory only */
 +/*
 + * Range 0xFFFF0000 in ce_flags is divided into
 + * two parts: in-memory flags and on-disk ones.
 + * Flags in CE_EXTENDED_FLAGS will get saved on-disk
 + * if you want to save a new flag, add it in
 + * CE_EXTENDED_FLAGS
 + *
 + * In-memory only flags
 + */
  #define CE_UPDATE    (0x10000)
  #define CE_REMOVE    (0x20000)
  #define CE_UPTODATE  (0x40000)
  #define CE_HASHED    (0x100000)
  #define CE_UNHASHED  (0x200000)
  
 +/*
 + * Extended on-disk flags
 + */
 +#define CE_INTENT_TO_ADD 0x20000000
 +/* CE_EXTENDED2 is for future extension */
 +#define CE_EXTENDED2 0x80000000
 +
 +#define CE_EXTENDED_FLAGS (CE_INTENT_TO_ADD)
 +
 +/*
 + * Safeguard to avoid saving wrong flags:
 + *  - CE_EXTENDED2 won't get saved until its semantic is known
 + *  - Bits in 0x0000FFFF have been saved in ce_flags already
 + *  - Bits in 0x003F0000 are currently in-memory flags
 + */
 +#if CE_EXTENDED_FLAGS & 0x803FFFFF
 +#error "CE_EXTENDED_FLAGS out of range"
 +#endif
 +
  /*
   * Copy the sha1 and stat state of a cache entry from one to
   * another. But we never change the name, or the hash state!
@@@ -228,9 -170,7 +228,9 @@@ static inline size_t ce_namelen(const s
  }
  
  #define ce_size(ce) cache_entry_size(ce_namelen(ce))
 -#define ondisk_ce_size(ce) ondisk_cache_entry_size(ce_namelen(ce))
 +#define ondisk_ce_size(ce) (((ce)->ce_flags & CE_EXTENDED) ? \
 +                          ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
 +                          ondisk_cache_entry_size(ce_namelen(ce)))
  #define ce_stage(ce) ((CE_STAGEMASK & (ce)->ce_flags) >> CE_STAGESHIFT)
  #define ce_uptodate(ce) ((ce)->ce_flags & CE_UPTODATE)
  #define ce_mark_uptodate(ce) ((ce)->ce_flags |= CE_UPTODATE)
@@@ -273,10 -213,8 +273,10 @@@ static inline int ce_to_dtype(const str
        (S_ISREG(mode) ? (S_IFREG | ce_permissions(mode)) : \
        S_ISLNK(mode) ? S_IFLNK : S_ISDIR(mode) ? S_IFDIR : S_IFGITLINK)
  
 -#define cache_entry_size(len) ((offsetof(struct cache_entry,name) + (len) + 8) & ~7)
 -#define ondisk_cache_entry_size(len) ((offsetof(struct ondisk_cache_entry,name) + (len) + 8) & ~7)
 +#define flexible_size(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7)
 +#define cache_entry_size(len) flexible_size(cache_entry,len)
 +#define ondisk_cache_entry_size(len) flexible_size(ondisk_cache_entry,len)
 +#define ondisk_cache_entry_extended_size(len) flexible_size(ondisk_cache_entry_extended,len)
  
  struct index_state {
        struct cache_entry **cache;
@@@ -317,7 -255,6 +317,7 @@@ static inline void remove_name_hash(str
  
  #define read_cache() read_index(&the_index)
  #define read_cache_from(path) read_index_from(&the_index, (path))
 +#define read_cache_preload(pathspec) read_index_preload(&the_index, (pathspec))
  #define is_cache_unborn() is_index_unborn(&the_index)
  #define read_cache_unmerged() read_index_unmerged(&the_index)
  #define write_cache(newfd, cache, entries) write_index(&the_index, (newfd))
@@@ -377,7 -314,6 +377,7 @@@ extern int is_bare_repository(void)
  extern int is_inside_git_dir(void);
  extern char *git_work_tree_cfg;
  extern int is_inside_work_tree(void);
 +extern int have_git_dir(void);
  extern const char *get_git_dir(void);
  extern char *get_object_directory(void);
  extern char *get_index_file(void);
@@@ -424,7 -360,6 +424,7 @@@ extern int init_db(const char *template
  
  /* Initialize and use the cache information */
  extern int read_index(struct index_state *);
 +extern int read_index_preload(struct index_state *, const char **pathspec);
  extern int read_index_from(struct index_state *, const char *path);
  extern int is_index_unborn(struct index_state *);
  extern int read_index_unmerged(struct index_state *);
@@@ -438,7 -373,6 +438,7 @@@ extern int index_name_pos(const struct 
  #define ADD_CACHE_OK_TO_REPLACE 2     /* Ok to replace file/directory */
  #define ADD_CACHE_SKIP_DFCHECK 4      /* Ok to skip DF conflict checks */
  #define ADD_CACHE_JUST_APPEND 8               /* Append only; tree.c::read_tree() */
 +#define ADD_CACHE_NEW_ONLY 16         /* Do not replace existing ones */
  extern int add_index_entry(struct index_state *, struct cache_entry *ce, int option);
  extern struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really);
  extern void rename_index_entry_at(struct index_state *, int pos, const char *new_name);
@@@ -447,8 -381,6 +447,8 @@@ extern int remove_file_from_index(struc
  #define ADD_CACHE_VERBOSE 1
  #define ADD_CACHE_PRETEND 2
  #define ADD_CACHE_IGNORE_ERRORS       4
 +#define ADD_CACHE_IGNORE_REMOVAL 8
 +#define ADD_CACHE_INTENT 16
  extern int add_to_index(struct index_state *, const char *path, struct stat *, int flags);
  extern int add_file_to_index(struct index_state *, const char *path, int flags);
  extern struct cache_entry *make_cache_entry(unsigned int mode, const unsigned char *sha1, const char *path, int stage, int refresh);
@@@ -464,6 -396,7 +464,6 @@@ extern int ie_modified(const struct ind
  
  extern int ce_path_match(const struct cache_entry *ce, const char **pathspec);
  extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, enum object_type type, const char *path);
 -extern int index_pipe(unsigned char *sha1, int fd, const char *type, int write_object);
  extern int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object);
  extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st);
  
@@@ -484,7 -417,6 +484,7 @@@ struct lock_file 
  };
  #define LOCK_DIE_ON_ERROR 1
  #define LOCK_NODEREF 2
 +extern NORETURN void unable_to_lock_index_die(const char *path, int err);
  extern int hold_lock_file_for_update(struct lock_file *, const char *path, int);
  extern int hold_lock_file_for_append(struct lock_file *, const char *path, int);
  extern int commit_lock_file(struct lock_file *);
@@@ -516,7 -448,6 +516,7 @@@ extern size_t packed_git_limit
  extern size_t delta_base_cache_limit;
  extern int auto_crlf;
  extern int fsync_object_files;
 +extern int core_preload_index;
  
  enum safe_crlf {
        SAFE_CRLF_FALSE = 0,
  extern enum safe_crlf safe_crlf;
  
  enum branch_track {
 +      BRANCH_TRACK_UNSPECIFIED = -1,
        BRANCH_TRACK_NEVER = 0,
        BRANCH_TRACK_REMOTE,
        BRANCH_TRACK_ALWAYS,
@@@ -587,13 -517,6 +587,13 @@@ static inline void hashclr(unsigned cha
  {
        memset(hash, 0, 20);
  }
 +extern int is_empty_blob_sha1(const unsigned char *sha1);
 +
 +#define EMPTY_TREE_SHA1_HEX \
 +      "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
 +#define EMPTY_TREE_SHA1_BIN \
 +       "\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60" \
 +       "\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04"
  
  int git_mkstemp(char *path, size_t n, const char *template);
  
@@@ -621,13 -544,11 +621,13 @@@ static inline int is_absolute_path(cons
  {
        return path[0] == '/' || has_dos_drive_prefix(path);
  }
 +int is_directory(const char *);
  const char *make_absolute_path(const char *path);
  const char *make_nonrelative_path(const char *path);
  const char *make_relative_path(const char *abs, const char *base);
 -int normalize_absolute_path(char *buf, const char *path);
 +int normalize_path_copy(char *dst, const char *src);
  int longest_ancestor_length(const char *path, const char *prefix_list);
 +char *strip_path_suffix(const char *path, const char *suffix);
  
  /* Read and unpack a sha1 file into memory, write memory to a sha1 file */
  extern int sha1_object_info(const unsigned char *, unsigned long *);
@@@ -637,14 -558,14 +637,14 @@@ extern int write_sha1_file(void *buf, u
  extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
  extern int force_object_loose(const unsigned char *sha1, time_t mtime);
  
 -/* just like read_sha1_file(), but non fatal in presence of bad objects */
 -extern void *read_object(const unsigned char *sha1, enum object_type *type, unsigned long *size);
 +/* global flag to enable extra checks when accessing packed objects */
 +extern int do_check_packed_object_crc;
  
  extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type);
  
  extern int move_temp_to_file(const char *tmpfile, const char *filename);
  
- extern int has_sha1_pack(const unsigned char *sha1, const char **ignore);
+ extern int has_sha1_pack(const unsigned char *sha1);
  extern int has_sha1_file(const unsigned char *sha1);
  extern int has_loose_object_nonlocal(const unsigned char *sha1);
  
@@@ -669,7 -590,6 +669,7 @@@ extern int read_ref(const char *filenam
  extern const char *resolve_ref(const char *path, unsigned char *sha1, int, int *);
  extern int dwim_ref(const char *str, int len, unsigned char *sha1, char **ref);
  extern int dwim_log(const char *str, int len, unsigned char *sha1, char **ref);
 +extern int interpret_nth_last_branch(const char *str, struct strbuf *);
  
  extern int refname_match(const char *abbrev_name, const char *full_name, const char **rules);
  extern const char *ref_rev_parse_rules[];
@@@ -696,8 -616,7 +696,8 @@@ enum date_mode 
        DATE_SHORT,
        DATE_LOCAL,
        DATE_ISO8601,
 -      DATE_RFC2822
 +      DATE_RFC2822,
 +      DATE_RAW
  };
  
  const char *show_date(unsigned long time, int timezone, enum date_mode mode);
@@@ -725,10 -644,6 +725,10 @@@ struct checkout 
  
  extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
  extern int has_symlink_leading_path(int len, const char *name);
 +extern int has_symlink_or_noent_leading_path(int len, const char *name);
 +extern int has_dirs_only_path(int len, const char *name, int prefix_len);
 +extern void invalidate_lstat_cache(int len, const char *name);
 +extern void clear_lstat_cache(void);
  
  extern struct alternate_object_database {
        struct alternate_object_database *next;
  } *alt_odb_list;
  extern void prepare_alt_odb(void);
  extern void add_to_alternates_file(const char *reference);
 +typedef int alt_odb_fn(struct alternate_object_database *, void *);
 +extern void foreach_alt_odb(alt_odb_fn, void*);
  
  struct pack_window {
        struct pack_window *next;
@@@ -808,11 -721,7 +808,11 @@@ extern struct child_process *git_connec
  extern int finish_connect(struct child_process *conn);
  extern int path_match(const char *path, int nr, char **match);
  extern int get_ack(int fd, unsigned char *result_sha1);
 -extern struct ref **get_remote_heads(int in, struct ref **list, int nr_match, char **match, unsigned int flags);
 +struct extra_have_objects {
 +      int nr, alloc;
 +      unsigned char (*array)[20];
 +};
 +extern struct ref **get_remote_heads(int in, struct ref **list, int nr_match, char **match, unsigned int flags, struct extra_have_objects *);
  extern int server_supports(const char *feature);
  
  extern struct packed_git *parse_pack_index(unsigned char *sha1);
@@@ -830,16 -739,14 +830,15 @@@ extern unsigned char* use_pack(struct p
  extern void close_pack_windows(struct packed_git *);
  extern void unuse_pack(struct pack_window **);
  extern void free_pack_by_name(const char *);
 +extern void clear_delta_base_cache(void);
  extern struct packed_git *add_packed_git(const char *, int, int);
  extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t);
  extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t);
  extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
  extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *);
 -extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
 +extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
  extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
  extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
- extern int matches_pack_name(struct packed_git *p, const char *name);
  
  /* Dumb servers support */
  extern int update_server_info(int);
@@@ -848,6 -755,7 +847,6 @@@ typedef int (*config_fn_t)(const char *
  extern int git_default_config(const char *, const char *, void *);
  extern int git_config_from_file(config_fn_t fn, const char *, void *);
  extern int git_config(config_fn_t fn, void *);
 -extern int git_parse_long(const char *, long *);
  extern int git_parse_ulong(const char *, unsigned long *);
  extern int git_config_int(const char *, const char *);
  extern unsigned long git_config_ulong(const char *, const char *);
@@@ -871,7 -779,6 +870,7 @@@ extern int user_ident_explicitly_given
  
  extern const char *git_commit_encoding;
  extern const char *git_log_output_encoding;
 +extern const char *git_mailmap_file;
  
  /* IO helper functions */
  extern void maybe_flush_or_die(FILE *, const char *);
@@@ -947,6 -854,7 +946,6 @@@ extern int ws_fix_copy(char *, const ch
  extern int ws_blank_line(const char *line, int len, unsigned ws_rule);
  
  /* ls-files */
 -int pathspec_match(const char **spec, char *matched, const char *filename, int skiplen);
  int report_path_error(const char *ps_matched, const char **pathspec, int prefix_offset);
  void overlay_tree_on_cache(const char *tree_name, const char *prefix);
  
diff --combined diff.c
index 11798af0c190f369e8ac9cb0463f794bc3bb5d10,a34d26c23f2f8e7bc3acb27a4596931fa0a7a7f9..37f99209a0580a1ac304d0c1a8a55c12da64c9f2
--- 1/diff.c
--- 2/diff.c
+++ b/diff.c
@@@ -11,8 -11,6 +11,8 @@@
  #include "attr.h"
  #include "run-command.h"
  #include "utf8.h"
 +#include "userdiff.h"
 +#include "sigchain.h"
  
  #ifdef NO_FAST_WORKING_DIRECTORY
  #define FAST_WORKING_DIRECTORY 0
  
  static int diff_detect_rename_default;
  static int diff_rename_limit_default = 200;
 +static int diff_suppress_blank_empty;
  int diff_use_color_default = -1;
 +static const char *diff_word_regex_cfg;
  static const char *external_diff_cmd_cfg;
  int diff_auto_refresh_index = 1;
 +static int diff_mnemonic_prefix;
  
  static char diff_colors[][COLOR_MAXLEN] = {
        "\033[m",       /* reset */
@@@ -40,9 -35,6 +40,9 @@@
        "\033[41m",     /* WHITESPACE (red background) */
  };
  
 +static void diff_filespec_load_driver(struct diff_filespec *one);
 +static char *run_textconv(const char *, struct diff_filespec *, size_t *);
 +
  static int parse_diff_color_slot(const char *var, int ofs)
  {
        if (!strcasecmp(var+ofs, "plain"))
        die("bad config variable '%s'", var);
  }
  
 -static struct ll_diff_driver {
 -      const char *name;
 -      struct ll_diff_driver *next;
 -      const char *cmd;
 -} *user_diff, **user_diff_tail;
 -
 -/*
 - * Currently there is only "diff.<drivername>.command" variable;
 - * because there are "diff.color.<slot>" variables, we are parsing
 - * this in a bit convoluted way to allow low level diff driver
 - * called "color".
 - */
 -static int parse_lldiff_command(const char *var, const char *ep, const char *value)
 -{
 -      const char *name;
 -      int namelen;
 -      struct ll_diff_driver *drv;
 -
 -      name = var + 5;
 -      namelen = ep - name;
 -      for (drv = user_diff; drv; drv = drv->next)
 -              if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
 -                      break;
 -      if (!drv) {
 -              drv = xcalloc(1, sizeof(struct ll_diff_driver));
 -              drv->name = xmemdupz(name, namelen);
 -              if (!user_diff_tail)
 -                      user_diff_tail = &user_diff;
 -              *user_diff_tail = drv;
 -              user_diff_tail = &(drv->next);
 -      }
 -
 -      return git_config_string(&(drv->cmd), var, value);
 -}
 -
 -/*
 - * 'diff.<what>.funcname' attribute can be specified in the configuration
 - * to define a customized regexp to find the beginning of a function to
 - * be used for hunk header lines of "diff -p" style output.
 - */
 -struct funcname_pattern_entry {
 -      char *name;
 -      char *pattern;
 -      int cflags;
 -};
 -static struct funcname_pattern_list {
 -      struct funcname_pattern_list *next;
 -      struct funcname_pattern_entry e;
 -} *funcname_pattern_list;
 -
 -static int parse_funcname_pattern(const char *var, const char *ep, const char *value, int cflags)
 -{
 -      const char *name;
 -      int namelen;
 -      struct funcname_pattern_list *pp;
 -
 -      name = var + 5; /* "diff." */
 -      namelen = ep - name;
 -
 -      for (pp = funcname_pattern_list; pp; pp = pp->next)
 -              if (!strncmp(pp->e.name, name, namelen) && !pp->e.name[namelen])
 -                      break;
 -      if (!pp) {
 -              pp = xcalloc(1, sizeof(*pp));
 -              pp->e.name = xmemdupz(name, namelen);
 -              pp->next = funcname_pattern_list;
 -              funcname_pattern_list = pp;
 -      }
 -      free(pp->e.pattern);
 -      pp->e.pattern = xstrdup(value);
 -      pp->e.cflags = cflags;
 -      return 0;
 -}
 -
  /*
   * These are to give UI layer defaults.
   * The core-level commands such as git-diff-files should
@@@ -88,14 -154,14 +88,14 @@@ int git_diff_ui_config(const char *var
                diff_auto_refresh_index = git_config_bool(var, value);
                return 0;
        }
 +      if (!strcmp(var, "diff.mnemonicprefix")) {
 +              diff_mnemonic_prefix = git_config_bool(var, value);
 +              return 0;
 +      }
        if (!strcmp(var, "diff.external"))
                return git_config_string(&external_diff_cmd_cfg, var, value);
 -      if (!prefixcmp(var, "diff.")) {
 -              const char *ep = strrchr(var, '.');
 -
 -              if (ep != var + 4 && !strcmp(ep, ".command"))
 -                      return parse_lldiff_command(var, ep, value);
 -      }
 +      if (!strcmp(var, "diff.wordregex"))
 +              return git_config_string(&diff_word_regex_cfg, var, value);
  
        return git_diff_basic_config(var, value, cb);
  }
@@@ -107,12 -173,6 +107,12 @@@ int git_diff_basic_config(const char *v
                return 0;
        }
  
 +      switch (userdiff_config(var, value)) {
 +              case 0: break;
 +              case -1: return -1;
 +              default: return 0;
 +      }
 +
        if (!prefixcmp(var, "diff.color.") || !prefixcmp(var, "color.diff.")) {
                int slot = parse_diff_color_slot(var, 11);
                if (!value)
                return 0;
        }
  
 -      if (!prefixcmp(var, "diff.")) {
 -              const char *ep = strrchr(var, '.');
 -              if (ep != var + 4) {
 -                      if (!strcmp(ep, ".funcname")) {
 -                              if (!value)
 -                                      return config_error_nonbool(var);
 -                              return parse_funcname_pattern(var, ep, value,
 -                                      0);
 -                      } else if (!strcmp(ep, ".xfuncname")) {
 -                              if (!value)
 -                                      return config_error_nonbool(var);
 -                              return parse_funcname_pattern(var, ep, value,
 -                                      REG_EXTENDED);
 -                      }
 -              }
 +      /* like GNU diff's --suppress-blank-empty option  */
 +      if (!strcmp(var, "diff.suppressblankempty") ||
 +                      /* for backwards compatibility */
 +                      !strcmp(var, "diff.suppress-blank-empty")) {
 +              diff_suppress_blank_empty = git_config_bool(var, value);
 +              return 0;
        }
  
        return git_color_default_config(var, value, cb);
@@@ -136,8 -205,9 +136,8 @@@ static char *quote_two(const char *one
  {
        int need_one = quote_c_style(one, NULL, NULL, 1);
        int need_two = quote_c_style(two, NULL, NULL, 1);
 -      struct strbuf res;
 +      struct strbuf res = STRBUF_INIT;
  
 -      strbuf_init(&res, 0);
        if (need_one + need_two) {
                strbuf_addch(&res, '"');
                quote_c_style(one, &res, NULL, 1);
@@@ -171,33 -241,6 +171,33 @@@ static struct diff_tempfile 
        char tmp_path[PATH_MAX];
  } diff_temp[2];
  
 +static struct diff_tempfile *claim_diff_tempfile(void) {
 +      int i;
 +      for (i = 0; i < ARRAY_SIZE(diff_temp); i++)
 +              if (!diff_temp[i].name)
 +                      return diff_temp + i;
 +      die("BUG: diff is failing to clean up its tempfiles");
 +}
 +
 +static int remove_tempfile_installed;
 +
 +static void remove_tempfile(void)
 +{
 +      int i;
 +      for (i = 0; i < ARRAY_SIZE(diff_temp); i++) {
 +              if (diff_temp[i].name == diff_temp[i].tmp_path)
 +                      unlink(diff_temp[i].name);
 +              diff_temp[i].name = NULL;
 +      }
 +}
 +
 +static void remove_tempfile_on_signal(int signo)
 +{
 +      remove_tempfile();
 +      sigchain_pop(signo);
 +      raise(signo);
 +}
 +
  static int count_lines(const char *data, int size)
  {
        int count, ch, completely_empty = 1, nl_just_seen = 0;
@@@ -262,8 -305,6 +262,8 @@@ static void emit_rewrite_diff(const cha
                              const char *name_b,
                              struct diff_filespec *one,
                              struct diff_filespec *two,
 +                            const char *textconv_one,
 +                            const char *textconv_two,
                              struct diff_options *o)
  {
        int lc_a, lc_b;
        const char *new = diff_get_color(color_diff, DIFF_FILE_NEW);
        const char *reset = diff_get_color(color_diff, DIFF_RESET);
        static struct strbuf a_name = STRBUF_INIT, b_name = STRBUF_INIT;
 +      const char *a_prefix, *b_prefix;
 +      const char *data_one, *data_two;
 +      size_t size_one, size_two;
 +
 +      if (diff_mnemonic_prefix && DIFF_OPT_TST(o, REVERSE_DIFF)) {
 +              a_prefix = o->b_prefix;
 +              b_prefix = o->a_prefix;
 +      } else {
 +              a_prefix = o->a_prefix;
 +              b_prefix = o->b_prefix;
 +      }
  
        name_a += (*name_a == '/');
        name_b += (*name_b == '/');
  
        strbuf_reset(&a_name);
        strbuf_reset(&b_name);
 -      quote_two_c_style(&a_name, o->a_prefix, name_a, 0);
 -      quote_two_c_style(&b_name, o->b_prefix, name_b, 0);
 +      quote_two_c_style(&a_name, a_prefix, name_a, 0);
 +      quote_two_c_style(&b_name, b_prefix, name_b, 0);
  
        diff_populate_filespec(one, 0);
        diff_populate_filespec(two, 0);
 -      lc_a = count_lines(one->data, one->size);
 -      lc_b = count_lines(two->data, two->size);
 +      if (textconv_one) {
 +              data_one = run_textconv(textconv_one, one, &size_one);
 +              if (!data_one)
 +                      die("unable to read files to diff");
 +      }
 +      else {
 +              data_one = one->data;
 +              size_one = one->size;
 +      }
 +      if (textconv_two) {
 +              data_two = run_textconv(textconv_two, two, &size_two);
 +              if (!data_two)
 +                      die("unable to read files to diff");
 +      }
 +      else {
 +              data_two = two->data;
 +              size_two = two->size;
 +      }
 +
 +      lc_a = count_lines(data_one, size_one);
 +      lc_b = count_lines(data_two, size_two);
        fprintf(o->file,
                "%s--- %s%s%s\n%s+++ %s%s%s\n%s@@ -",
                metainfo, a_name.buf, name_a_tab, reset,
        print_line_count(o->file, lc_b);
        fprintf(o->file, " @@%s\n", reset);
        if (lc_a)
 -              copy_file_with_prefix(o->file, '-', one->data, one->size, old, reset);
 +              copy_file_with_prefix(o->file, '-', data_one, size_one, old, reset);
        if (lc_b)
 -              copy_file_with_prefix(o->file, '+', two->data, two->size, new, reset);
 +              copy_file_with_prefix(o->file, '+', data_two, size_two, new, reset);
  }
  
  static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
        }
        else if (diff_populate_filespec(one, 0))
                return -1;
 +
        mf->ptr = one->data;
        mf->size = one->size;
        return 0;
  struct diff_words_buffer {
        mmfile_t text;
        long alloc;
 -      long current; /* output pointer */
 -      int suppressed_newline;
 +      struct diff_words_orig {
 +              const char *begin, *end;
 +      } *orig;
 +      int orig_nr, orig_alloc;
  };
  
  static void diff_words_append(char *line, unsigned long len,
                struct diff_words_buffer *buffer)
  {
 -      if (buffer->text.size + len > buffer->alloc) {
 -              buffer->alloc = (buffer->text.size + len) * 3 / 2;
 -              buffer->text.ptr = xrealloc(buffer->text.ptr, buffer->alloc);
 -      }
 +      ALLOC_GROW(buffer->text.ptr, buffer->text.size + len, buffer->alloc);
        line++;
        len--;
        memcpy(buffer->text.ptr + buffer->text.size, line, len);
        buffer->text.size += len;
 +      buffer->text.ptr[buffer->text.size] = '\0';
  }
  
  struct diff_words_data {
 -      struct xdiff_emit_state xm;
        struct diff_words_buffer minus, plus;
 +      const char *current_plus;
        FILE *file;
 +      regex_t *word_regex;
  };
  
 -static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color,
 -              int suppress_newline)
 +static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
  {
 -      const char *ptr;
 -      int eol = 0;
 +      struct diff_words_data *diff_words = priv;
 +      int minus_first, minus_len, plus_first, plus_len;
 +      const char *minus_begin, *minus_end, *plus_begin, *plus_end;
  
 -      if (len == 0)
 +      if (line[0] != '@' || parse_hunk_header(line, len,
 +                      &minus_first, &minus_len, &plus_first, &plus_len))
                return;
  
 -      ptr  = buffer->text.ptr + buffer->current;
 -      buffer->current += len;
 +      /* POSIX requires that first be decremented by one if len == 0... */
 +      if (minus_len) {
 +              minus_begin = diff_words->minus.orig[minus_first].begin;
 +              minus_end =
 +                      diff_words->minus.orig[minus_first + minus_len - 1].end;
 +      } else
 +              minus_begin = minus_end =
 +                      diff_words->minus.orig[minus_first].end;
  
 -      if (ptr[len - 1] == '\n') {
 -              eol = 1;
 -              len--;
 +      if (plus_len) {
 +              plus_begin = diff_words->plus.orig[plus_first].begin;
 +              plus_end = diff_words->plus.orig[plus_first + plus_len - 1].end;
 +      } else
 +              plus_begin = plus_end = diff_words->plus.orig[plus_first].end;
 +
 +      if (diff_words->current_plus != plus_begin)
 +              fwrite(diff_words->current_plus,
 +                              plus_begin - diff_words->current_plus, 1,
 +                              diff_words->file);
 +      if (minus_begin != minus_end)
 +              color_fwrite_lines(diff_words->file,
 +                              diff_get_color(1, DIFF_FILE_OLD),
 +                              minus_end - minus_begin, minus_begin);
 +      if (plus_begin != plus_end)
 +              color_fwrite_lines(diff_words->file,
 +                              diff_get_color(1, DIFF_FILE_NEW),
 +                              plus_end - plus_begin, plus_begin);
 +
 +      diff_words->current_plus = plus_end;
 +}
 +
 +/* This function starts looking at *begin, and returns 0 iff a word was found. */
 +static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex,
 +              int *begin, int *end)
 +{
 +      if (word_regex && *begin < buffer->size) {
 +              regmatch_t match[1];
 +              if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) {
 +                      char *p = memchr(buffer->ptr + *begin + match[0].rm_so,
 +                                      '\n', match[0].rm_eo - match[0].rm_so);
 +                      *end = p ? p - buffer->ptr : match[0].rm_eo + *begin;
 +                      *begin += match[0].rm_so;
 +                      return *begin >= *end;
 +              }
 +              return -1;
        }
  
 -      fputs(diff_get_color(1, color), file);
 -      fwrite(ptr, len, 1, file);
 -      fputs(diff_get_color(1, DIFF_RESET), file);
 +      /* find the next word */
 +      while (*begin < buffer->size && isspace(buffer->ptr[*begin]))
 +              (*begin)++;
 +      if (*begin >= buffer->size)
 +              return -1;
  
 -      if (eol) {
 -              if (suppress_newline)
 -                      buffer->suppressed_newline = 1;
 -              else
 -                      putc('\n', file);
 -      }
 +      /* find the end of the word */
 +      *end = *begin + 1;
 +      while (*end < buffer->size && !isspace(buffer->ptr[*end]))
 +              (*end)++;
 +
 +      return 0;
  }
  
 -static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
 +/*
 + * This function splits the words in buffer->text, stores the list with
 + * newline separator into out, and saves the offsets of the original words
 + * in buffer->orig.
 + */
 +static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out,
 +              regex_t *word_regex)
  {
 -      struct diff_words_data *diff_words = priv;
 +      int i, j;
 +      long alloc = 0;
  
 -      if (diff_words->minus.suppressed_newline) {
 -              if (line[0] != '+')
 -                      putc('\n', diff_words->file);
 -              diff_words->minus.suppressed_newline = 0;
 -      }
 +      out->size = 0;
 +      out->ptr = NULL;
  
 -      len--;
 -      switch (line[0]) {
 -              case '-':
 -                      print_word(diff_words->file,
 -                                 &diff_words->minus, len, DIFF_FILE_OLD, 1);
 -                      break;
 -              case '+':
 -                      print_word(diff_words->file,
 -                                 &diff_words->plus, len, DIFF_FILE_NEW, 0);
 -                      break;
 -              case ' ':
 -                      print_word(diff_words->file,
 -                                 &diff_words->plus, len, DIFF_PLAIN, 0);
 -                      diff_words->minus.current += len;
 -                      break;
 +      /* fake an empty "0th" word */
 +      ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc);
 +      buffer->orig[0].begin = buffer->orig[0].end = buffer->text.ptr;
 +      buffer->orig_nr = 1;
 +
 +      for (i = 0; i < buffer->text.size; i++) {
 +              if (find_word_boundaries(&buffer->text, word_regex, &i, &j))
 +                      return;
 +
 +              /* store original boundaries */
 +              ALLOC_GROW(buffer->orig, buffer->orig_nr + 1,
 +                              buffer->orig_alloc);
 +              buffer->orig[buffer->orig_nr].begin = buffer->text.ptr + i;
 +              buffer->orig[buffer->orig_nr].end = buffer->text.ptr + j;
 +              buffer->orig_nr++;
 +
 +              /* store one word */
 +              ALLOC_GROW(out->ptr, out->size + j - i + 1, alloc);
 +              memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i);
 +              out->ptr[out->size + j - i] = '\n';
 +              out->size += j - i + 1;
 +
 +              i = j - 1;
        }
  }
  
@@@ -494,41 -449,46 +494,41 @@@ static void diff_words_show(struct diff
        xdemitconf_t xecfg;
        xdemitcb_t ecb;
        mmfile_t minus, plus;
 -      int i;
  
 -      memset(&xecfg, 0, sizeof(xecfg));
 -      minus.size = diff_words->minus.text.size;
 -      minus.ptr = xmalloc(minus.size);
 -      memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size);
 -      for (i = 0; i < minus.size; i++)
 -              if (isspace(minus.ptr[i]))
 -                      minus.ptr[i] = '\n';
 -      diff_words->minus.current = 0;
 -
 -      plus.size = diff_words->plus.text.size;
 -      plus.ptr = xmalloc(plus.size);
 -      memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size);
 -      for (i = 0; i < plus.size; i++)
 -              if (isspace(plus.ptr[i]))
 -                      plus.ptr[i] = '\n';
 -      diff_words->plus.current = 0;
 +      /* special case: only removal */
 +      if (!diff_words->plus.text.size) {
 +              color_fwrite_lines(diff_words->file,
 +                      diff_get_color(1, DIFF_FILE_OLD),
 +                      diff_words->minus.text.size, diff_words->minus.text.ptr);
 +              diff_words->minus.text.size = 0;
 +              return;
 +      }
  
 -      xpp.flags = XDF_NEED_MINIMAL;
 -      xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc;
 -      ecb.outf = xdiff_outf;
 -      ecb.priv = diff_words;
 -      diff_words->xm.consume = fn_out_diff_words_aux;
 -      xdi_diff(&minus, &plus, &xpp, &xecfg, &ecb);
 +      diff_words->current_plus = diff_words->plus.text.ptr;
  
 +      memset(&xpp, 0, sizeof(xpp));
 +      memset(&xecfg, 0, sizeof(xecfg));
 +      diff_words_fill(&diff_words->minus, &minus, diff_words->word_regex);
 +      diff_words_fill(&diff_words->plus, &plus, diff_words->word_regex);
 +      xpp.flags = XDF_NEED_MINIMAL;
 +      /* as only the hunk header will be parsed, we need a 0-context */
 +      xecfg.ctxlen = 0;
 +      xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words,
 +                    &xpp, &xecfg, &ecb);
        free(minus.ptr);
        free(plus.ptr);
 +      if (diff_words->current_plus != diff_words->plus.text.ptr +
 +                      diff_words->plus.text.size)
 +              fwrite(diff_words->current_plus,
 +                      diff_words->plus.text.ptr + diff_words->plus.text.size
 +                      - diff_words->current_plus, 1,
 +                      diff_words->file);
        diff_words->minus.text.size = diff_words->plus.text.size = 0;
 -
 -      if (diff_words->minus.suppressed_newline) {
 -              putc('\n', diff_words->file);
 -              diff_words->minus.suppressed_newline = 0;
 -      }
  }
  
  typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
  
  struct emit_callback {
 -      struct xdiff_emit_state xm;
        int nparents, color_diff;
        unsigned ws_rule;
        sane_truncate_fn truncate;
@@@ -547,10 -507,7 +547,10 @@@ static void free_diff_words_data(struc
                        diff_words_show(ecbdata->diff_words);
  
                free (ecbdata->diff_words->minus.text.ptr);
 +              free (ecbdata->diff_words->minus.orig);
                free (ecbdata->diff_words->plus.text.ptr);
 +              free (ecbdata->diff_words->plus.orig);
 +              free(ecbdata->diff_words->word_regex);
                free(ecbdata->diff_words);
                ecbdata->diff_words = NULL;
        }
@@@ -641,12 -598,6 +641,12 @@@ static void fn_out_consume(void *priv, 
                ecbdata->label_path[0] = ecbdata->label_path[1] = NULL;
        }
  
 +      if (diff_suppress_blank_empty
 +          && len == 2 && line[0] == ' ' && line[1] == '\n') {
 +              line[0] = '\n';
 +              len = 1;
 +      }
 +
        /* This is not really necessary for now because
         * this codepath only deals with two-way diffs.
         */
@@@ -710,7 -661,7 +710,7 @@@ static char *pprint_rename(const char *
  {
        const char *old = a;
        const char *new = b;
 -      struct strbuf name;
 +      struct strbuf name = STRBUF_INIT;
        int pfx_length, sfx_length;
        int len_a = strlen(a);
        int len_b = strlen(b);
        int qlen_a = quote_c_style(a, NULL, NULL, 0);
        int qlen_b = quote_c_style(b, NULL, NULL, 0);
  
 -      strbuf_init(&name, 0);
        if (qlen_a || qlen_b) {
                quote_c_style(a, &name, NULL, 0);
                strbuf_addstr(&name, " => ");
  }
  
  struct diffstat_t {
 -      struct xdiff_emit_state xm;
 -
        int nr;
        int alloc;
        struct diffstat_file {
@@@ -860,7 -814,8 +860,7 @@@ static void fill_print_name(struct diff
                return;
  
        if (!file->is_renamed) {
 -              struct strbuf buf;
 -              strbuf_init(&buf, 0);
 +              struct strbuf buf = STRBUF_INIT;
                if (quote_c_style(file->name, &buf, NULL, 0)) {
                        pname = strbuf_detach(&buf, NULL);
                } else {
@@@ -1167,13 -1122,9 +1167,13 @@@ static void show_dirstat(struct diff_op
                /*
                 * Original minus copied is the removed material,
                 * added is the new material.  They are both damages
 -               * made to the preimage.
 +               * made to the preimage. In --dirstat-by-file mode, count
 +               * damaged files, not damaged lines. This is done by
 +               * counting only a single damaged line per file.
                 */
                damage = (p->one->size - copied) + added;
 +              if (DIFF_OPT_TST(options, DIRSTAT_BY_FILE) && damage > 0)
 +                      damage = 1;
  
                ALLOC_GROW(dir.files, dir.nr + 1, dir.alloc);
                dir.files[dir.nr].name = name;
@@@ -1206,6 -1157,7 +1206,6 @@@ static void free_diffstat_info(struct d
  }
  
  struct checkdiff_t {
 -      struct xdiff_emit_state xm;
        const char *filename;
        int lineno;
        struct diff_options *o;
@@@ -1380,61 -1332,123 +1380,61 @@@ static void emit_binary_diff(FILE *file
        emit_binary_diff_body(file, two, one);
  }
  
 -static void setup_diff_attr_check(struct git_attr_check *check)
 +static void diff_filespec_load_driver(struct diff_filespec *one)
  {
 -      static struct git_attr *attr_diff;
 -
 -      if (!attr_diff) {
 -              attr_diff = git_attr("diff", 4);
 -      }
 -      check[0].attr = attr_diff;
 +      if (!one->driver)
 +              one->driver = userdiff_find_by_path(one->path);
 +      if (!one->driver)
 +              one->driver = userdiff_find_by_name("default");
  }
  
 -static void diff_filespec_check_attr(struct diff_filespec *one)
 +int diff_filespec_is_binary(struct diff_filespec *one)
  {
 -      struct git_attr_check attr_diff_check;
 -      int check_from_data = 0;
 -
 -      if (one->checked_attr)
 -              return;
 -
 -      setup_diff_attr_check(&attr_diff_check);
 -      one->is_binary = 0;
 -      one->funcname_pattern_ident = NULL;
 -
 -      if (!git_checkattr(one->path, 1, &attr_diff_check)) {
 -              const char *value;
 -
 -              /* binaryness */
 -              value = attr_diff_check.value;
 -              if (ATTR_TRUE(value))
 -                      ;
 -              else if (ATTR_FALSE(value))
 -                      one->is_binary = 1;
 -              else
 -                      check_from_data = 1;
 -
 -              /* funcname pattern ident */
 -              if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))
 -                      ;
 -              else
 -                      one->funcname_pattern_ident = value;
 -      }
 -
 -      if (check_from_data) {
 -              if (!one->data && DIFF_FILE_VALID(one))
 -                      diff_populate_filespec(one, 0);
 -
 -              if (one->data)
 -                      one->is_binary = buffer_is_binary(one->data, one->size);
 +      if (one->is_binary == -1) {
 +              diff_filespec_load_driver(one);
 +              if (one->driver->binary != -1)
 +                      one->is_binary = one->driver->binary;
 +              else {
 +                      if (!one->data && DIFF_FILE_VALID(one))
 +                              diff_populate_filespec(one, 0);
 +                      if (one->data)
 +                              one->is_binary = buffer_is_binary(one->data,
 +                                              one->size);
 +                      if (one->is_binary == -1)
 +                              one->is_binary = 0;
 +              }
        }
 +      return one->is_binary;
  }
  
 -int diff_filespec_is_binary(struct diff_filespec *one)
 +static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespec *one)
  {
 -      diff_filespec_check_attr(one);
 -      return one->is_binary;
 +      diff_filespec_load_driver(one);
 +      return one->driver->funcname.pattern ? &one->driver->funcname : NULL;
  }
  
 -static const struct funcname_pattern_entry *funcname_pattern(const char *ident)
 -{
 -      struct funcname_pattern_list *pp;
 -
 -      for (pp = funcname_pattern_list; pp; pp = pp->next)
 -              if (!strcmp(ident, pp->e.name))
 -                      return &pp->e;
 -      return NULL;
 -}
 -
 -static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
 -      { "java",
 -        "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
 -        "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
 -        REG_EXTENDED },
 -      { "pascal",
 -        "^((procedure|function|constructor|destructor|interface|"
 -              "implementation|initialization|finalization)[ \t]*.*)$"
 -        "|"
 -        "^(.*=[ \t]*(class|record).*)$",
 -        REG_EXTENDED },
 -      { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
 -        REG_EXTENDED },
 -      { "tex",
 -        "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
 -        REG_EXTENDED },
 -      { "ruby", "^[ \t]*((class|module|def)[ \t].*)$",
 -        REG_EXTENDED },
 -};
 -
 -static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one)
 +static const char *userdiff_word_regex(struct diff_filespec *one)
  {
 -      const char *ident;
 -      const struct funcname_pattern_entry *pe;
 -      int i;
 -
 -      diff_filespec_check_attr(one);
 -      ident = one->funcname_pattern_ident;
 -
 -      if (!ident)
 -              /*
 -               * If the config file has "funcname.default" defined, that
 -               * regexp is used; otherwise NULL is returned and xemit uses
 -               * the built-in default.
 -               */
 -              return funcname_pattern("default");
 -
 -      /* Look up custom "funcname.$ident" regexp from config. */
 -      pe = funcname_pattern(ident);
 -      if (pe)
 -              return pe;
 +      diff_filespec_load_driver(one);
 +      return one->driver->word_regex;
 +}
  
 -      /*
 -       * And define built-in fallback patterns here.  Note that
 -       * these can be overridden by the user's config settings.
 -       */
 -      for (i = 0; i < ARRAY_SIZE(builtin_funcname_pattern); i++)
 -              if (!strcmp(ident, builtin_funcname_pattern[i].name))
 -                      return &builtin_funcname_pattern[i];
 +void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
 +{
 +      if (!options->a_prefix)
 +              options->a_prefix = a;
 +      if (!options->b_prefix)
 +              options->b_prefix = b;
 +}
  
 -      return NULL;
 +static const char *get_textconv(struct diff_filespec *one)
 +{
 +      if (!DIFF_FILE_VALID(one))
 +              return NULL;
 +      if (!S_ISREG(one->mode))
 +              return NULL;
 +      diff_filespec_load_driver(one);
 +      return one->driver->textconv;
  }
  
  static void builtin_diff(const char *name_a,
        char *a_one, *b_two;
        const char *set = diff_get_color_opt(o, DIFF_METAINFO);
        const char *reset = diff_get_color_opt(o, DIFF_RESET);
 +      const char *a_prefix, *b_prefix;
 +      const char *textconv_one = NULL, *textconv_two = NULL;
 +
 +      if (DIFF_OPT_TST(o, ALLOW_TEXTCONV)) {
 +              textconv_one = get_textconv(one);
 +              textconv_two = get_textconv(two);
 +      }
 +
 +      diff_set_mnemonic_prefix(o, "a/", "b/");
 +      if (DIFF_OPT_TST(o, REVERSE_DIFF)) {
 +              a_prefix = o->b_prefix;
 +              b_prefix = o->a_prefix;
 +      } else {
 +              a_prefix = o->a_prefix;
 +              b_prefix = o->b_prefix;
 +      }
  
        /* Never use a non-valid filename anywhere if at all possible */
        name_a = DIFF_FILE_VALID(one) ? name_a : name_b;
        name_b = DIFF_FILE_VALID(two) ? name_b : name_a;
  
 -      a_one = quote_two(o->a_prefix, name_a + (*name_a == '/'));
 -      b_two = quote_two(o->b_prefix, name_b + (*name_b == '/'));
 +      a_one = quote_two(a_prefix, name_a + (*name_a == '/'));
 +      b_two = quote_two(b_prefix, name_b + (*name_b == '/'));
        lbl[0] = DIFF_FILE_VALID(one) ? a_one : "/dev/null";
        lbl[1] = DIFF_FILE_VALID(two) ? b_two : "/dev/null";
        fprintf(o->file, "%sdiff --git %s %s%s\n", set, a_one, b_two, reset);
                 */
                if ((one->mode ^ two->mode) & S_IFMT)
                        goto free_ab_and_return;
 -              if (complete_rewrite) {
 -                      emit_rewrite_diff(name_a, name_b, one, two, o);
 +              if (complete_rewrite &&
 +                  (textconv_one || !diff_filespec_is_binary(one)) &&
 +                  (textconv_two || !diff_filespec_is_binary(two))) {
 +                      emit_rewrite_diff(name_a, name_b, one, two,
 +                                              textconv_one, textconv_two, o);
                        o->found_changes = 1;
                        goto free_ab_and_return;
                }
                die("unable to read files to diff");
  
        if (!DIFF_OPT_TST(o, TEXT) &&
 -          (diff_filespec_is_binary(one) || diff_filespec_is_binary(two))) {
 +          ( (diff_filespec_is_binary(one) && !textconv_one) ||
 +            (diff_filespec_is_binary(two) && !textconv_two) )) {
                /* Quite common confusing case */
                if (mf1.size == mf2.size &&
                    !memcmp(mf1.ptr, mf2.ptr, mf1.size))
                xdemitconf_t xecfg;
                xdemitcb_t ecb;
                struct emit_callback ecbdata;
 -              const struct funcname_pattern_entry *pe;
 +              const struct userdiff_funcname *pe;
 +
 +              if (textconv_one) {
 +                      size_t size;
 +                      mf1.ptr = run_textconv(textconv_one, one, &size);
 +                      if (!mf1.ptr)
 +                              die("unable to read files to diff");
 +                      mf1.size = size;
 +              }
 +              if (textconv_two) {
 +                      size_t size;
 +                      mf2.ptr = run_textconv(textconv_two, two, &size);
 +                      if (!mf2.ptr)
 +                              die("unable to read files to diff");
 +                      mf2.size = size;
 +              }
  
                pe = diff_funcname_pattern(one);
                if (!pe)
                        pe = diff_funcname_pattern(two);
  
 +              memset(&xpp, 0, sizeof(xpp));
                memset(&xecfg, 0, sizeof(xecfg));
                memset(&ecbdata, 0, sizeof(ecbdata));
                ecbdata.label_path = lbl;
                ecbdata.file = o->file;
                xpp.flags = XDF_NEED_MINIMAL | o->xdl_opts;
                xecfg.ctxlen = o->context;
 +              xecfg.interhunkctxlen = o->interhunkcontext;
                xecfg.flags = XDL_EMIT_FUNCNAMES;
                if (pe)
                        xdiff_set_find_func(&xecfg, pe->pattern, pe->cflags);
                        xecfg.ctxlen = strtoul(diffopts + 10, NULL, 10);
                else if (!prefixcmp(diffopts, "-u"))
                        xecfg.ctxlen = strtoul(diffopts + 2, NULL, 10);
 -              ecb.outf = xdiff_outf;
 -              ecb.priv = &ecbdata;
 -              ecbdata.xm.consume = fn_out_consume;
                if (DIFF_OPT_TST(o, COLOR_DIFF_WORDS)) {
                        ecbdata.diff_words =
                                xcalloc(1, sizeof(struct diff_words_data));
                        ecbdata.diff_words->file = o->file;
 +                      if (!o->word_regex)
 +                              o->word_regex = userdiff_word_regex(one);
 +                      if (!o->word_regex)
 +                              o->word_regex = userdiff_word_regex(two);
 +                      if (!o->word_regex)
 +                              o->word_regex = diff_word_regex_cfg;
 +                      if (o->word_regex) {
 +                              ecbdata.diff_words->word_regex = (regex_t *)
 +                                      xmalloc(sizeof(regex_t));
 +                              if (regcomp(ecbdata.diff_words->word_regex,
 +                                              o->word_regex,
 +                                              REG_EXTENDED | REG_NEWLINE))
 +                                      die ("Invalid regular expression: %s",
 +                                                      o->word_regex);
 +                      }
                }
 -              xdi_diff(&mf1, &mf2, &xpp, &xecfg, &ecb);
 +              xdi_diff_outf(&mf1, &mf2, fn_out_consume, &ecbdata,
 +                            &xpp, &xecfg, &ecb);
                if (DIFF_OPT_TST(o, COLOR_DIFF_WORDS))
                        free_diff_words_data(&ecbdata);
 +              if (textconv_one)
 +                      free(mf1.ptr);
 +              if (textconv_two)
 +                      free(mf2.ptr);
        }
  
   free_ab_and_return:
@@@ -1649,11 -1609,11 +1649,11 @@@ static void builtin_diffstat(const cha
                xdemitconf_t xecfg;
                xdemitcb_t ecb;
  
 +              memset(&xpp, 0, sizeof(xpp));
                memset(&xecfg, 0, sizeof(xecfg));
                xpp.flags = XDF_NEED_MINIMAL | o->xdl_opts;
 -              ecb.outf = xdiff_outf;
 -              ecb.priv = diffstat;
 -              xdi_diff(&mf1, &mf2, &xpp, &xecfg, &ecb);
 +              xdi_diff_outf(&mf1, &mf2, diffstat_consume, diffstat,
 +                            &xpp, &xecfg, &ecb);
        }
  
   free_and_return:
@@@ -1674,6 -1634,7 +1674,6 @@@ static void builtin_checkdiff(const cha
                return;
  
        memset(&data, 0, sizeof(data));
 -      data.xm.consume = checkdiff_consume;
        data.filename = name_b ? name_b : name_a;
        data.lineno = 0;
        data.o = o;
                xdemitconf_t xecfg;
                xdemitcb_t ecb;
  
 +              memset(&xpp, 0, sizeof(xpp));
                memset(&xecfg, 0, sizeof(xecfg));
                xecfg.ctxlen = 1; /* at least one context line */
                xpp.flags = XDF_NEED_MINIMAL;
 -              ecb.outf = xdiff_outf;
 -              ecb.priv = &data;
 -              xdi_diff(&mf1, &mf2, &xpp, &xecfg, &ecb);
 +              xdi_diff_outf(&mf1, &mf2, checkdiff_consume, &data,
 +                            &xpp, &xecfg, &ecb);
  
                if ((data.ws_rule & WS_TRAILING_SPACE) &&
                    data.trailing_blanks_start) {
@@@ -1726,7 -1687,6 +1726,7 @@@ struct diff_filespec *alloc_filespec(co
        spec->path = (char *)(spec + 1);
        memcpy(spec->path, path, namelen+1);
        spec->count = 1;
 +      spec->is_binary = -1;
        return spec;
  }
  
@@@ -1783,7 -1743,7 +1783,7 @@@ static int reuse_worktree_file(const ch
         * objects however would tend to be slower as they need
         * to be individually opened and inflated.
         */
-       if (!FAST_WORKING_DIRECTORY && !want_file && has_sha1_pack(sha1, NULL))
+       if (!FAST_WORKING_DIRECTORY && !want_file && has_sha1_pack(sha1))
                return 0;
  
        len = strlen(name);
  
  static int populate_from_stdin(struct diff_filespec *s)
  {
 -      struct strbuf buf;
 +      struct strbuf buf = STRBUF_INIT;
        size_t size = 0;
  
 -      strbuf_init(&buf, 0);
        if (strbuf_read(&buf, 0, 0) < 0)
                return error("error while reading from stdin %s",
                                     strerror(errno));
@@@ -1865,7 -1826,7 +1865,7 @@@ int diff_populate_filespec(struct diff_
  
        if (!s->sha1_valid ||
            reuse_worktree_file(s->path, s->sha1, 0)) {
 -              struct strbuf buf;
 +              struct strbuf buf = STRBUF_INIT;
                struct stat st;
                int fd;
  
                s->size = xsize_t(st.st_size);
                if (!s->size)
                        goto empty;
 -              if (size_only)
 -                      return 0;
                if (S_ISLNK(st.st_mode)) {
 -                      int ret;
 -                      s->data = xmalloc(s->size);
 -                      s->should_free = 1;
 -                      ret = readlink(s->path, s->data, s->size);
 -                      if (ret < 0) {
 -                              free(s->data);
 +                      struct strbuf sb = STRBUF_INIT;
 +
 +                      if (strbuf_readlink(&sb, s->path, s->size))
                                goto err_empty;
 -                      }
 +                      s->size = sb.len;
 +                      s->data = strbuf_detach(&sb, NULL);
 +                      s->should_free = 1;
                        return 0;
                }
 +              if (size_only)
 +                      return 0;
                fd = open(s->path, O_RDONLY);
                if (fd < 0)
                        goto err_empty;
                /*
                 * Convert from working tree format to canonical git format
                 */
 -              strbuf_init(&buf, 0);
                if (convert_to_git(s->path, s->data, s->size, &buf, safe_crlf)) {
                        size_t size = 0;
                        munmap(s->data, s->size);
@@@ -1948,23 -1911,17 +1948,23 @@@ void diff_free_filespec_data(struct dif
        s->cnt_data = NULL;
  }
  
 -static void prep_temp_blob(struct diff_tempfile *temp,
 +static void prep_temp_blob(const char *path, struct diff_tempfile *temp,
                           void *blob,
                           unsigned long size,
                           const unsigned char *sha1,
                           int mode)
  {
        int fd;
 +      struct strbuf buf = STRBUF_INIT;
  
        fd = git_mkstemp(temp->tmp_path, PATH_MAX, ".diff_XXXXXX");
        if (fd < 0)
                die("unable to create temp-file: %s", strerror(errno));
 +      if (convert_to_working_tree(path,
 +                      (const char *)blob, (size_t)size, &buf)) {
 +              blob = buf.buf;
 +              size = buf.len;
 +      }
        if (write_in_full(fd, blob, size) != size)
                die("unable to write temp-file");
        close(fd);
        strcpy(temp->hex, sha1_to_hex(sha1));
        temp->hex[40] = 0;
        sprintf(temp->mode, "%06o", mode);
 +      strbuf_release(&buf);
  }
  
 -static void prepare_temp_file(const char *name,
 -                            struct diff_tempfile *temp,
 -                            struct diff_filespec *one)
 +static struct diff_tempfile *prepare_temp_file(const char *name,
 +              struct diff_filespec *one)
  {
 +      struct diff_tempfile *temp = claim_diff_tempfile();
 +
        if (!DIFF_FILE_VALID(one)) {
        not_a_valid_file:
                /* A '-' entry produces this for file-2, and
                temp->name = "/dev/null";
                strcpy(temp->hex, ".");
                strcpy(temp->mode, ".");
 -              return;
 +              return temp;
 +      }
 +
 +      if (!remove_tempfile_installed) {
 +              atexit(remove_tempfile);
 +              sigchain_push_common(remove_tempfile_on_signal);
 +              remove_tempfile_installed = 1;
        }
  
        if (!one->sha1_valid ||
                if (S_ISLNK(st.st_mode)) {
                        int ret;
                        char buf[PATH_MAX + 1]; /* ought to be SYMLINK_MAX */
 -                      size_t sz = xsize_t(st.st_size);
 -                      if (sizeof(buf) <= st.st_size)
 -                              die("symlink too long: %s", name);
 -                      ret = readlink(name, buf, sz);
 +                      ret = readlink(name, buf, sizeof(buf));
                        if (ret < 0)
                                die("readlink(%s)", name);
 -                      prep_temp_blob(temp, buf, sz,
 +                      if (ret == sizeof(buf))
 +                              die("symlink too long: %s", name);
 +                      prep_temp_blob(name, temp, buf, ret,
                                       (one->sha1_valid ?
                                        one->sha1 : null_sha1),
                                       (one->sha1_valid ?
                         */
                        sprintf(temp->mode, "%06o", one->mode);
                }
 -              return;
 +              return temp;
        }
        else {
                if (diff_populate_filespec(one, 0))
                        die("cannot read data blob for %s", one->path);
 -              prep_temp_blob(temp, one->data, one->size,
 +              prep_temp_blob(name, temp, one->data, one->size,
                               one->sha1, one->mode);
        }
 -}
 -
 -static void remove_tempfile(void)
 -{
 -      int i;
 -
 -      for (i = 0; i < 2; i++)
 -              if (diff_temp[i].name == diff_temp[i].tmp_path) {
 -                      unlink(diff_temp[i].name);
 -                      diff_temp[i].name = NULL;
 -              }
 -}
 -
 -static void remove_tempfile_on_signal(int signo)
 -{
 -      remove_tempfile();
 -      signal(SIGINT, SIG_DFL);
 -      raise(signo);
 +      return temp;
  }
  
  /* An external diff command takes:
@@@ -2060,22 -2027,34 +2060,22 @@@ static void run_external_diff(const cha
                              int complete_rewrite)
  {
        const char *spawn_arg[10];
 -      struct diff_tempfile *temp = diff_temp;
        int retval;
 -      static int atexit_asked = 0;
 -      const char *othername;
        const char **arg = &spawn_arg[0];
  
 -      othername = (other? other : name);
 -      if (one && two) {
 -              prepare_temp_file(name, &temp[0], one);
 -              prepare_temp_file(othername, &temp[1], two);
 -              if (! atexit_asked &&
 -                  (temp[0].name == temp[0].tmp_path ||
 -                   temp[1].name == temp[1].tmp_path)) {
 -                      atexit_asked = 1;
 -                      atexit(remove_tempfile);
 -              }
 -              signal(SIGINT, remove_tempfile_on_signal);
 -      }
 -
        if (one && two) {
 +              struct diff_tempfile *temp_one, *temp_two;
 +              const char *othername = (other ? other : name);
 +              temp_one = prepare_temp_file(name, one);
 +              temp_two = prepare_temp_file(othername, two);
                *arg++ = pgm;
                *arg++ = name;
 -              *arg++ = temp[0].name;
 -              *arg++ = temp[0].hex;
 -              *arg++ = temp[0].mode;
 -              *arg++ = temp[1].name;
 -              *arg++ = temp[1].hex;
 -              *arg++ = temp[1].mode;
 +              *arg++ = temp_one->name;
 +              *arg++ = temp_one->hex;
 +              *arg++ = temp_one->mode;
 +              *arg++ = temp_two->name;
 +              *arg++ = temp_two->hex;
 +              *arg++ = temp_two->mode;
                if (other) {
                        *arg++ = other;
                        *arg++ = xfrm_msg;
        }
  }
  
 -static const char *external_diff_attr(const char *name)
 +static int similarity_index(struct diff_filepair *p)
  {
 -      struct git_attr_check attr_diff_check;
 +      return p->score * 100 / MAX_SCORE;
 +}
  
 -      if (!name)
 -              return NULL;
 +static void fill_metainfo(struct strbuf *msg,
 +                        const char *name,
 +                        const char *other,
 +                        struct diff_filespec *one,
 +                        struct diff_filespec *two,
 +                        struct diff_options *o,
 +                        struct diff_filepair *p)
 +{
 +      strbuf_init(msg, PATH_MAX * 2 + 300);
 +      switch (p->status) {
 +      case DIFF_STATUS_COPIED:
 +              strbuf_addf(msg, "similarity index %d%%", similarity_index(p));
 +              strbuf_addstr(msg, "\ncopy from ");
 +              quote_c_style(name, msg, NULL, 0);
 +              strbuf_addstr(msg, "\ncopy to ");
 +              quote_c_style(other, msg, NULL, 0);
 +              strbuf_addch(msg, '\n');
 +              break;
 +      case DIFF_STATUS_RENAMED:
 +              strbuf_addf(msg, "similarity index %d%%", similarity_index(p));
 +              strbuf_addstr(msg, "\nrename from ");
 +              quote_c_style(name, msg, NULL, 0);
 +              strbuf_addstr(msg, "\nrename to ");
 +              quote_c_style(other, msg, NULL, 0);
 +              strbuf_addch(msg, '\n');
 +              break;
 +      case DIFF_STATUS_MODIFIED:
 +              if (p->score) {
 +                      strbuf_addf(msg, "dissimilarity index %d%%\n",
 +                                  similarity_index(p));
 +                      break;
 +              }
 +              /* fallthru */
 +      default:
 +              /* nothing */
 +              ;
 +      }
 +      if (one && two && hashcmp(one->sha1, two->sha1)) {
 +              int abbrev = DIFF_OPT_TST(o, FULL_INDEX) ? 40 : DEFAULT_ABBREV;
  
 -      setup_diff_attr_check(&attr_diff_check);
 -      if (!git_checkattr(name, 1, &attr_diff_check)) {
 -              const char *value = attr_diff_check.value;
 -              if (!ATTR_TRUE(value) &&
 -                  !ATTR_FALSE(value) &&
 -                  !ATTR_UNSET(value)) {
 -                      struct ll_diff_driver *drv;
 -
 -                      for (drv = user_diff; drv; drv = drv->next)
 -                              if (!strcmp(drv->name, value))
 -                                      return drv->cmd;
 +              if (DIFF_OPT_TST(o, BINARY)) {
 +                      mmfile_t mf;
 +                      if ((!fill_mmfile(&mf, one) && diff_filespec_is_binary(one)) ||
 +                          (!fill_mmfile(&mf, two) && diff_filespec_is_binary(two)))
 +                              abbrev = 40;
                }
 +              strbuf_addf(msg, "index %.*s..%.*s",
 +                          abbrev, sha1_to_hex(one->sha1),
 +                          abbrev, sha1_to_hex(two->sha1));
 +              if (one->mode == two->mode)
 +                      strbuf_addf(msg, " %06o", one->mode);
 +              strbuf_addch(msg, '\n');
        }
 -      return NULL;
 +      if (msg->len)
 +              strbuf_setlen(msg, msg->len - 1);
  }
  
  static void run_diff_cmd(const char *pgm,
                         const char *attr_path,
                         struct diff_filespec *one,
                         struct diff_filespec *two,
 -                       const char *xfrm_msg,
 +                       struct strbuf *msg,
                         struct diff_options *o,
 -                       int complete_rewrite)
 +                       struct diff_filepair *p)
  {
 +      const char *xfrm_msg = NULL;
 +      int complete_rewrite = (p->status == DIFF_STATUS_MODIFIED) && p->score;
 +
 +      if (msg) {
 +              fill_metainfo(msg, name, other, one, two, o, p);
 +              xfrm_msg = msg->len ? msg->buf : NULL;
 +      }
 +
        if (!DIFF_OPT_TST(o, ALLOW_EXTERNAL))
                pgm = NULL;
        else {
 -              const char *cmd = external_diff_attr(attr_path);
 -              if (cmd)
 -                      pgm = cmd;
 +              struct userdiff_driver *drv = userdiff_find_by_path(attr_path);
 +              if (drv && drv->external)
 +                      pgm = drv->external;
        }
  
        if (pgm) {
@@@ -2206,13 -2138,18 +2206,13 @@@ static void diff_fill_sha1_info(struct 
                        if (lstat(one->path, &st) < 0)
                                die("stat %s", one->path);
                        if (index_path(one->sha1, one->path, &st, 0))
 -                              die("cannot hash %s\n", one->path);
 +                              die("cannot hash %s", one->path);
                }
        }
        else
                hashclr(one->sha1);
  }
  
 -static int similarity_index(struct diff_filepair *p)
 -{
 -      return p->score * 100 / MAX_SCORE;
 -}
 -
  static void strip_prefix(int prefix_length, const char **namep, const char **otherp)
  {
        /* Strip the prefix but do not molest /dev/null and absolute paths */
@@@ -2226,11 -2163,13 +2226,11 @@@ static void run_diff(struct diff_filepa
  {
        const char *pgm = external_diff();
        struct strbuf msg;
 -      char *xfrm_msg;
        struct diff_filespec *one = p->one;
        struct diff_filespec *two = p->two;
        const char *name;
        const char *other;
        const char *attr_path;
 -      int complete_rewrite = 0;
  
        name  = p->one->path;
        other = (strcmp(name, p->two->path) ? p->two->path : NULL);
  
        if (DIFF_PAIR_UNMERGED(p)) {
                run_diff_cmd(pgm, name, NULL, attr_path,
 -                           NULL, NULL, NULL, o, 0);
 +                           NULL, NULL, NULL, o, p);
                return;
        }
  
        diff_fill_sha1_info(one);
        diff_fill_sha1_info(two);
  
 -      strbuf_init(&msg, PATH_MAX * 2 + 300);
 -      switch (p->status) {
 -      case DIFF_STATUS_COPIED:
 -              strbuf_addf(&msg, "similarity index %d%%", similarity_index(p));
 -              strbuf_addstr(&msg, "\ncopy from ");
 -              quote_c_style(name, &msg, NULL, 0);
 -              strbuf_addstr(&msg, "\ncopy to ");
 -              quote_c_style(other, &msg, NULL, 0);
 -              strbuf_addch(&msg, '\n');
 -              break;
 -      case DIFF_STATUS_RENAMED:
 -              strbuf_addf(&msg, "similarity index %d%%", similarity_index(p));
 -              strbuf_addstr(&msg, "\nrename from ");
 -              quote_c_style(name, &msg, NULL, 0);
 -              strbuf_addstr(&msg, "\nrename to ");
 -              quote_c_style(other, &msg, NULL, 0);
 -              strbuf_addch(&msg, '\n');
 -              break;
 -      case DIFF_STATUS_MODIFIED:
 -              if (p->score) {
 -                      strbuf_addf(&msg, "dissimilarity index %d%%\n",
 -                                      similarity_index(p));
 -                      complete_rewrite = 1;
 -                      break;
 -              }
 -              /* fallthru */
 -      default:
 -              /* nothing */
 -              ;
 -      }
 -
 -      if (hashcmp(one->sha1, two->sha1)) {
 -              int abbrev = DIFF_OPT_TST(o, FULL_INDEX) ? 40 : DEFAULT_ABBREV;
 -
 -              if (DIFF_OPT_TST(o, BINARY)) {
 -                      mmfile_t mf;
 -                      if ((!fill_mmfile(&mf, one) && diff_filespec_is_binary(one)) ||
 -                          (!fill_mmfile(&mf, two) && diff_filespec_is_binary(two)))
 -                              abbrev = 40;
 -              }
 -              strbuf_addf(&msg, "index %.*s..%.*s",
 -                              abbrev, sha1_to_hex(one->sha1),
 -                              abbrev, sha1_to_hex(two->sha1));
 -              if (one->mode == two->mode)
 -                      strbuf_addf(&msg, " %06o", one->mode);
 -              strbuf_addch(&msg, '\n');
 -      }
 -
 -      if (msg.len)
 -              strbuf_setlen(&msg, msg.len - 1);
 -      xfrm_msg = msg.len ? msg.buf : NULL;
 -
        if (!pgm &&
            DIFF_FILE_VALID(one) && DIFF_FILE_VALID(two) &&
            (S_IFMT & one->mode) != (S_IFMT & two->mode)) {
 -              /* a filepair that changes between file and symlink
 +              /*
 +               * a filepair that changes between file and symlink
                 * needs to be split into deletion and creation.
                 */
                struct diff_filespec *null = alloc_filespec(two->path);
                run_diff_cmd(NULL, name, other, attr_path,
 -                           one, null, xfrm_msg, o, 0);
 +                           one, null, &msg, o, p);
                free(null);
 +              strbuf_release(&msg);
 +
                null = alloc_filespec(one->path);
                run_diff_cmd(NULL, name, other, attr_path,
 -                           null, two, xfrm_msg, o, 0);
 +                           null, two, &msg, o, p);
                free(null);
        }
        else
                run_diff_cmd(pgm, name, other, attr_path,
 -                           one, two, xfrm_msg, o, complete_rewrite);
 +                           one, two, &msg, o, p);
  
        strbuf_release(&msg);
  }
@@@ -2333,18 -2321,19 +2333,18 @@@ void diff_setup(struct diff_options *op
        options->break_opt = -1;
        options->rename_limit = -1;
        options->dirstat_percent = 3;
 -      DIFF_OPT_CLR(options, DIRSTAT_CUMULATIVE);
        options->context = 3;
  
        options->change = diff_change;
        options->add_remove = diff_addremove;
        if (diff_use_color_default > 0)
                DIFF_OPT_SET(options, COLOR_DIFF);
 -      else
 -              DIFF_OPT_CLR(options, COLOR_DIFF);
        options->detect_rename = diff_detect_rename_default;
  
 -      options->a_prefix = "a/";
 -      options->b_prefix = "b/";
 +      if (!diff_mnemonic_prefix) {
 +              options->a_prefix = "a/";
 +              options->b_prefix = "b/";
 +      }
  }
  
  int diff_setup_done(struct diff_options *options)
@@@ -2501,10 -2490,6 +2501,10 @@@ int diff_opt_parse(struct diff_options 
        else if (!strcmp(arg, "--cumulative")) {
                options->output_format |= DIFF_FORMAT_DIRSTAT;
                DIFF_OPT_SET(options, DIRSTAT_CUMULATIVE);
 +      } else if (opt_arg(arg, 0, "dirstat-by-file",
 +                         &options->dirstat_percent)) {
 +              options->output_format |= DIFF_FORMAT_DIRSTAT;
 +              DIFF_OPT_SET(options, DIRSTAT_BY_FILE);
        }
        else if (!strcmp(arg, "--check"))
                options->output_format |= DIFF_FORMAT_CHECKDIFF;
                options->xdl_opts |= XDF_IGNORE_WHITESPACE_CHANGE;
        else if (!strcmp(arg, "--ignore-space-at-eol"))
                options->xdl_opts |= XDF_IGNORE_WHITESPACE_AT_EOL;
 +      else if (!strcmp(arg, "--patience"))
 +              options->xdl_opts |= XDF_PATIENCE_DIFF;
  
        /* flags options */
        else if (!strcmp(arg, "--binary")) {
                DIFF_OPT_CLR(options, COLOR_DIFF);
        else if (!strcmp(arg, "--color-words"))
                options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
 +      else if (!prefixcmp(arg, "--color-words=")) {
 +              options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
 +              options->word_regex = arg + 14;
 +      }
        else if (!strcmp(arg, "--exit-code"))
                DIFF_OPT_SET(options, EXIT_WITH_STATUS);
        else if (!strcmp(arg, "--quiet"))
                DIFF_OPT_SET(options, ALLOW_EXTERNAL);
        else if (!strcmp(arg, "--no-ext-diff"))
                DIFF_OPT_CLR(options, ALLOW_EXTERNAL);
 +      else if (!strcmp(arg, "--textconv"))
 +              DIFF_OPT_SET(options, ALLOW_TEXTCONV);
 +      else if (!strcmp(arg, "--no-textconv"))
 +              DIFF_OPT_CLR(options, ALLOW_TEXTCONV);
        else if (!strcmp(arg, "--ignore-submodules"))
                DIFF_OPT_SET(options, IGNORE_SUBMODULES);
  
                options->b_prefix = arg + 13;
        else if (!strcmp(arg, "--no-prefix"))
                options->a_prefix = options->b_prefix = "";
 +      else if (opt_arg(arg, '\0', "inter-hunk-context",
 +                       &options->interhunkcontext))
 +              ;
        else if (!prefixcmp(arg, "--output=")) {
                options->file = fopen(arg + strlen("--output="), "w");
                options->close_file = 1;
@@@ -3074,7 -3046,8 +3074,7 @@@ static void diff_summary(FILE *file, st
  }
  
  struct patch_id_t {
 -      struct xdiff_emit_state xm;
 -      SHA_CTX *ctx;
 +      git_SHA_CTX *ctx;
        int patchlen;
  };
  
@@@ -3102,7 -3075,7 +3102,7 @@@ static void patch_id_consume(void *priv
  
        new_len = remove_space(line, len);
  
 -      SHA1_Update(data->ctx, line, new_len);
 +      git_SHA1_Update(data->ctx, line, new_len);
        data->patchlen += new_len;
  }
  
@@@ -3111,13 -3084,14 +3111,13 @@@ static int diff_get_patch_id(struct dif
  {
        struct diff_queue_struct *q = &diff_queued_diff;
        int i;
 -      SHA_CTX ctx;
 +      git_SHA_CTX ctx;
        struct patch_id_t data;
        char buffer[PATH_MAX * 4 + 20];
  
 -      SHA1_Init(&ctx);
 +      git_SHA1_Init(&ctx);
        memset(&data, 0, sizeof(struct patch_id_t));
        data.ctx = &ctx;
 -      data.xm.consume = patch_id_consume;
  
        for (i = 0; i < q->nr; i++) {
                xpparam_t xpp;
                struct diff_filepair *p = q->queue[i];
                int len1, len2;
  
 +              memset(&xpp, 0, sizeof(xpp));
                memset(&xecfg, 0, sizeof(xecfg));
                if (p->status == 0)
                        return error("internal diff status error");
                                        len2, p->two->path,
                                        len1, p->one->path,
                                        len2, p->two->path);
 -              SHA1_Update(&ctx, buffer, len1);
 +              git_SHA1_Update(&ctx, buffer, len1);
  
                xpp.flags = XDF_NEED_MINIMAL;
                xecfg.ctxlen = 3;
                xecfg.flags = XDL_EMIT_FUNCNAMES;
 -              ecb.outf = xdiff_outf;
 -              ecb.priv = &data;
 -              xdi_diff(&mf1, &mf2, &xpp, &xecfg, &ecb);
 +              xdi_diff_outf(&mf1, &mf2, patch_id_consume, &data,
 +                            &xpp, &xecfg, &ecb);
        }
  
 -      SHA1_Final(sha1, &ctx);
 +      git_SHA1_Final(sha1, &ctx);
        return 0;
  }
  
@@@ -3261,6 -3235,7 +3261,6 @@@ void diff_flush(struct diff_options *op
                struct diffstat_t diffstat;
  
                memset(&diffstat, 0, sizeof(struct diffstat_t));
 -              diffstat.xm.consume = diffstat_consume;
                for (i = 0; i < q->nr; i++) {
                        struct diff_filepair *p = q->queue[i];
                        if (check_pair_status(p))
@@@ -3557,32 -3532,3 +3557,32 @@@ void diff_unmerge(struct diff_options *
        fill_filespec(one, sha1, mode);
        diff_queue(&diff_queued_diff, one, two)->is_unmerged = 1;
  }
 +
 +static char *run_textconv(const char *pgm, struct diff_filespec *spec,
 +              size_t *outsize)
 +{
 +      struct diff_tempfile *temp;
 +      const char *argv[3];
 +      const char **arg = argv;
 +      struct child_process child;
 +      struct strbuf buf = STRBUF_INIT;
 +
 +      temp = prepare_temp_file(spec->path, spec);
 +      *arg++ = pgm;
 +      *arg++ = temp->name;
 +      *arg = NULL;
 +
 +      memset(&child, 0, sizeof(child));
 +      child.argv = argv;
 +      child.out = -1;
 +      if (start_command(&child) != 0 ||
 +          strbuf_read(&buf, child.out, 0) < 0 ||
 +          finish_command(&child) != 0) {
 +              remove_tempfile();
 +              error("error running textconv command '%s'", pgm);
 +              return NULL;
 +      }
 +      remove_tempfile();
 +
 +      return strbuf_detach(&buf, outsize);
 +}
diff --combined git-repack.sh
index 00c597e97c8fd5a97105d2c68315cef414175669,e02bf27aa6a7ee9552f5d0a937e38e6689704438..0868734723b3c96144bfa9360a9e19ebae1995f7
@@@ -60,6 -60,7 +60,7 @@@ case ",$all_into_one," i
        args='--unpacked --incremental'
        ;;
  ,t,)
+       args= existing=
        if [ -d "$PACKDIR" ]; then
                for e in `cd "$PACKDIR" && find . -type f -name '*.pack' \
                        | sed -e 's/^\.\///' -e 's/\.pack$//'`
                        if [ -e "$PACKDIR/$e.keep" ]; then
                                : keep
                        else
-                               args="$args --unpacked=$e.pack"
                                existing="$existing $e"
                        fi
                done
-               if test -n "$args" -a -n "$unpack_unreachable" -a \
+               if test -n "$existing" -a -n "$unpack_unreachable" -a \
                        -n "$remove_redundant"
                then
                        args="$args $unpack_unreachable"
@@@ -88,79 -88,32 +88,79 @@@ if [ -z "$names" ]; the
                echo Nothing new to pack.
        fi
  fi
 -for name in $names ; do
 -      fullbases="$fullbases pack-$name"
 -      chmod a-w "$PACKTMP-$name.pack"
 -      chmod a-w "$PACKTMP-$name.idx"
 -      mkdir -p "$PACKDIR" || exit
  
 +# Ok we have prepared all new packfiles.
 +mkdir -p "$PACKDIR" || exit
 +
 +# First see if there are packs of the same name and if so
 +# if we can move them out of the way (this can happen if we
 +# repacked immediately after packing fully.
 +rollback=
 +failed=
 +for name in $names
 +do
        for sfx in pack idx
        do
 -              if test -f "$PACKDIR/pack-$name.$sfx"
 -              then
 -                      mv -f "$PACKDIR/pack-$name.$sfx" \
 -                              "$PACKDIR/old-pack-$name.$sfx"
 -              fi
 -      done &&
 +              file=pack-$name.$sfx
 +              test -f "$PACKDIR/$file" || continue
 +              rm -f "$PACKDIR/old-$file" &&
 +              mv "$PACKDIR/$file" "$PACKDIR/old-$file" || {
 +                      failed=t
 +                      break
 +              }
 +              rollback="$rollback $file"
 +      done
 +      test -z "$failed" || break
 +done
 +
 +# If renaming failed for any of them, roll the ones we have
 +# already renamed back to their original names.
 +if test -n "$failed"
 +then
 +      rollback_failure=
 +      for file in $rollback
 +      do
 +              mv "$PACKDIR/old-$file" "$PACKDIR/$file" ||
 +              rollback_failure="$rollback_failure $file"
 +      done
 +      if test -n "$rollback_failure"
 +      then
 +              echo >&2 "WARNING: Some packs in use have been renamed by"
 +              echo >&2 "WARNING: prefixing old- to their name, in order to"
 +              echo >&2 "WARNING: replace them with the new version of the"
 +              echo >&2 "WARNING: file.  But the operation failed, and"
 +              echo >&2 "WARNING: attempt to rename them back to their"
 +              echo >&2 "WARNING: original names also failed."
 +              echo >&2 "WARNING: Please rename them in $PACKDIR manually:"
 +              for file in $rollback_failure
 +              do
 +                      echo >&2 "WARNING:   old-$file -> $file"
 +              done
 +      fi
 +      exit 1
 +fi
 +
 +# Now the ones with the same name are out of the way...
 +fullbases=
 +for name in $names
 +do
 +      fullbases="$fullbases pack-$name"
 +      chmod a-w "$PACKTMP-$name.pack"
 +      chmod a-w "$PACKTMP-$name.idx"
        mv -f "$PACKTMP-$name.pack" "$PACKDIR/pack-$name.pack" &&
 -      mv -f "$PACKTMP-$name.idx"  "$PACKDIR/pack-$name.idx" &&
 -      test -f "$PACKDIR/pack-$name.pack" &&
 -      test -f "$PACKDIR/pack-$name.idx" || {
 -              echo >&2 "Couldn't replace the existing pack with updated one."
 -              echo >&2 "The original set of packs have been saved as"
 -              echo >&2 "old-pack-$name.{pack,idx} in $PACKDIR."
 -              exit 1
 -      }
 -      rm -f "$PACKDIR/old-pack-$name.pack" "$PACKDIR/old-pack-$name.idx"
 +      mv -f "$PACKTMP-$name.idx"  "$PACKDIR/pack-$name.idx" ||
 +      exit
 +done
 +
 +# Remove the "old-" files
 +for name in $names
 +do
 +      rm -f "$PACKDIR/old-pack-$name.idx"
 +      rm -f "$PACKDIR/old-pack-$name.pack"
  done
  
 +# End of pack replacement.
 +
  if test "$remove_redundant" = t
  then
        # We know $existing are all redundant.
@@@ -181,5 -134,5 +181,5 @@@ f
  
  case "$no_update_info" in
  t) : ;;
 -*) git-update-server-info ;;
 +*) git update-server-info ;;
  esac
diff --combined revision.c
index 286e416b757fa8df731330992fca96773082f75d,50a5b5f3943a89dab65dbfaa1657b9792aca51d5..34ee490ea0181091c375765141bd83e71ab2defe
@@@ -11,7 -11,6 +11,7 @@@
  #include "reflog-walk.h"
  #include "patch-ids.h"
  #include "decorate.h"
 +#include "log-tree.h"
  
  volatile show_early_output_fn_t show_early_output;
  
@@@ -183,11 -182,8 +183,11 @@@ static struct commit *handle_commit(str
                if (!tag->tagged)
                        die("bad tag");
                object = parse_object(tag->tagged->sha1);
 -              if (!object)
 +              if (!object) {
 +                      if (flags & UNINTERESTING)
 +                              return NULL;
                        die("bad object %s", sha1_to_hex(tag->tagged->sha1));
 +              }
        }
  
        /*
                        mark_parents_uninteresting(commit);
                        revs->limited = 1;
                }
 +              if (revs->show_source && !commit->util)
 +                      commit->util = (void *) name;
                return commit;
        }
  
@@@ -298,31 -292,10 +298,31 @@@ static void file_change(struct diff_opt
        DIFF_OPT_SET(options, HAS_CHANGES);
  }
  
 -static int rev_compare_tree(struct rev_info *revs, struct tree *t1, struct tree *t2)
 +static int rev_compare_tree(struct rev_info *revs, struct commit *parent, struct commit *commit)
  {
 +      struct tree *t1 = parent->tree;
 +      struct tree *t2 = commit->tree;
 +
        if (!t1)
                return REV_TREE_NEW;
 +
 +      if (revs->simplify_by_decoration) {
 +              /*
 +               * If we are simplifying by decoration, then the commit
 +               * is worth showing if it has a tag pointing at it.
 +               */
 +              if (lookup_decoration(&name_decoration, &commit->object))
 +                      return REV_TREE_DIFFERENT;
 +              /*
 +               * A commit that is not pointed by a tag is uninteresting
 +               * if we are not limited by path.  This means that you will
 +               * see the usual "commits that touch the paths" plus any
 +               * tagged commit by specifying both --simplify-by-decoration
 +               * and pathspec.
 +               */
 +              if (!revs->prune_data)
 +                      return REV_TREE_SAME;
 +      }
        if (!t2)
                return REV_TREE_DIFFERENT;
        tree_difference = REV_TREE_SAME;
        return tree_difference;
  }
  
 -static int rev_same_tree_as_empty(struct rev_info *revs, struct tree *t1)
 +static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
  {
        int retval;
        void *tree;
        unsigned long size;
        struct tree_desc empty, real;
 +      struct tree *t1 = commit->tree;
  
        if (!t1)
                return 0;
@@@ -373,7 -345,7 +373,7 @@@ static void try_to_simplify_commit(stru
                return;
  
        if (!commit->parents) {
 -              if (rev_same_tree_as_empty(revs, commit->tree))
 +              if (rev_same_tree_as_empty(revs, commit))
                        commit->object.flags |= TREESAME;
                return;
        }
                        die("cannot simplify commit %s (because of %s)",
                            sha1_to_hex(commit->object.sha1),
                            sha1_to_hex(p->object.sha1));
 -              switch (rev_compare_tree(revs, p->tree, commit->tree)) {
 +              switch (rev_compare_tree(revs, p, commit)) {
                case REV_TREE_SAME:
                        tree_same = 1;
                        if (!revs->simplify_history || (p->object.flags & UNINTERESTING)) {
  
                case REV_TREE_NEW:
                        if (revs->remove_empty_trees &&
 -                          rev_same_tree_as_empty(revs, p->tree)) {
 +                          rev_same_tree_as_empty(revs, p)) {
                                /* We are adding all the specified
                                 * paths from this parent, so the
                                 * history beyond this parent is not
@@@ -482,10 -454,9 +482,10 @@@ static int add_parents_to_list(struct r
                while (parent) {
                        struct commit *p = parent->item;
                        parent = parent->next;
 +                      if (p)
 +                              p->object.flags |= UNINTERESTING;
                        if (parse_commit(p) < 0)
 -                              return -1;
 -                      p->object.flags |= UNINTERESTING;
 +                              continue;
                        if (p->parents)
                                mark_parents_uninteresting(p);
                        if (p->object.flags & SEEN)
  
                if (parse_commit(p) < 0)
                        return -1;
 +              if (revs->show_source && !p->util)
 +                      p->util = commit->util;
                p->object.flags |= left_flag;
                if (!(p->object.flags & SEEN)) {
                        p->object.flags |= SEEN;
                        insert_by_date_cached(p, list, cached_base, cache_ptr);
                }
 -              if(revs->first_parent_only)
 +              if (revs->first_parent_only)
                        break;
        }
        return 0;
@@@ -994,16 -963,6 +994,6 @@@ static void add_message_grep(struct rev
        add_grep(revs, pattern, GREP_PATTERN_BODY);
  }
  
- static void add_ignore_packed(struct rev_info *revs, const char *name)
- {
-       int num = ++revs->num_ignore_packed;
-       revs->ignore_packed = xrealloc(revs->ignore_packed,
-                                      sizeof(const char *) * (num + 1));
-       revs->ignore_packed[num-1] = name;
-       revs->ignore_packed[num] = NULL;
- }
  static int handle_revision_opt(struct rev_info *revs, int argc, const char **argv,
                               int *unkc, const char **unkv)
  {
        } else if (!strcmp(arg, "--topo-order")) {
                revs->lifo = 1;
                revs->topo_order = 1;
 +      } else if (!strcmp(arg, "--simplify-merges")) {
 +              revs->simplify_merges = 1;
 +              revs->rewrite_parents = 1;
 +              revs->simplify_history = 0;
 +              revs->limited = 1;
 +      } else if (!strcmp(arg, "--simplify-by-decoration")) {
 +              revs->simplify_merges = 1;
 +              revs->rewrite_parents = 1;
 +              revs->simplify_history = 0;
 +              revs->simplify_by_decoration = 1;
 +              revs->limited = 1;
 +              revs->prune = 1;
 +              load_ref_decorations();
        } else if (!strcmp(arg, "--date-order")) {
                revs->lifo = 0;
                revs->topo_order = 1;
                revs->edge_hint = 1;
        } else if (!strcmp(arg, "--unpacked")) {
                revs->unpacked = 1;
-               free(revs->ignore_packed);
-               revs->ignore_packed = NULL;
-               revs->num_ignore_packed = 0;
        } else if (!prefixcmp(arg, "--unpacked=")) {
-               revs->unpacked = 1;
-               add_ignore_packed(revs, arg+11);
+               die("--unpacked=<packfile> no longer supported.");
        } else if (!strcmp(arg, "-r")) {
                revs->diff = 1;
                DIFF_OPT_SET(&revs->diffopt, RECURSIVE);
@@@ -1267,7 -1209,6 +1253,7 @@@ int setup_revisions(int argc, const cha
  
                        if (!strcmp(arg, "--all")) {
                                handle_refs(revs, flags, for_each_ref);
 +                              handle_refs(revs, flags, head_ref);
                                continue;
                        }
                        if (!strcmp(arg, "--branches")) {
@@@ -1400,179 -1341,6 +1386,179 @@@ static void add_child(struct rev_info *
        l->next = add_decoration(&revs->children, &parent->object, l);
  }
  
 +static int remove_duplicate_parents(struct commit *commit)
 +{
 +      struct commit_list **pp, *p;
 +      int surviving_parents;
 +
 +      /* Examine existing parents while marking ones we have seen... */
 +      pp = &commit->parents;
 +      while ((p = *pp) != NULL) {
 +              struct commit *parent = p->item;
 +              if (parent->object.flags & TMP_MARK) {
 +                      *pp = p->next;
 +                      continue;
 +              }
 +              parent->object.flags |= TMP_MARK;
 +              pp = &p->next;
 +      }
 +      /* count them while clearing the temporary mark */
 +      surviving_parents = 0;
 +      for (p = commit->parents; p; p = p->next) {
 +              p->item->object.flags &= ~TMP_MARK;
 +              surviving_parents++;
 +      }
 +      return surviving_parents;
 +}
 +
 +struct merge_simplify_state {
 +      struct commit *simplified;
 +};
 +
 +static struct merge_simplify_state *locate_simplify_state(struct rev_info *revs, struct commit *commit)
 +{
 +      struct merge_simplify_state *st;
 +
 +      st = lookup_decoration(&revs->merge_simplification, &commit->object);
 +      if (!st) {
 +              st = xcalloc(1, sizeof(*st));
 +              add_decoration(&revs->merge_simplification, &commit->object, st);
 +      }
 +      return st;
 +}
 +
 +static struct commit_list **simplify_one(struct rev_info *revs, struct commit *commit, struct commit_list **tail)
 +{
 +      struct commit_list *p;
 +      struct merge_simplify_state *st, *pst;
 +      int cnt;
 +
 +      st = locate_simplify_state(revs, commit);
 +
 +      /*
 +       * Have we handled this one?
 +       */
 +      if (st->simplified)
 +              return tail;
 +
 +      /*
 +       * An UNINTERESTING commit simplifies to itself, so does a
 +       * root commit.  We do not rewrite parents of such commit
 +       * anyway.
 +       */
 +      if ((commit->object.flags & UNINTERESTING) || !commit->parents) {
 +              st->simplified = commit;
 +              return tail;
 +      }
 +
 +      /*
 +       * Do we know what commit all of our parents should be rewritten to?
 +       * Otherwise we are not ready to rewrite this one yet.
 +       */
 +      for (cnt = 0, p = commit->parents; p; p = p->next) {
 +              pst = locate_simplify_state(revs, p->item);
 +              if (!pst->simplified) {
 +                      tail = &commit_list_insert(p->item, tail)->next;
 +                      cnt++;
 +              }
 +      }
 +      if (cnt) {
 +              tail = &commit_list_insert(commit, tail)->next;
 +              return tail;
 +      }
 +
 +      /*
 +       * Rewrite our list of parents.
 +       */
 +      for (p = commit->parents; p; p = p->next) {
 +              pst = locate_simplify_state(revs, p->item);
 +              p->item = pst->simplified;
 +      }
 +      cnt = remove_duplicate_parents(commit);
 +
 +      /*
 +       * It is possible that we are a merge and one side branch
 +       * does not have any commit that touches the given paths;
 +       * in such a case, the immediate parents will be rewritten
 +       * to different commits.
 +       *
 +       *      o----X          X: the commit we are looking at;
 +       *     /    /           o: a commit that touches the paths;
 +       * ---o----'
 +       *
 +       * Further reduce the parents by removing redundant parents.
 +       */
 +      if (1 < cnt) {
 +              struct commit_list *h = reduce_heads(commit->parents);
 +              cnt = commit_list_count(h);
 +              free_commit_list(commit->parents);
 +              commit->parents = h;
 +      }
 +
 +      /*
 +       * A commit simplifies to itself if it is a root, if it is
 +       * UNINTERESTING, if it touches the given paths, or if it is a
 +       * merge and its parents simplifies to more than one commits
 +       * (the first two cases are already handled at the beginning of
 +       * this function).
 +       *
 +       * Otherwise, it simplifies to what its sole parent simplifies to.
 +       */
 +      if (!cnt ||
 +          (commit->object.flags & UNINTERESTING) ||
 +          !(commit->object.flags & TREESAME) ||
 +          (1 < cnt))
 +              st->simplified = commit;
 +      else {
 +              pst = locate_simplify_state(revs, commit->parents->item);
 +              st->simplified = pst->simplified;
 +      }
 +      return tail;
 +}
 +
 +static void simplify_merges(struct rev_info *revs)
 +{
 +      struct commit_list *list;
 +      struct commit_list *yet_to_do, **tail;
 +
 +      if (!revs->topo_order)
 +              sort_in_topological_order(&revs->commits, revs->lifo);
 +      if (!revs->prune)
 +              return;
 +
 +      /* feed the list reversed */
 +      yet_to_do = NULL;
 +      for (list = revs->commits; list; list = list->next)
 +              commit_list_insert(list->item, &yet_to_do);
 +      while (yet_to_do) {
 +              list = yet_to_do;
 +              yet_to_do = NULL;
 +              tail = &yet_to_do;
 +              while (list) {
 +                      struct commit *commit = list->item;
 +                      struct commit_list *next = list->next;
 +                      free(list);
 +                      list = next;
 +                      tail = simplify_one(revs, commit, tail);
 +              }
 +      }
 +
 +      /* clean up the result, removing the simplified ones */
 +      list = revs->commits;
 +      revs->commits = NULL;
 +      tail = &revs->commits;
 +      while (list) {
 +              struct commit *commit = list->item;
 +              struct commit_list *next = list->next;
 +              struct merge_simplify_state *st;
 +              free(list);
 +              list = next;
 +              st = locate_simplify_state(revs, commit);
 +              if (st->simplified == commit)
 +                      tail = &commit_list_insert(commit, tail)->next;
 +      }
 +}
 +
  static void set_children(struct rev_info *revs)
  {
        struct commit_list *l;
@@@ -1613,8 -1381,6 +1599,8 @@@ int prepare_revision_walk(struct rev_in
                        return -1;
        if (revs->topo_order)
                sort_in_topological_order(&revs->commits, revs->lifo);
 +      if (revs->simplify_merges)
 +              simplify_merges(revs);
        if (revs->children.name)
                set_children(revs);
        return 0;
@@@ -1647,6 -1413,26 +1633,6 @@@ static enum rewrite_result rewrite_one(
        }
  }
  
 -static void remove_duplicate_parents(struct commit *commit)
 -{
 -      struct commit_list **pp, *p;
 -
 -      /* Examine existing parents while marking ones we have seen... */
 -      pp = &commit->parents;
 -      while ((p = *pp) != NULL) {
 -              struct commit *parent = p->item;
 -              if (parent->object.flags & TMP_MARK) {
 -                      *pp = p->next;
 -                      continue;
 -              }
 -              parent->object.flags |= TMP_MARK;
 -              pp = &p->next;
 -      }
 -      /* ... and clear the temporary mark */
 -      for (p = commit->parents; p; p = p->next)
 -              p->item->object.flags &= ~TMP_MARK;
 -}
 -
  static int rewrite_parents(struct rev_info *revs, struct commit *commit)
  {
        struct commit_list **pp = &commit->parents;
@@@ -1685,7 -1471,7 +1671,7 @@@ enum commit_action simplify_commit(stru
  {
        if (commit->object.flags & SHOWN)
                return commit_ignore;
-       if (revs->unpacked && has_sha1_pack(commit->object.sha1, revs->ignore_packed))
+       if (revs->unpacked && has_sha1_pack(commit->object.sha1))
                return commit_ignore;
        if (revs->show_all)
                return commit_show;
@@@ -1738,16 -1524,14 +1724,16 @@@ static struct commit *get_revision_1(st
                            (commit->date < revs->max_age))
                                continue;
                        if (add_parents_to_list(revs, commit, &revs->commits, NULL) < 0)
 -                              return NULL;
 +                              die("Failed to traverse parents of commit %s",
 +                                  sha1_to_hex(commit->object.sha1));
                }
  
                switch (simplify_commit(revs, commit)) {
                case commit_ignore:
                        continue;
                case commit_error:
 -                      return NULL;
 +                      die("Failed to simplify parents of commit %s",
 +                          sha1_to_hex(commit->object.sha1));
                default:
                        return commit;
                }
@@@ -1835,6 -1619,26 +1821,6 @@@ static struct commit *get_revision_inte
                return c;
        }
  
 -      if (revs->reverse) {
 -              int limit = -1;
 -
 -              if (0 <= revs->max_count) {
 -                      limit = revs->max_count;
 -                      if (0 < revs->skip_count)
 -                              limit += revs->skip_count;
 -              }
 -              l = NULL;
 -              while ((c = get_revision_1(revs))) {
 -                      commit_list_insert(c, &l);
 -                      if ((0 < limit) && !--limit)
 -                              break;
 -              }
 -              revs->commits = l;
 -              revs->reverse = 0;
 -              revs->max_count = -1;
 -              c = NULL;
 -      }
 -
        /*
         * Now pick up what they want to give us
         */
  
  struct commit *get_revision(struct rev_info *revs)
  {
 -      struct commit *c = get_revision_internal(revs);
 +      struct commit *c;
 +      struct commit_list *reversed;
 +
 +      if (revs->reverse) {
 +              reversed = NULL;
 +              while ((c = get_revision_internal(revs))) {
 +                      commit_list_insert(c, &reversed);
 +              }
 +              revs->commits = reversed;
 +              revs->reverse = 0;
 +              revs->reverse_output_stage = 1;
 +      }
 +
 +      if (revs->reverse_output_stage)
 +              return pop_commit(&revs->commits);
 +
 +      c = get_revision_internal(revs);
        if (c && revs->graph)
                graph_update(revs->graph, c);
        return c;
diff --combined revision.h
index 7cf848771b5be811f7741ce988b860760202f6f3,1d322759aab49f608ca9f225689ec2d945813d2a..66d211ac2e56be20fec686416dd6a2816b891239
@@@ -42,22 -42,17 +42,22 @@@ struct rev_info 
                        simplify_history:1,
                        lifo:1,
                        topo_order:1,
 +                      simplify_merges:1,
 +                      simplify_by_decoration:1,
                        tag_objects:1,
                        tree_objects:1,
                        blob_objects:1,
                        edge_hint:1,
                        limited:1,
-                       unpacked:1, /* see also ignore_packed below */
+                       unpacked:1,
                        boundary:2,
                        left_right:1,
                        rewrite_parents:1,
                        print_parents:1,
 +                      show_source:1,
 +                      show_decorations:1,
                        reverse:1,
 +                      reverse_output_stage:1,
                        cherry_pick:1,
                        first_parent_only:1;
  
@@@ -80,9 -75,6 +80,6 @@@
                        missing_newline:1;
        enum date_mode date_mode;
  
-       const char **ignore_packed; /* pretend objects in these are unpacked */
-       int num_ignore_packed;
        unsigned int    abbrev;
        enum cmit_fmt   commit_format;
        struct log_info *loginfo;
  
        struct reflog_walk_info *reflog_info;
        struct decoration children;
 +      struct decoration merge_simplification;
  };
  
  #define REV_TREE_SAME         0
diff --combined sha1_file.c
index a07aa4e5c491d16f65bc109673e806e24efb2274,500fd93127246fad72edca165fd4673070813f82..37e833b77d1ee556d18256154073a706ab11b3a4
@@@ -99,11 -99,7 +99,11 @@@ int safe_create_leading_directories(cha
                pos = strchr(pos, '/');
                if (!pos)
                        break;
 -              *pos = 0;
 +              while (*++pos == '/')
 +                      ;
 +              if (!*pos)
 +                      break;
 +              *--pos = '\0';
                if (!stat(path, &st)) {
                        /* path exists */
                        if (!S_ISDIR(st.st_mode)) {
@@@ -254,6 -250,7 +254,6 @@@ static void read_info_alternates(const 
   */
  static int link_alt_odb_entry(const char * entry, int len, const char * relative_base, int depth)
  {
 -      struct stat st;
        const char *objdir = get_object_directory();
        struct alternate_object_database *ent;
        struct alternate_object_database *alt;
        ent->base[pfxlen] = ent->base[entlen-1] = 0;
  
        /* Detect cases where alternate disappeared */
 -      if (stat(ent->base, &st) || !S_ISDIR(st.st_mode)) {
 +      if (!is_directory(ent->base)) {
                error("object directory %s does not exist; "
                      "check .git/objects/info/alternates.",
                      ent->base);
@@@ -397,16 -394,6 +397,16 @@@ void add_to_alternates_file(const char 
                link_alt_odb_entries(alt, alt + strlen(alt), '\n', NULL, 0);
  }
  
 +void foreach_alt_odb(alt_odb_fn fn, void *cb)
 +{
 +      struct alternate_object_database *ent;
 +
 +      prepare_alt_odb();
 +      for (ent = alt_odb_list; ent; ent = ent->next)
 +              if (fn(ent, cb))
 +                      return;
 +}
 +
  void prepare_alt_odb(void)
  {
        const char *alt;
@@@ -689,7 -676,6 +689,7 @@@ void free_pack_by_name(const char *pack
        while (*pp) {
                p = *pp;
                if (strcmp(pack_name, p->pack_name) == 0) {
 +                      clear_delta_base_cache();
                        close_pack_windows(p);
                        if (p->pack_fd != -1)
                                close(p->pack_fd);
@@@ -801,7 -787,7 +801,7 @@@ unsigned char* use_pack(struct packed_g
        if (p->pack_fd == -1 && open_packed_git(p))
                die("packfile %s cannot be accessed", p->pack_name);
  
 -      /* Since packfiles end in a hash of their content and its
 +      /* Since packfiles end in a hash of their content and it's
         * pointless to ask for an offset into the middle of that
         * hash, and the in_window function above wouldn't match
         * don't allow an offset too close to the end of the file.
@@@ -1154,8 -1140,7 +1154,8 @@@ static int legacy_loose_object(unsigne
                return 0;
  }
  
 -unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep)
 +unsigned long unpack_object_header_buffer(const unsigned char *buf,
 +              unsigned long len, enum object_type *type, unsigned long *sizep)
  {
        unsigned shift;
        unsigned char c;
        size = c & 15;
        shift = 4;
        while (c & 0x80) {
 -              if (len <= used)
 -                      return 0;
 -              if (sizeof(long) * 8 <= shift)
 +              if (len <= used || sizeof(long) * 8 <= shift) {
 +                      error("bad object header");
                        return 0;
 +              }
                c = buf[used++];
                size += (c & 0x7f) << shift;
                shift += 7;
@@@ -1197,8 -1182,8 +1197,8 @@@ static int unpack_sha1_header(z_stream 
        stream->avail_out = bufsiz;
  
        if (legacy_loose_object(map)) {
 -              inflateInit(stream);
 -              return inflate(stream, 0);
 +              git_inflate_init(stream);
 +              return git_inflate(stream, 0);
        }
  
  
         * really worth it and we don't write it any longer.  But we
         * can still read it.
         */
 -      used = unpack_object_header_gently(map, mapsize, &type, &size);
 +      used = unpack_object_header_buffer(map, mapsize, &type, &size);
        if (!used || !valid_loose_object_type[type])
                return -1;
        map += used;
        /* Set up the stream for the rest.. */
        stream->next_in = map;
        stream->avail_in = mapsize;
 -      inflateInit(stream);
 +      git_inflate_init(stream);
  
        /* And generate the fake traditional header */
        stream->total_out = 1 + snprintf(buffer, bufsiz, "%s %lu",
@@@ -1255,11 -1240,11 +1255,11 @@@ static void *unpack_sha1_rest(z_stream 
                stream->next_out = buf + bytes;
                stream->avail_out = size - bytes;
                while (status == Z_OK)
 -                      status = inflate(stream, Z_FINISH);
 +                      status = git_inflate(stream, Z_FINISH);
        }
        buf[size] = 0;
        if (status == Z_STREAM_END && !stream->avail_in) {
 -              inflateEnd(stream);
 +              git_inflate_end(stream);
                return buf;
        }
  
@@@ -1349,19 -1334,17 +1349,19 @@@ unsigned long get_size_from_delta(struc
        stream.next_out = delta_head;
        stream.avail_out = sizeof(delta_head);
  
 -      inflateInit(&stream);
 +      git_inflate_init(&stream);
        do {
                in = use_pack(p, w_curs, curpos, &stream.avail_in);
                stream.next_in = in;
 -              st = inflate(&stream, Z_FINISH);
 +              st = git_inflate(&stream, Z_FINISH);
                curpos += stream.next_in - in;
        } while ((st == Z_OK || st == Z_BUF_ERROR) &&
                 stream.total_out < sizeof(delta_head));
 -      inflateEnd(&stream);
 -      if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head))
 -              die("delta data unpack-initial failed");
 +      git_inflate_end(&stream);
 +      if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
 +              error("delta data unpack-initial failed");
 +              return 0;
 +      }
  
        /* Examine the initial part of the delta to figure out
         * the result size.
@@@ -1402,7 -1385,7 +1402,7 @@@ static off_t get_delta_base(struct pack
                        base_offset = (base_offset << 7) + (c & 127);
                }
                base_offset = delta_obj_offset - base_offset;
 -              if (base_offset >= delta_obj_offset)
 +              if (base_offset <= 0 || base_offset >= delta_obj_offset)
                        return 0;  /* out of bound */
                *curpos += used;
        } else if (type == OBJ_REF_DELTA) {
@@@ -1428,32 -1411,15 +1428,32 @@@ static int packed_delta_info(struct pac
        off_t base_offset;
  
        base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
 +      if (!base_offset)
 +              return OBJ_BAD;
        type = packed_object_info(p, base_offset, NULL);
 +      if (type <= OBJ_NONE) {
 +              struct revindex_entry *revidx;
 +              const unsigned char *base_sha1;
 +              revidx = find_pack_revindex(p, base_offset);
 +              if (!revidx)
 +                      return OBJ_BAD;
 +              base_sha1 = nth_packed_object_sha1(p, revidx->nr);
 +              mark_bad_packed_object(p, base_sha1);
 +              type = sha1_object_info(base_sha1, NULL);
 +              if (type <= OBJ_NONE)
 +                      return OBJ_BAD;
 +      }
  
        /* We choose to only get the type of the base object and
         * ignore potentially corrupt pack file that expects the delta
         * based on a base with a wrong size.  This saves tons of
         * inflate() calls.
         */
 -      if (sizep)
 +      if (sizep) {
                *sizep = get_size_from_delta(p, w_curs, curpos);
 +              if (*sizep == 0)
 +                      type = OBJ_BAD;
 +      }
  
        return type;
  }
@@@ -1475,11 -1441,10 +1475,11 @@@ static int unpack_object_header(struct 
         * insane, so we know won't exceed what we have been given.
         */
        base = use_pack(p, w_curs, *curpos, &left);
 -      used = unpack_object_header_gently(base, left, &type, sizep);
 -      if (!used)
 -              die("object offset outside of pack file");
 -      *curpos += used;
 +      used = unpack_object_header_buffer(base, left, &type, sizep);
 +      if (!used) {
 +              type = OBJ_BAD;
 +      } else
 +              *curpos += used;
  
        return type;
  }
@@@ -1563,9 -1528,8 +1563,9 @@@ static int packed_object_info(struct pa
                        *sizep = size;
                break;
        default:
 -              die("pack %s contains unknown object type %d",
 -                  p->pack_name, type);
 +              error("unknown object type %i at offset %"PRIuMAX" in %s",
 +                    type, (uintmax_t)obj_offset, p->pack_name);
 +              type = OBJ_BAD;
        }
        unuse_pack(&w_curs);
        return type;
@@@ -1586,14 -1550,14 +1586,14 @@@ static void *unpack_compressed_entry(st
        stream.next_out = buffer;
        stream.avail_out = size;
  
 -      inflateInit(&stream);
 +      git_inflate_init(&stream);
        do {
                in = use_pack(p, w_curs, curpos, &stream.avail_in);
                stream.next_in = in;
 -              st = inflate(&stream, Z_FINISH);
 +              st = git_inflate(&stream, Z_FINISH);
                curpos += stream.next_in - in;
        } while (st == Z_OK || st == Z_BUF_ERROR);
 -      inflateEnd(&stream);
 +      git_inflate_end(&stream);
        if ((st != Z_STREAM_END) || stream.total_out != size) {
                free(buffer);
                return NULL;
@@@ -1637,9 -1601,11 +1637,9 @@@ static void *cache_or_unpack_entry(stru
        struct delta_base_cache_entry *ent = delta_base_cache + hash;
  
        ret = ent->data;
 -      if (ret && ent->p == p && ent->base_offset == base_offset)
 -              goto found_cache_entry;
 -      return unpack_entry(p, base_offset, type, base_size);
 +      if (!ret || ent->p != p || ent->base_offset != base_offset)
 +              return unpack_entry(p, base_offset, type, base_size);
  
 -found_cache_entry:
        if (!keep_cache) {
                ent->data = NULL;
                ent->lru.next->prev = ent->lru.prev;
@@@ -1664,13 -1630,6 +1664,13 @@@ static inline void release_delta_base_c
        }
  }
  
 +void clear_delta_base_cache(void)
 +{
 +      unsigned long p;
 +      for (p = 0; p < MAX_DELTA_CACHE; p++)
 +              release_delta_base_cache(&delta_base_cache[p]);
 +}
 +
  static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
        void *base, unsigned long base_size, enum object_type type)
  {
        delta_base_cache_lru.prev = &ent->lru;
  }
  
 +static void *read_object(const unsigned char *sha1, enum object_type *type,
 +                       unsigned long *size);
 +
  static void *unpack_delta_entry(struct packed_git *p,
                                struct pack_window **w_curs,
                                off_t curpos,
                 * This is costly but should happen only in the presence
                 * of a corrupted pack, and is better than failing outright.
                 */
 -              struct revindex_entry *revidx = find_pack_revindex(p, base_offset);
 -              const unsigned char *base_sha1 =
 -                                      nth_packed_object_sha1(p, revidx->nr);
 +              struct revindex_entry *revidx;
 +              const unsigned char *base_sha1;
 +              revidx = find_pack_revindex(p, base_offset);
 +              if (!revidx)
 +                      return NULL;
 +              base_sha1 = nth_packed_object_sha1(p, revidx->nr);
                error("failed to read delta base object %s"
                      " at offset %"PRIuMAX" from %s",
                      sha1_to_hex(base_sha1), (uintmax_t)base_offset,
        return result;
  }
  
 +int do_check_packed_object_crc;
 +
  void *unpack_entry(struct packed_git *p, off_t obj_offset,
                   enum object_type *type, unsigned long *sizep)
  {
        off_t curpos = obj_offset;
        void *data;
  
 +      if (do_check_packed_object_crc && p->index_version > 1) {
 +              struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
 +              unsigned long len = revidx[1].offset - obj_offset;
 +              if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
 +                      const unsigned char *sha1 =
 +                              nth_packed_object_sha1(p, revidx->nr);
 +                      error("bad packed object CRC for %s",
 +                            sha1_to_hex(sha1));
 +                      mark_bad_packed_object(p, sha1);
 +                      unuse_pack(&w_curs);
 +                      return NULL;
 +              }
 +      }
 +
        *type = unpack_object_header(p, &w_curs, &curpos, sizep);
        switch (*type) {
        case OBJ_OFS_DELTA:
@@@ -1919,25 -1856,7 +1919,7 @@@ off_t find_pack_entry_one(const unsigne
        return 0;
  }
  
- int matches_pack_name(struct packed_git *p, const char *name)
- {
-       const char *last_c, *c;
-       if (!strcmp(p->pack_name, name))
-               return 1;
-       for (c = p->pack_name, last_c = c; *c;)
-               if (*c == '/')
-                       last_c = ++c;
-               else
-                       ++c;
-       if (!strcmp(last_c, name))
-               return 1;
-       return 0;
- }
- static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, const char **ignore_packed)
+ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
  {
        static struct packed_git *last_found = (void *)1;
        struct packed_git *p;
        p = (last_found == (void *)1) ? packed_git : last_found;
  
        do {
-               if (ignore_packed) {
-                       const char **ig;
-                       for (ig = ignore_packed; *ig; ig++)
-                               if (matches_pack_name(p, *ig))
-                                       break;
-                       if (*ig)
-                               goto next;
-               }
                if (p->num_bad_objects) {
                        unsigned i;
                        for (i = 0; i < p->num_bad_objects; i++)
@@@ -2028,7 -1938,7 +2001,7 @@@ static int sha1_loose_object_info(cons
                status = error("unable to parse %s header", sha1_to_hex(sha1));
        else if (sizep)
                *sizep = size;
 -      inflateEnd(&stream);
 +      git_inflate_end(&stream);
        munmap(map, mapsize);
        return status;
  }
@@@ -2038,7 -1948,7 +2011,7 @@@ int sha1_object_info(const unsigned cha
        struct pack_entry e;
        int status;
  
-       if (!find_pack_entry(sha1, &e, NULL)) {
+       if (!find_pack_entry(sha1, &e)) {
                /* Most likely it's a loose object. */
                status = sha1_loose_object_info(sha1, sizep);
                if (status >= 0)
  
                /* Not a loose object; someone else may have just packed it. */
                reprepare_packed_git();
-               if (!find_pack_entry(sha1, &e, NULL))
+               if (!find_pack_entry(sha1, &e))
                        return status;
        }
 -      return packed_object_info(e.p, e.offset, sizep);
 +
 +      status = packed_object_info(e.p, e.offset, sizep);
 +      if (status < 0) {
 +              mark_bad_packed_object(e.p, sha1);
 +              status = sha1_object_info(sha1, sizep);
 +      }
 +
 +      return status;
  }
  
  static void *read_packed_sha1(const unsigned char *sha1,
        struct pack_entry e;
        void *data;
  
-       if (!find_pack_entry(sha1, &e, NULL))
+       if (!find_pack_entry(sha1, &e))
                return NULL;
        data = cache_or_unpack_entry(e.p, e.offset, size, type, 1);
        if (!data) {
@@@ -2098,7 -2001,9 +2071,7 @@@ static struct cached_object 
  static int cached_object_nr, cached_object_alloc;
  
  static struct cached_object empty_tree = {
 -      /* empty tree sha1: 4b825dc642cb6eb9a060e54bf8d69288fbee4904 */
 -      "\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60"
 -      "\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04",
 +      EMPTY_TREE_SHA1_BIN,
        OBJ_TREE,
        "",
        0
@@@ -2141,8 -2046,8 +2114,8 @@@ int pretend_sha1_file(void *buf, unsign
        return 0;
  }
  
 -void *read_object(const unsigned char *sha1, enum object_type *type,
 -                unsigned long *size)
 +static void *read_object(const unsigned char *sha1, enum object_type *type,
 +                       unsigned long *size)
  {
        unsigned long mapsize;
        void *map, *buf;
@@@ -2230,16 -2135,16 +2203,16 @@@ static void write_sha1_file_prepare(con
                                      const char *type, unsigned char *sha1,
                                      char *hdr, int *hdrlen)
  {
 -      SHA_CTX c;
 +      git_SHA_CTX c;
  
        /* Generate the header */
        *hdrlen = sprintf(hdr, "%s %lu", type, len)+1;
  
        /* Sha1.. */
 -      SHA1_Init(&c);
 -      SHA1_Update(&c, hdr, *hdrlen);
 -      SHA1_Update(&c, buf, len);
 -      SHA1_Final(sha1, &c);
 +      git_SHA1_Init(&c);
 +      git_SHA1_Update(&c, hdr, *hdrlen);
 +      git_SHA1_Update(&c, buf, len);
 +      git_SHA1_Final(sha1, &c);
  }
  
  /*
@@@ -2301,7 -2206,7 +2274,7 @@@ static void close_sha1_file(int fd
                fsync_or_die(fd, "sha1 file");
        fchmod(fd, 0444);
        if (close(fd) != 0)
 -              die("unable to write sha1 file");
 +              die("error when closing sha1 file (%s)", strerror(errno));
  }
  
  /* Size of directory component, including the ending '/' */
@@@ -2348,8 -2253,7 +2321,8 @@@ static int create_tmpfile(char *buffer
  static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
                              void *buf, unsigned long len, time_t mtime)
  {
 -      int fd, size, ret;
 +      int fd, ret;
 +      size_t size;
        unsigned char *compressed;
        z_stream stream;
        char *filename;
@@@ -2464,36 -2368,66 +2437,36 @@@ int has_pack_file(const unsigned char *
        return 1;
  }
  
- int has_sha1_pack(const unsigned char *sha1, const char **ignore_packed)
+ int has_sha1_pack(const unsigned char *sha1)
  {
        struct pack_entry e;
-       return find_pack_entry(sha1, &e, ignore_packed);
+       return find_pack_entry(sha1, &e);
  }
  
  int has_sha1_file(const unsigned char *sha1)
  {
        struct pack_entry e;
  
-       if (find_pack_entry(sha1, &e, NULL))
+       if (find_pack_entry(sha1, &e))
                return 1;
        return has_loose_object(sha1);
  }
  
 -int index_pipe(unsigned char *sha1, int fd, const char *type, int write_object)
 -{
 -      struct strbuf buf;
 -      int ret;
 -
 -      strbuf_init(&buf, 0);
 -      if (strbuf_read(&buf, fd, 4096) < 0) {
 -              strbuf_release(&buf);
 -              return -1;
 -      }
 -
 -      if (!type)
 -              type = blob_type;
 -      if (write_object)
 -              ret = write_sha1_file(buf.buf, buf.len, type, sha1);
 -      else
 -              ret = hash_sha1_file(buf.buf, buf.len, type, sha1);
 -      strbuf_release(&buf);
 -
 -      return ret;
 -}
 -
 -int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
 -           enum object_type type, const char *path)
 +static int index_mem(unsigned char *sha1, void *buf, size_t size,
 +                   int write_object, enum object_type type, const char *path)
  {
 -      size_t size = xsize_t(st->st_size);
 -      void *buf = NULL;
        int ret, re_allocated = 0;
  
 -      if (size)
 -              buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
 -      close(fd);
 -
        if (!type)
                type = OBJ_BLOB;
  
        /*
         * Convert blobs to git internal format
         */
 -      if ((type == OBJ_BLOB) && S_ISREG(st->st_mode)) {
 -              struct strbuf nbuf;
 -              strbuf_init(&nbuf, 0);
 +      if ((type == OBJ_BLOB) && path) {
 +              struct strbuf nbuf = STRBUF_INIT;
                if (convert_to_git(path, buf, size, &nbuf,
                                   write_object ? safe_crlf : 0)) {
 -                      munmap(buf, size);
                        buf = strbuf_detach(&nbuf, &size);
                        re_allocated = 1;
                }
                ret = write_sha1_file(buf, size, typename(type), sha1);
        else
                ret = hash_sha1_file(buf, size, typename(type), sha1);
 -      if (re_allocated) {
 +      if (re_allocated)
                free(buf);
 -              return ret;
 -      }
 -      if (size)
 +      return ret;
 +}
 +
 +int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
 +           enum object_type type, const char *path)
 +{
 +      int ret;
 +      size_t size = xsize_t(st->st_size);
 +
 +      if (!S_ISREG(st->st_mode)) {
 +              struct strbuf sbuf = STRBUF_INIT;
 +              if (strbuf_read(&sbuf, fd, 4096) >= 0)
 +                      ret = index_mem(sha1, sbuf.buf, sbuf.len, write_object,
 +                                      type, path);
 +              else
 +                      ret = -1;
 +              strbuf_release(&sbuf);
 +      } else if (size) {
 +              void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
 +              ret = index_mem(sha1, buf, size, write_object, type, path);
                munmap(buf, size);
 +      } else
 +              ret = index_mem(sha1, NULL, size, write_object, type, path);
 +      close(fd);
        return ret;
  }
  
  int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object)
  {
        int fd;
 -      char *target;
 -      size_t len;
 +      struct strbuf sb = STRBUF_INIT;
  
        switch (st->st_mode & S_IFMT) {
        case S_IFREG:
                                     path);
                break;
        case S_IFLNK:
 -              len = xsize_t(st->st_size);
 -              target = xmalloc(len + 1);
 -              if (readlink(path, target, len + 1) != st->st_size) {
 +              if (strbuf_readlink(&sb, path, st->st_size)) {
                        char *errstr = strerror(errno);
 -                      free(target);
                        return error("readlink(\"%s\"): %s", path,
                                     errstr);
                }
                if (!write_object)
 -                      hash_sha1_file(target, len, blob_type, sha1);
 -              else if (write_sha1_file(target, len, blob_type, sha1))
 +                      hash_sha1_file(sb.buf, sb.len, blob_type, sha1);
 +              else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1))
                        return error("%s: failed to insert into database",
                                     path);
 -              free(target);
 +              strbuf_release(&sb);
                break;
        case S_IFDIR:
                return resolve_gitlink_ref(path, "HEAD", sha1);
diff --combined t/t7700-repack.sh
index f5682d66db2832311774fb68b7264002dfeb091f,9ce546e3b225563279c54eb6ceafd87398a3e5cc..6b29bff782f5a46bb6970d70598fd3be82c679fa
@@@ -69,24 -69,66 +69,85 @@@ test_expect_success 'packed obs in alt 
        done
  '
  
 +test_expect_failure 'packed obs in alt ODB are repacked when local repo has packs' '
 +      rm -f .git/objects/pack/* &&
 +      echo new_content >> file1 &&
 +      git add file1 &&
 +      git commit -m more_content &&
 +      git repack &&
 +      git repack -a -d &&
 +      myidx=$(ls -1 .git/objects/pack/*.idx) &&
 +      test -f "$myidx" &&
 +      for p in alt_objects/pack/*.idx; do
 +              git verify-pack -v $p | sed -n -e "/^[0-9a-f]\{40\}/p"
 +      done | while read sha1 rest; do
 +              if ! ( git verify-pack -v $myidx | grep "^$sha1" ); then
 +                      echo "Missing object in local pack: $sha1"
 +                      return 1
 +              fi
 +      done
 +'
 +
+ test_expect_success 'packed obs in alternate ODB kept pack are repacked' '
+       # swap the .keep so the commit object is in the pack with .keep
+       for p in alt_objects/pack/*.pack
+       do
+               base_name=$(basename $p .pack)
+               if test -f alt_objects/pack/$base_name.keep
+               then
+                       rm alt_objects/pack/$base_name.keep
+               else
+                       touch alt_objects/pack/$base_name.keep
+               fi
+       done
+       git repack -a -d &&
+       myidx=$(ls -1 .git/objects/pack/*.idx) &&
+       test -f "$myidx" &&
+       for p in alt_objects/pack/*.idx; do
+               git verify-pack -v $p | sed -n -e "/^[0-9a-f]\{40\}/p"
+       done | while read sha1 rest; do
+               if ! ( git verify-pack -v $myidx | grep "^$sha1" ); then
+                       echo "Missing object in local pack: $sha1"
+                       return 1
+               fi
+       done
+ '
+ test_expect_success 'packed unreachable obs in alternate ODB are not loosened' '
+       rm -f alt_objects/pack/*.keep &&
+       mv .git/objects/pack/* alt_objects/pack/ &&
+       csha1=$(git rev-parse HEAD^{commit}) &&
+       git reset --hard HEAD^ &&
+       sleep 1 &&
+       git reflog expire --expire=now --expire-unreachable=now --all &&
+       # The pack-objects call on the next line is equivalent to
+       # git repack -A -d without the call to prune-packed
+       git pack-objects --honor-pack-keep --non-empty --all --reflog \
+           --unpack-unreachable </dev/null pack &&
+       rm -f .git/objects/pack/* &&
+       mv pack-* .git/objects/pack/ &&
+       test 0 = $(git verify-pack -v -- .git/objects/pack/*.idx |
+               egrep "^$csha1 " | sort | uniq | wc -l) &&
+       echo > .git/objects/info/alternates &&
+       test_must_fail git show $csha1
+ '
+ test_expect_success 'local packed unreachable obs that exist in alternate ODB are not loosened' '
+       echo `pwd`/alt_objects > .git/objects/info/alternates &&
+       echo "$csha1" | git pack-objects --non-empty --all --reflog pack &&
+       rm -f .git/objects/pack/* &&
+       mv pack-* .git/objects/pack/ &&
+       # The pack-objects call on the next line is equivalent to
+       # git repack -A -d without the call to prune-packed
+       git pack-objects --honor-pack-keep --non-empty --all --reflog \
+           --unpack-unreachable </dev/null pack &&
+       rm -f .git/objects/pack/* &&
+       mv pack-* .git/objects/pack/ &&
+       test 0 = $(git verify-pack -v -- .git/objects/pack/*.idx |
+               egrep "^$csha1 " | sort | uniq | wc -l) &&
+       echo > .git/objects/info/alternates &&
+       test_must_fail git show $csha1
+ '
  test_done