Merge branch 'jk/maint-null-in-trees' into maint-1.7.11
author Junio C Hamano <gitster@pobox.com>
Mon, 10 Sep 2012 22:24:53 +0000 (15:24 -0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 10 Sep 2012 22:24:54 +0000 (15:24 -0700)
"git diff" had a confusion between taking data from a path in the
working tree and taking data from an object that happens to have
name 0{40} recorded in a tree.

* jk/maint-null-in-trees:
fsck: detect null sha1 in tree entries
do not write null sha1s to on-disk index
diff: do not use null sha1 as a sentinel value

14 files changed:
1  2 
builtin.h
builtin/blame.c
builtin/cat-file.c
builtin/diff.c
combine-diff.c
diff-no-index.c
diff.c
diff.h
diffcore-rename.c
diffcore.h
fsck.c
read-cache.c
revision.c
t/t1450-fsck.sh
diff --combined builtin.h
index cd9141932b3d77c3d5c5b04908ea3b3298c4a20d,47f540f37f908c7539bcdbde5dd7550af8c92e72..dffb34ef4e29799bcb9a8c78bdf439547845794f
+++ b/builtin.h
@@@ -41,10 -41,8 +41,10 @@@ int copy_note_for_rewrite(struct notes_
  void finish_copy_notes_for_rewrite(struct notes_rewrite_cfg *c);
  
  extern int check_pager_config(const char *cmd);
 +struct diff_options;
 +extern void setup_diff_pager(struct diff_options *);
  
- extern int textconv_object(const char *path, unsigned mode, const unsigned char *sha1, char **buf, unsigned long *buf_size);
+ extern int textconv_object(const char *path, unsigned mode, const unsigned char *sha1, int sha1_valid, char **buf, unsigned long *buf_size);
  
  extern int cmd_add(int argc, const char **argv, const char *prefix);
  extern int cmd_annotate(int argc, const char **argv, const char *prefix);
@@@ -63,7 -61,6 +63,7 @@@ extern int cmd_cherry(int argc, const c
  extern int cmd_cherry_pick(int argc, const char **argv, const char *prefix);
  extern int cmd_clone(int argc, const char **argv, const char *prefix);
  extern int cmd_clean(int argc, const char **argv, const char *prefix);
 +extern int cmd_column(int argc, const char **argv, const char *prefix);
  extern int cmd_commit(int argc, const char **argv, const char *prefix);
  extern int cmd_commit_tree(int argc, const char **argv, const char *prefix);
  extern int cmd_config(int argc, const char **argv, const char *prefix);
@@@ -85,6 -82,7 +85,6 @@@ extern int cmd_get_tar_commit_id(int ar
  extern int cmd_grep(int argc, const char **argv, const char *prefix);
  extern int cmd_hash_object(int argc, const char **argv, const char *prefix);
  extern int cmd_help(int argc, const char **argv, const char *prefix);
 -extern int cmd_http_fetch(int argc, const char **argv, const char *prefix);
  extern int cmd_index_pack(int argc, const char **argv, const char *prefix);
  extern int cmd_init_db(int argc, const char **argv, const char *prefix);
  extern int cmd_log(int argc, const char **argv, const char *prefix);
@@@ -109,6 -107,7 +109,6 @@@ extern int cmd_notes(int argc, const ch
  extern int cmd_pack_objects(int argc, const char **argv, const char *prefix);
  extern int cmd_pack_redundant(int argc, const char **argv, const char *prefix);
  extern int cmd_patch_id(int argc, const char **argv, const char *prefix);
 -extern int cmd_pickaxe(int argc, const char **argv, const char *prefix);
  extern int cmd_prune(int argc, const char **argv, const char *prefix);
  extern int cmd_prune_packed(int argc, const char **argv, const char *prefix);
  extern int cmd_push(int argc, const char **argv, const char *prefix);
@@@ -141,6 -140,7 +141,6 @@@ extern int cmd_update_ref(int argc, con
  extern int cmd_update_server_info(int argc, const char **argv, const char *prefix);
  extern int cmd_upload_archive(int argc, const char **argv, const char *prefix);
  extern int cmd_upload_archive_writer(int argc, const char **argv, const char *prefix);
 -extern int cmd_upload_tar(int argc, const char **argv, const char *prefix);
  extern int cmd_var(int argc, const char **argv, const char *prefix);
  extern int cmd_verify_tag(int argc, const char **argv, const char *prefix);
  extern int cmd_version(int argc, const char **argv, const char *prefix);
diff --combined builtin/blame.c
index 960c58d855a6f1a04ad1d08637fc75c3da240a30,fac0e93e67eeee8bef709670da1cbfe77055137d..a9705d06a5364e881eb894c5b401e6a3b953202d
@@@ -88,20 -88,6 +88,20 @@@ struct origin 
        char path[FLEX_ARRAY];
  };
  
 +static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b, long ctxlen,
 +                    xdl_emit_hunk_consume_func_t hunk_func, void *cb_data)
 +{
 +      xpparam_t xpp = {0};
 +      xdemitconf_t xecfg = {0};
 +      xdemitcb_t ecb = {NULL};
 +
 +      xpp.flags = xdl_opts;
 +      xecfg.ctxlen = ctxlen;
 +      xecfg.hunk_func = hunk_func;
 +      ecb.priv = cb_data;
 +      return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
 +}
 +
  /*
   * Prepare diff_filespec and convert it using diff textconv API
   * if the textconv driver exists.
  int textconv_object(const char *path,
                    unsigned mode,
                    const unsigned char *sha1,
+                   int sha1_valid,
                    char **buf,
                    unsigned long *buf_size)
  {
        struct userdiff_driver *textconv;
  
        df = alloc_filespec(path);
-       fill_filespec(df, sha1, mode);
+       fill_filespec(df, sha1, sha1_valid, mode);
        textconv = get_textconv(df);
        if (!textconv) {
                free_filespec(df);
@@@ -142,7 -129,7 +143,7 @@@ static void fill_origin_blob(struct dif
  
                num_read_blob++;
                if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
-                   textconv_object(o->path, o->mode, o->blob_sha1, &file->ptr, &file_size))
+                   textconv_object(o->path, o->mode, o->blob_sha1, 1, &file->ptr, &file_size))
                        ;
                else
                        file->ptr = read_sha1_file(o->blob_sha1, &type, &file_size);
@@@ -773,14 -760,12 +774,14 @@@ struct blame_chunk_cb_data 
        long tlno;
  };
  
 -static void blame_chunk_cb(void *data, long same, long p_next, long t_next)
 +static int blame_chunk_cb(long start_a, long count_a,
 +                        long start_b, long count_b, void *data)
  {
        struct blame_chunk_cb_data *d = data;
 -      blame_chunk(d->sb, d->tlno, d->plno, same, d->target, d->parent);
 -      d->plno = p_next;
 -      d->tlno = t_next;
 +      blame_chunk(d->sb, d->tlno, d->plno, start_b, d->target, d->parent);
 +      d->plno = start_a + count_a;
 +      d->tlno = start_b + count_b;
 +      return 0;
  }
  
  /*
@@@ -795,7 -780,8 +796,7 @@@ static int pass_blame_to_parent(struct 
        int last_in_target;
        mmfile_t file_p, file_o;
        struct blame_chunk_cb_data d;
 -      xpparam_t xpp;
 -      xdemitconf_t xecfg;
 +
        memset(&d, 0, sizeof(d));
        d.sb = sb; d.target = target; d.parent = parent;
        last_in_target = find_last_in_target(sb, target);
        fill_origin_blob(&sb->revs->diffopt, target, &file_o);
        num_get_patch++;
  
 -      memset(&xpp, 0, sizeof(xpp));
 -      xpp.flags = xdl_opts;
 -      memset(&xecfg, 0, sizeof(xecfg));
 -      xecfg.ctxlen = 0;
 -      xdi_diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, &xpp, &xecfg);
 +      diff_hunks(&file_p, &file_o, 0, blame_chunk_cb, &d);
        /* The rest (i.e. anything after tlno) are the same as the parent */
        blame_chunk(sb, d.tlno, d.plno, last_in_target, target, parent);
  
@@@ -910,15 -900,12 +911,15 @@@ struct handle_split_cb_data 
        long tlno;
  };
  
 -static void handle_split_cb(void *data, long same, long p_next, long t_next)
 +static int handle_split_cb(long start_a, long count_a,
 +                         long start_b, long count_b, void *data)
  {
        struct handle_split_cb_data *d = data;
 -      handle_split(d->sb, d->ent, d->tlno, d->plno, same, d->parent, d->split);
 -      d->plno = p_next;
 -      d->tlno = t_next;
 +      handle_split(d->sb, d->ent, d->tlno, d->plno, start_b, d->parent,
 +                   d->split);
 +      d->plno = start_a + count_a;
 +      d->tlno = start_b + count_b;
 +      return 0;
  }
  
  /*
@@@ -936,7 -923,8 +937,7 @@@ static void find_copy_in_blob(struct sc
        int cnt;
        mmfile_t file_o;
        struct handle_split_cb_data d;
 -      xpparam_t xpp;
 -      xdemitconf_t xecfg;
 +
        memset(&d, 0, sizeof(d));
        d.sb = sb; d.ent = ent; d.parent = parent; d.split = split;
        /*
         * file_o is a part of final image we are annotating.
         * file_p partially may match that image.
         */
 -      memset(&xpp, 0, sizeof(xpp));
 -      xpp.flags = xdl_opts;
 -      memset(&xecfg, 0, sizeof(xecfg));
 -      xecfg.ctxlen = 1;
        memset(split, 0, sizeof(struct blame_entry [3]));
 -      xdi_diff_hunks(file_p, &file_o, handle_split_cb, &d, &xpp, &xecfg);
 +      diff_hunks(file_p, &file_o, 1, handle_split_cb, &d);
        /* remainder, if any, all match the preimage */
        handle_split(sb, ent, d.tlno, d.plno, ent->num_lines, parent, split);
  }
@@@ -1837,14 -1829,16 +1838,14 @@@ static int read_ancestry(const char *gr
        return 0;
  }
  
 -/*
 - * How many columns do we need to show line numbers in decimal?
 - */
 -static int lineno_width(int lines)
 +static int update_auto_abbrev(int auto_abbrev, struct origin *suspect)
  {
 -      int i, width;
 -
 -      for (width = 1, i = 10; i <= lines; width++)
 -              i *= 10;
 -      return width;
 +      const char *uniq = find_unique_abbrev(suspect->commit->object.sha1,
 +                                            auto_abbrev);
 +      int len = strlen(uniq);
 +      if (auto_abbrev < len)
 +              return len;
 +      return auto_abbrev;
  }
  
  /*
@@@ -1857,16 -1851,12 +1858,16 @@@ static void find_alignment(struct score
        int longest_dst_lines = 0;
        unsigned largest_score = 0;
        struct blame_entry *e;
 +      int compute_auto_abbrev = (abbrev < 0);
 +      int auto_abbrev = default_abbrev;
  
        for (e = sb->ent; e; e = e->next) {
                struct origin *suspect = e->suspect;
                struct commit_info ci;
                int num;
  
 +              if (compute_auto_abbrev)
 +                      auto_abbrev = update_auto_abbrev(auto_abbrev, suspect);
                if (strcmp(suspect->path, sb->path))
                        *option |= OUTPUT_SHOW_NAME;
                num = strlen(suspect->path);
                if (largest_score < ent_score(sb, e))
                        largest_score = ent_score(sb, e);
        }
 -      max_orig_digits = lineno_width(longest_src_lines);
 -      max_digits = lineno_width(longest_dst_lines);
 -      max_score_digits = lineno_width(largest_score);
 +      max_orig_digits = decimal_width(longest_src_lines);
 +      max_digits = decimal_width(longest_dst_lines);
 +      max_score_digits = decimal_width(largest_score);
 +
 +      if (compute_auto_abbrev)
 +              /* one more abbrev length is needed for the boundary commit */
 +              abbrev = auto_abbrev + 1;
  }
  
  /*
@@@ -2065,8 -2051,14 +2066,8 @@@ static int git_blame_config(const char 
                return 0;
        }
  
 -      switch (userdiff_config(var, value)) {
 -      case 0:
 -              break;
 -      case -1:
 +      if (userdiff_config(var, value) < 0)
                return -1;
 -      default:
 -              return 0;
 -      }
  
        return git_default_config(var, value, cb);
  }
@@@ -2123,7 -2115,7 +2124,7 @@@ static struct commit *fake_working_tree
                switch (st.st_mode & S_IFMT) {
                case S_IFREG:
                        if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
-                           textconv_object(read_from, mode, null_sha1, &buf_ptr, &buf_len))
+                           textconv_object(read_from, mode, null_sha1, 0, &buf_ptr, &buf_len))
                                strbuf_attach(&buf, buf_ptr, buf_len, buf_len + 1);
                        else if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size)
                                die_errno("cannot open or read '%s'", read_from);
@@@ -2329,7 -2321,6 +2330,7 @@@ int cmd_blame(int argc, const char **ar
                OPT_BIT('s', NULL, &output_option, "Suppress author name and timestamp (Default: off)", OUTPUT_NO_AUTHOR),
                OPT_BIT('e', "show-email", &output_option, "Show author email instead of name (Default: off)", OUTPUT_SHOW_EMAIL),
                OPT_BIT('w', NULL, &xdl_opts, "Ignore whitespace differences", XDF_IGNORE_WHITESPACE),
 +              OPT_BIT(0, "minimal", &xdl_opts, "Spend extra cycles to find better match", XDF_NEED_MINIMAL),
                OPT_STRING('S', NULL, &revs_file, "file", "Use revisions from <file> instead of calling git-rev-list"),
                OPT_STRING(0, "contents", &contents_from, "file", "Use <file>'s contents as the final image"),
                { OPTION_CALLBACK, 'C', NULL, &opt, "score", "Find line copies within and across files", PARSE_OPT_OPTARG, blame_copy_callback },
  parse_done:
        argc = parse_options_end(&ctx);
  
 -      if (abbrev == -1)
 -              abbrev = default_abbrev;
 -      /* one more abbrev length is needed for the boundary commit */
 -      abbrev++;
 +      if (0 < abbrev)
 +              /* one more abbrev length is needed for the boundary commit */
 +              abbrev++;
  
        if (revs_file && read_ancestry(revs_file))
                die_errno("reading graft file '%s' failed", revs_file);
                        die("no such path %s in %s", path, final_commit_name);
  
                if (DIFF_OPT_TST(&sb.revs->diffopt, ALLOW_TEXTCONV) &&
-                   textconv_object(path, o->mode, o->blob_sha1, (char **) &sb.final_buf,
+                   textconv_object(path, o->mode, o->blob_sha1, 1, (char **) &sb.final_buf,
                                    &sb.final_buf_size))
                        ;
                else
diff --combined builtin/cat-file.c
index 36a9104433e23422aab39b1912e998a7f54cd3f4,72205faeafcaf91bc75a1d79bd72eecd4e3a6edb..60568f9c39955e72e454c7d22b143d3e72a1a9a5
@@@ -11,7 -11,6 +11,7 @@@
  #include "parse-options.h"
  #include "diff.h"
  #include "userdiff.h"
 +#include "streaming.h"
  
  #define BATCH 1
  #define BATCH_CHECK 2
@@@ -128,8 -127,6 +128,8 @@@ static int cat_one_file(int opt, const 
                        return cmd_ls_tree(2, ls_args, NULL);
                }
  
 +              if (type == OBJ_BLOB)
 +                      return stream_blob_to_fd(1, sha1, NULL, 0);
                buf = read_sha1_file(sha1, &type, &size);
                if (!buf)
                        die("Cannot read object %s", obj_name);
                        die("git cat-file --textconv %s: <object> must be <sha1:path>",
                            obj_name);
  
-               if (!textconv_object(obj_context.path, obj_context.mode, sha1, &buf, &size))
+               if (!textconv_object(obj_context.path, obj_context.mode, sha1, 1, &buf, &size))
                        die("git cat-file --textconv: unable to run textconv on %s",
                            obj_name);
                break;
  
        case 0:
 +              if (type_from_string(exp_type) == OBJ_BLOB) {
 +                      unsigned char blob_sha1[20];
 +                      if (sha1_object_info(sha1, NULL) == OBJ_TAG) {
 +                              enum object_type type;
 +                              unsigned long size;
 +                              char *buffer = read_sha1_file(sha1, &type, &size);
 +                              if (memcmp(buffer, "object ", 7) ||
 +                                  get_sha1_hex(buffer + 7, blob_sha1))
 +                                      die("%s not a valid tag", sha1_to_hex(sha1));
 +                              free(buffer);
 +                      } else
 +                              hashcpy(blob_sha1, sha1);
 +
 +                      if (sha1_object_info(blob_sha1, NULL) == OBJ_BLOB)
 +                              return stream_blob_to_fd(1, blob_sha1, NULL, 0);
 +                      /*
 +                       * we attempted to dereference a tag to a blob
 +                       * and failed; there may be new dereference
 +                       * mechanisms this code is not aware of.
 +                       * fall-back to the usual case.
 +                       */
 +              }
                buf = read_object_with_reference(sha1, exp_type, &size, NULL);
                break;
  
@@@ -251,8 -226,14 +251,8 @@@ static const char * const cat_file_usag
  
  static int git_cat_file_config(const char *var, const char *value, void *cb)
  {
 -      switch (userdiff_config(var, value)) {
 -      case 0:
 -              break;
 -      case -1:
 +      if (userdiff_config(var, value) < 0)
                return -1;
 -      default:
 -              return 0;
 -      }
  
        return git_default_config(var, value, cb);
  }
diff --combined builtin/diff.c
index da8f6aac2bde9bb93cb059898a21c19c9bb27634,ac2b1cc63f8dcc655c353032f94b06c7c273f7a2..bf722987526f58f507acda2ea2f8da06da83d6ca
@@@ -29,6 -29,8 +29,8 @@@ static void stuff_change(struct diff_op
                         unsigned old_mode, unsigned new_mode,
                         const unsigned char *old_sha1,
                         const unsigned char *new_sha1,
+                        int old_sha1_valid,
+                        int new_sha1_valid,
                         const char *old_name,
                         const char *new_name)
  {
@@@ -54,8 -56,8 +56,8 @@@
  
        one = alloc_filespec(old_name);
        two = alloc_filespec(new_name);
-       fill_filespec(one, old_sha1, old_mode);
-       fill_filespec(two, new_sha1, new_mode);
+       fill_filespec(one, old_sha1, old_sha1_valid, old_mode);
+       fill_filespec(two, new_sha1, new_sha1_valid, new_mode);
  
        diff_queue(&diff_queued_diff, one, two);
  }
@@@ -84,6 -86,7 +86,7 @@@ static int builtin_diff_b_f(struct rev_
        stuff_change(&revs->diffopt,
                     blob[0].mode, canon_mode(st.st_mode),
                     blob[0].sha1, null_sha1,
+                    1, 0,
                     path, path);
        diffcore_std(&revs->diffopt);
        diff_flush(&revs->diffopt);
@@@ -108,6 -111,7 +111,7 @@@ static int builtin_diff_blobs(struct re
        stuff_change(&revs->diffopt,
                     blob[0].mode, blob[1].mode,
                     blob[0].sha1, blob[1].sha1,
+                    1, 1,
                     blob[0].name, blob[1].name);
        diffcore_std(&revs->diffopt);
        diff_flush(&revs->diffopt);
@@@ -285,10 -289,6 +289,10 @@@ int cmd_diff(int argc, const char **arg
        /* Otherwise, we are doing the usual "git" diff */
        rev.diffopt.skip_stat_unmatch = !!diff_auto_refresh_index;
  
 +      /* Scale to real terminal size and respect statGraphWidth config */
 +      rev.diffopt.stat_width = -1;
 +      rev.diffopt.stat_graph_width = -1;
 +
        /* Default to let external and textconv be used */
        DIFF_OPT_SET(&rev.diffopt, ALLOW_EXTERNAL);
        DIFF_OPT_SET(&rev.diffopt, ALLOW_TEXTCONV);
  
        DIFF_OPT_SET(&rev.diffopt, RECURSIVE);
  
 -      /*
 -       * If the user asked for our exit code then don't start a
 -       * pager or we would end up reporting its exit code instead.
 -       */
 -      if (!DIFF_OPT_TST(&rev.diffopt, EXIT_WITH_STATUS) &&
 -          check_pager_config("diff") != 0)
 -              setup_pager();
 +      setup_diff_pager(&rev.diffopt);
  
        /*
         * Do we have --cached and not have a pending object, then
                                add_head_to_pending(&rev);
                                if (!rev.pending.nr) {
                                        struct tree *tree;
 -                                      tree = lookup_tree((const unsigned char*)EMPTY_TREE_SHA1_BIN);
 +                                      tree = lookup_tree(EMPTY_TREE_SHA1_BIN);
                                        add_pending_object(&rev, &tree->object, "HEAD");
                                }
                                break;
                refresh_index_quietly();
        return result;
  }
 +
 +void setup_diff_pager(struct diff_options *opt)
 +{
 +      /*
 +       * If the user asked for our exit code, then either they want --quiet
 +       * or --exit-code. We should definitely not bother with a pager in the
 +       * former case, as we will generate no output. Since we still properly
 +       * report our exit code even when a pager is run, we _could_ run a
 +       * pager with --exit-code. But since we have not done so historically,
 +       * and because it is easy to find people oneline advising "git diff
 +       * --exit-code" in hooks and other scripts, we do not do so.
 +       */
 +      if (!DIFF_OPT_TST(opt, EXIT_WITH_STATUS) &&
 +          check_pager_config("diff") != 0)
 +              setup_pager();
 +}
diff --combined combine-diff.c
index 978668036835e16df4b6bfd37a7b1e9f8494cf07,e9abdbd0b9b65c4ff3b36f0bd1cceeb92edd580f..bb1cc96c4e73c90ee327858aa3b36cf2bfe043a4
@@@ -111,7 -111,7 +111,7 @@@ static char *grab_blob(const unsigned c
                return xcalloc(1, 1);
        } else if (textconv) {
                struct diff_filespec *df = alloc_filespec(path);
-               fill_filespec(df, sha1, mode);
+               fill_filespec(df, sha1, 1, mode);
                *size = fill_textconv(textconv, df, &blob);
                free_filespec(df);
        } else {
@@@ -423,7 -423,7 +423,7 @@@ static int make_hunks(struct sline *sli
                                                     hunk_begin, j);
                                la = (la + context < cnt + 1) ?
                                        (la + context) : cnt + 1;
 -                              while (j <= --la) {
 +                              while (la && j <= --la) {
                                        if (sline[la].flag & mark) {
                                                contin = 1;
                                                break;
@@@ -823,7 -823,7 +823,7 @@@ static void show_patch_diff(struct comb
                                                   &result_size, NULL, NULL);
                } else if (textconv) {
                        struct diff_filespec *df = alloc_filespec(elem->path);
-                       fill_filespec(df, null_sha1, st.st_mode);
+                       fill_filespec(df, null_sha1, 0, st.st_mode);
                        result_size = fill_textconv(textconv, df, &result);
                        free_filespec(df);
                } else if (0 <= (fd = open(elem->path, O_RDONLY))) {
diff --combined diff-no-index.c
index 7d805a06afacae7eaa36a192e3a16406ef0fb41f,6568eea6f4a465a40f522584a6fcaf92841f16e7..0b46a0f79f1376a935e4a0a408a224de711b60cb
@@@ -32,13 -32,6 +32,13 @@@ static int read_directory(const char *p
        return 0;
  }
  
 +/*
 + * This should be "(standard input)" or something, but it will
 + * probably expose many more breakages in the way no-index code
 + * is bolted onto the diff callchain.
 + */
 +static const char file_from_standard_input[] = "-";
 +
  static int get_mode(const char *path, int *mode)
  {
        struct stat st;
@@@ -49,7 -42,7 +49,7 @@@
        else if (!strcasecmp(path, "nul"))
                *mode = 0;
  #endif
 -      else if (!strcmp(path, "-"))
 +      else if (path == file_from_standard_input)
                *mode = create_ce_mode(0666);
        else if (lstat(path, &st))
                return error("Could not access '%s'", path);
        return 0;
  }
  
-       fill_filespec(s, null_sha1, mode);
 +static int populate_from_stdin(struct diff_filespec *s)
 +{
 +      struct strbuf buf = STRBUF_INIT;
 +      size_t size = 0;
 +
 +      if (strbuf_read(&buf, 0, 0) < 0)
 +              return error("error while reading from stdin %s",
 +                                   strerror(errno));
 +
 +      s->should_munmap = 0;
 +      s->data = strbuf_detach(&buf, &size);
 +      s->size = size;
 +      s->should_free = 1;
 +      s->is_stdin = 1;
 +      return 0;
 +}
 +
 +static struct diff_filespec *noindex_filespec(const char *name, int mode)
 +{
 +      struct diff_filespec *s;
 +
 +      if (!name)
 +              name = "/dev/null";
 +      s = alloc_filespec(name);
++      fill_filespec(s, null_sha1, 0, mode);
 +      if (name == file_from_standard_input)
 +              populate_from_stdin(s);
 +      return s;
 +}
 +
  static int queue_diff(struct diff_options *o,
 -              const char *name1, const char *name2)
 +                    const char *name1, const char *name2)
  {
        int mode1 = 0, mode2 = 0;
  
                return error("file/directory conflict: %s, %s", name1, name2);
  
        if (S_ISDIR(mode1) || S_ISDIR(mode2)) {
 -              char buffer1[PATH_MAX], buffer2[PATH_MAX];
 +              struct strbuf buffer1 = STRBUF_INIT;
 +              struct strbuf buffer2 = STRBUF_INIT;
                struct string_list p1 = STRING_LIST_INIT_DUP;
                struct string_list p2 = STRING_LIST_INIT_DUP;
 -              int len1 = 0, len2 = 0, i1, i2, ret = 0;
 +              int i1, i2, ret = 0;
 +              size_t len1 = 0, len2 = 0;
  
                if (name1 && read_directory(name1, &p1))
                        return -1;
                }
  
                if (name1) {
 -                      len1 = strlen(name1);
 -                      if (len1 > 0 && name1[len1 - 1] == '/')
 -                              len1--;
 -                      memcpy(buffer1, name1, len1);
 -                      buffer1[len1++] = '/';
 +                      strbuf_addstr(&buffer1, name1);
 +                      if (buffer1.len && buffer1.buf[buffer1.len - 1] != '/')
 +                              strbuf_addch(&buffer1, '/');
 +                      len1 = buffer1.len;
                }
  
                if (name2) {
 -                      len2 = strlen(name2);
 -                      if (len2 > 0 && name2[len2 - 1] == '/')
 -                              len2--;
 -                      memcpy(buffer2, name2, len2);
 -                      buffer2[len2++] = '/';
 +                      strbuf_addstr(&buffer2, name2);
 +                      if (buffer2.len && buffer2.buf[buffer2.len - 1] != '/')
 +                              strbuf_addch(&buffer2, '/');
 +                      len2 = buffer2.len;
                }
  
                for (i1 = i2 = 0; !ret && (i1 < p1.nr || i2 < p2.nr); ) {
                        const char *n1, *n2;
                        int comp;
  
 +                      strbuf_setlen(&buffer1, len1);
 +                      strbuf_setlen(&buffer2, len2);
 +
                        if (i1 == p1.nr)
                                comp = 1;
                        else if (i2 == p2.nr)
                                comp = -1;
                        else
 -                              comp = strcmp(p1.items[i1].string,
 -                                      p2.items[i2].string);
 +                              comp = strcmp(p1.items[i1].string, p2.items[i2].string);
  
                        if (comp > 0)
                                n1 = NULL;
                        else {
 -                              n1 = buffer1;
 -                              strncpy(buffer1 + len1, p1.items[i1++].string,
 -                                              PATH_MAX - len1);
 +                              strbuf_addstr(&buffer1, p1.items[i1++].string);
 +                              n1 = buffer1.buf;
                        }
  
                        if (comp < 0)
                                n2 = NULL;
                        else {
 -                              n2 = buffer2;
 -                              strncpy(buffer2 + len2, p2.items[i2++].string,
 -                                              PATH_MAX - len2);
 +                              strbuf_addstr(&buffer2, p2.items[i2++].string);
 +                              n2 = buffer2.buf;
                        }
  
                        ret = queue_diff(o, n1, n2);
                }
                string_list_clear(&p1, 0);
                string_list_clear(&p2, 0);
 +              strbuf_release(&buffer1);
 +              strbuf_release(&buffer2);
  
                return ret;
        } else {
                        tmp_c = name1; name1 = name2; name2 = tmp_c;
                }
  
 -              if (!name1)
 -                      name1 = "/dev/null";
 -              if (!name2)
 -                      name2 = "/dev/null";
 -              d1 = alloc_filespec(name1);
 -              d2 = alloc_filespec(name2);
 -              fill_filespec(d1, null_sha1, 0, mode1);
 -              fill_filespec(d2, null_sha1, 0, mode2);
 -
 +              d1 = noindex_filespec(name1, mode1);
 +              d2 = noindex_filespec(name2, mode2);
                diff_queue(&diff_queued_diff, d1, d2);
                return 0;
        }
  }
  
 -static int path_outside_repo(const char *path)
 -{
 -      const char *work_tree;
 -      size_t len;
 -
 -      if (!is_absolute_path(path))
 -              return 0;
 -      work_tree = get_git_work_tree();
 -      if (!work_tree)
 -              return 1;
 -      len = strlen(work_tree);
 -      if (strncmp(path, work_tree, len) ||
 -          (path[len] != '\0' && path[len] != '/'))
 -              return 1;
 -      return 0;
 -}
 -
  void diff_no_index(struct rev_info *revs,
                   int argc, const char **argv,
                   int nongit, const char *prefix)
  {
 -      int i;
 +      int i, prefixlen;
        int no_index = 0;
        unsigned options = 0;
 +      const char *paths[2];
  
        /* Were we asked to do --no-index explicitly? */
        for (i = 1; i < argc; i++) {
                 * a colourful "diff" replacement.
                 */
                if ((argc != i + 2) ||
 -                  (!path_outside_repo(argv[i]) &&
 -                   !path_outside_repo(argv[i+1])))
 +                  (path_inside_repo(prefix, argv[i]) &&
 +                   path_inside_repo(prefix, argv[i+1])))
                        return;
        }
        if (argc != i + 2)
                }
        }
  
 -      /*
 -       * If the user asked for our exit code then don't start a
 -       * pager or we would end up reporting its exit code instead.
 -       */
 -      if (!DIFF_OPT_TST(&revs->diffopt, EXIT_WITH_STATUS))
 -              setup_pager();
 -
 -      if (prefix) {
 -              int len = strlen(prefix);
 -              const char *paths[3];
 -              memset(paths, 0, sizeof(paths));
 -
 -              for (i = 0; i < 2; i++) {
 -                      const char *p = argv[argc - 2 + i];
 +      prefixlen = prefix ? strlen(prefix) : 0;
 +      for (i = 0; i < 2; i++) {
 +              const char *p = argv[argc - 2 + i];
 +              if (!strcmp(p, "-"))
                        /*
 -                       * stdin should be spelled as '-'; if you have
 -                       * path that is '-', spell it as ./-.
 +                       * stdin should be spelled as "-"; if you have
 +                       * path that is "-", spell it as "./-".
                         */
 -                      p = (strcmp(p, "-")
 -                           ? xstrdup(prefix_filename(prefix, len, p))
 -                           : p);
 -                      paths[i] = p;
 -              }
 -              diff_tree_setup_paths(paths, &revs->diffopt);
 +                      p = file_from_standard_input;
 +              else if (prefixlen)
 +                      p = xstrdup(prefix_filename(prefix, prefixlen, p));
 +              paths[i] = p;
        }
 -      else
 -              diff_tree_setup_paths(argv + argc - 2, &revs->diffopt);
        revs->diffopt.skip_stat_unmatch = 1;
        if (!revs->diffopt.output_format)
                revs->diffopt.output_format = DIFF_FORMAT_PATCH;
  
 -      DIFF_OPT_SET(&revs->diffopt, EXIT_WITH_STATUS);
        DIFF_OPT_SET(&revs->diffopt, NO_INDEX);
  
        revs->max_count = -2;
        if (diff_setup_done(&revs->diffopt) < 0)
                die("diff_setup_done failed");
  
 -      if (queue_diff(&revs->diffopt, revs->diffopt.pathspec.raw[0],
 -                     revs->diffopt.pathspec.raw[1]))
 +      setup_diff_pager(&revs->diffopt);
 +      DIFF_OPT_SET(&revs->diffopt, EXIT_WITH_STATUS);
 +
 +      if (queue_diff(&revs->diffopt, paths[0], paths[1]))
                exit(1);
        diff_set_mnemonic_prefix(&revs->diffopt, "1/", "2/");
        diffcore_std(&revs->diffopt);
         * The return code for --no-index imitates diff(1):
         * 0 = no changes, 1 = changes, else error
         */
 -      exit(revs->diffopt.found_changes);
 +      exit(diff_result_code(&revs->diffopt, 0));
  }
diff --combined diff.c
index 62cbe141efb411e831484fc36c0cba95a2b8846b,8933dd19969124e33a6433aaa54d3ea54e573a33..15125e8894c61f895c8f054cac0e8890210d3b7b
--- 1/diff.c
--- 2/diff.c
+++ b/diff.c
@@@ -31,7 -31,6 +31,7 @@@ static const char *external_diff_cmd_cf
  int diff_auto_refresh_index = 1;
  static int diff_mnemonic_prefix;
  static int diff_no_prefix;
 +static int diff_stat_graph_width;
  static int diff_dirstat_permille_default = 30;
  static struct diff_options default_diff_options;
  
@@@ -157,10 -156,6 +157,10 @@@ int git_diff_ui_config(const char *var
                diff_no_prefix = git_config_bool(var, value);
                return 0;
        }
 +      if (!strcmp(var, "diff.statgraphwidth")) {
 +              diff_stat_graph_width = git_config_int(var, value);
 +              return 0;
 +      }
        if (!strcmp(var, "diff.external"))
                return git_config_string(&external_diff_cmd_cfg, var, value);
        if (!strcmp(var, "diff.wordregex"))
@@@ -182,8 -177,11 +182,8 @@@ int git_diff_basic_config(const char *v
                return 0;
        }
  
 -      switch (userdiff_config(var, value)) {
 -              case 0: break;
 -              case -1: return -1;
 -              default: return 0;
 -      }
 +      if (userdiff_config(var, value) < 0)
 +              return -1;
  
        if (!prefixcmp(var, "diff.color.") || !prefixcmp(var, "color.diff.")) {
                int slot = parse_diff_color_slot(var, 11);
@@@ -989,74 -987,10 +989,74 @@@ static void diff_words_flush(struct emi
                diff_words_show(ecbdata->diff_words);
  }
  
 +static void diff_filespec_load_driver(struct diff_filespec *one)
 +{
 +      /* Use already-loaded driver */
 +      if (one->driver)
 +              return;
 +
 +      if (S_ISREG(one->mode))
 +              one->driver = userdiff_find_by_path(one->path);
 +
 +      /* Fallback to default settings */
 +      if (!one->driver)
 +              one->driver = userdiff_find_by_name("default");
 +}
 +
 +static const char *userdiff_word_regex(struct diff_filespec *one)
 +{
 +      diff_filespec_load_driver(one);
 +      return one->driver->word_regex;
 +}
 +
 +static void init_diff_words_data(struct emit_callback *ecbdata,
 +                               struct diff_options *orig_opts,
 +                               struct diff_filespec *one,
 +                               struct diff_filespec *two)
 +{
 +      int i;
 +      struct diff_options *o = xmalloc(sizeof(struct diff_options));
 +      memcpy(o, orig_opts, sizeof(struct diff_options));
 +
 +      ecbdata->diff_words =
 +              xcalloc(1, sizeof(struct diff_words_data));
 +      ecbdata->diff_words->type = o->word_diff;
 +      ecbdata->diff_words->opt = o;
 +      if (!o->word_regex)
 +              o->word_regex = userdiff_word_regex(one);
 +      if (!o->word_regex)
 +              o->word_regex = userdiff_word_regex(two);
 +      if (!o->word_regex)
 +              o->word_regex = diff_word_regex_cfg;
 +      if (o->word_regex) {
 +              ecbdata->diff_words->word_regex = (regex_t *)
 +                      xmalloc(sizeof(regex_t));
 +              if (regcomp(ecbdata->diff_words->word_regex,
 +                          o->word_regex,
 +                          REG_EXTENDED | REG_NEWLINE))
 +                      die ("Invalid regular expression: %s",
 +                           o->word_regex);
 +      }
 +      for (i = 0; i < ARRAY_SIZE(diff_words_styles); i++) {
 +              if (o->word_diff == diff_words_styles[i].type) {
 +                      ecbdata->diff_words->style =
 +                              &diff_words_styles[i];
 +                      break;
 +              }
 +      }
 +      if (want_color(o->use_color)) {
 +              struct diff_words_style *st = ecbdata->diff_words->style;
 +              st->old.color = diff_get_color_opt(o, DIFF_FILE_OLD);
 +              st->new.color = diff_get_color_opt(o, DIFF_FILE_NEW);
 +              st->ctx.color = diff_get_color_opt(o, DIFF_PLAIN);
 +      }
 +}
 +
  static void free_diff_words_data(struct emit_callback *ecbdata)
  {
        if (ecbdata->diff_words) {
                diff_words_flush(ecbdata);
 +              free (ecbdata->diff_words->opt);
                free (ecbdata->diff_words->minus.text.ptr);
                free (ecbdata->diff_words->minus.orig);
                free (ecbdata->diff_words->plus.text.ptr);
@@@ -1443,8 -1377,8 +1443,8 @@@ static void show_stats(struct diffstat_
  {
        int i, len, add, del, adds = 0, dels = 0;
        uintmax_t max_change = 0, max_len = 0;
 -      int total_files = data->nr;
 -      int width, name_width, count;
 +      int total_files = data->nr, count;
 +      int width, name_width, graph_width, number_width = 0, bin_width = 0;
        const char *reset, *add_c, *del_c;
        const char *line_prefix = "";
        int extra_shown = 0;
                line_prefix = msg->buf;
        }
  
 -      width = options->stat_width ? options->stat_width : 80;
 -      name_width = options->stat_name_width ? options->stat_name_width : 50;
        count = options->stat_count ? options->stat_count : data->nr;
  
 -      /* Sanity: give at least 5 columns to the graph,
 -       * but leave at least 10 columns for the name.
 -       */
 -      if (width < 25)
 -              width = 25;
 -      if (name_width < 10)
 -              name_width = 10;
 -      else if (width < name_width + 15)
 -              name_width = width - 15;
 -
 -      /* Find the longest filename and max number of changes */
        reset = diff_get_color_opt(options, DIFF_RESET);
        add_c = diff_get_color_opt(options, DIFF_FILE_NEW);
        del_c = diff_get_color_opt(options, DIFF_FILE_OLD);
  
 +      /*
 +       * Find the longest filename and max number of changes
 +       */
        for (i = 0; (i < count) && (i < data->nr); i++) {
                struct diffstat_file *file = data->files[i];
                uintmax_t change = file->added + file->deleted;
                if (max_len < len)
                        max_len = len;
  
 -              if (file->is_binary || file->is_unmerged)
 +              if (file->is_unmerged) {
 +                      /* "Unmerged" is 8 characters */
 +                      bin_width = bin_width < 8 ? 8 : bin_width;
 +                      continue;
 +              }
 +              if (file->is_binary) {
 +                      /* "Bin XXX -> YYY bytes" */
 +                      int w = 14 + decimal_width(file->added)
 +                              + decimal_width(file->deleted);
 +                      bin_width = bin_width < w ? w : bin_width;
 +                      /* Display change counts aligned with "Bin" */
 +                      number_width = 3;
                        continue;
 +              }
 +
                if (max_change < change)
                        max_change = change;
        }
        count = i; /* min(count, data->nr) */
  
 -      /* Compute the width of the graph part;
 -       * 10 is for one blank at the beginning of the line plus
 -       * " | count " between the name and the graph.
 +      /*
 +       * We have width = stat_width or term_columns() columns total.
 +       * We want a maximum of min(max_len, stat_name_width) for the name part.
 +       * We want a maximum of min(max_change, stat_graph_width) for the +- part.
 +       * We also need 1 for " " and 4 + decimal_width(max_change)
 +       * for " | NNNN " and one for the empty column at the end, altogether
 +       * 6 + decimal_width(max_change).
 +       *
 +       * If there's not enough space, we will use the smaller of
 +       * stat_name_width (if set) and 5/8*width for the filename,
 +       * and the rest for constant elements + graph part, but no more
 +       * than stat_graph_width for the graph part.
 +       * (5/8 gives 50 for filename and 30 for the constant parts + graph
 +       * for the standard terminal size).
         *
 -       * From here on, name_width is the width of the name area,
 -       * and width is the width of the graph area.
 +       * In other words: stat_width limits the maximum width, and
 +       * stat_name_width fixes the maximum width of the filename,
 +       * and is also used to divide available columns if there
 +       * aren't enough.
 +       *
 +       * Binary files are displayed with "Bin XXX -> YYY bytes"
 +       * instead of the change count and graph. This part is treated
 +       * similarly to the graph part, except that it is not
 +       * "scaled". If total width is too small to accommodate the
 +       * guaranteed minimum width of the filename part and the
 +       * separators and this message, this message will "overflow"
 +       * making the line longer than the maximum width.
         */
 -      name_width = (name_width < max_len) ? name_width : max_len;
 -      if (width < (name_width + 10) + max_change)
 -              width = width - (name_width + 10);
 +
 +      if (options->stat_width == -1)
 +              width = term_columns() - options->output_prefix_length;
        else
 -              width = max_change;
 +              width = options->stat_width ? options->stat_width : 80;
 +      number_width = decimal_width(max_change) > number_width ?
 +              decimal_width(max_change) : number_width;
 +
 +      if (options->stat_graph_width == -1)
 +              options->stat_graph_width = diff_stat_graph_width;
  
 +      /*
 +       * Guarantee 3/8*16==6 for the graph part
 +       * and 5/8*16==10 for the filename part
 +       */
 +      if (width < 16 + 6 + number_width)
 +              width = 16 + 6 + number_width;
 +
 +      /*
 +       * First assign sizes that are wanted, ignoring available width.
 +       * strlen("Bin XXX -> YYY bytes") == bin_width, and the part
 +       * starting from "XXX" should fit in graph_width.
 +       */
 +      graph_width = max_change + 4 > bin_width ? max_change : bin_width - 4;
 +      if (options->stat_graph_width &&
 +          options->stat_graph_width < graph_width)
 +              graph_width = options->stat_graph_width;
 +
 +      name_width = (options->stat_name_width > 0 &&
 +                    options->stat_name_width < max_len) ?
 +              options->stat_name_width : max_len;
 +
 +      /*
 +       * Adjust adjustable widths not to exceed maximum width
 +       */
 +      if (name_width + number_width + 6 + graph_width > width) {
 +              if (graph_width > width * 3/8 - number_width - 6) {
 +                      graph_width = width * 3/8 - number_width - 6;
 +                      if (graph_width < 6)
 +                              graph_width = 6;
 +              }
 +
 +              if (options->stat_graph_width &&
 +                  graph_width > options->stat_graph_width)
 +                      graph_width = options->stat_graph_width;
 +              if (name_width > width - number_width - 6 - graph_width)
 +                      name_width = width - number_width - 6 - graph_width;
 +              else
 +                      graph_width = width - number_width - 6 - name_width;
 +      }
 +
 +      /*
 +       * From here name_width is the width of the name area,
 +       * and graph_width is the width of the graph area.
 +       * max_change is used to scale graph properly.
 +       */
        for (i = 0; i < count; i++) {
                const char *prefix = "";
                char *name = data->files[i]->print_name;
                if (data->files[i]->is_binary) {
                        fprintf(options->file, "%s", line_prefix);
                        show_name(options->file, prefix, name, len);
 -                      fprintf(options->file, "  Bin ");
 -                      fprintf(options->file, "%s%"PRIuMAX"%s",
 +                      fprintf(options->file, " %*s", number_width, "Bin");
 +                      if (!added && !deleted) {
 +                              putc('\n', options->file);
 +                              continue;
 +                      }
 +                      fprintf(options->file, " %s%"PRIuMAX"%s",
                                del_c, deleted, reset);
                        fprintf(options->file, " -> ");
                        fprintf(options->file, "%s%"PRIuMAX"%s",
                else if (data->files[i]->is_unmerged) {
                        fprintf(options->file, "%s", line_prefix);
                        show_name(options->file, prefix, name, len);
 -                      fprintf(options->file, "  Unmerged\n");
 +                      fprintf(options->file, " Unmerged\n");
                        continue;
                }
  
                adds += add;
                dels += del;
  
 -              if (width <= max_change) {
 +              if (graph_width <= max_change) {
                        int total = add + del;
  
 -                      total = scale_linear(add + del, width, max_change);
 +                      total = scale_linear(add + del, graph_width, max_change);
                        if (total < 2 && add && del)
                                /* width >= 2 due to the sanity check */
                                total = 2;
                        if (add < del) {
 -                              add = scale_linear(add, width, max_change);
 +                              add = scale_linear(add, graph_width, max_change);
                                del = total - add;
                        } else {
 -                              del = scale_linear(del, width, max_change);
 +                              del = scale_linear(del, graph_width, max_change);
                                add = total - del;
                        }
                }
                fprintf(options->file, "%s", line_prefix);
                show_name(options->file, prefix, name, len);
 -              fprintf(options->file, "%5"PRIuMAX"%s", added + deleted,
 -                              added + deleted ? " " : "");
 +              fprintf(options->file, " %*"PRIuMAX"%s",
 +                      number_width, added + deleted,
 +                      added + deleted ? " " : "");
                show_graph(options->file, '+', add, add_c, reset);
                show_graph(options->file, '-', del, del_c, reset);
                fprintf(options->file, "\n");
@@@ -1693,16 -1548,17 +1693,16 @@@ static void show_shortstats(struct diff
                return;
  
        for (i = 0; i < data->nr; i++) {
 -              if (!data->files[i]->is_binary &&
 -                  !data->files[i]->is_unmerged) {
 -                      int added = data->files[i]->added;
 -                      int deleted= data->files[i]->deleted;
 -                      if (!data->files[i]->is_renamed &&
 -                          (added + deleted == 0)) {
 -                              total_files--;
 -                      } else {
 -                              adds += added;
 -                              dels += deleted;
 -                      }
 +              int added = data->files[i]->added;
 +              int deleted= data->files[i]->deleted;
 +
 +              if (data->files[i]->is_unmerged)
 +                      continue;
 +              if (!data->files[i]->is_renamed && (added + deleted == 0)) {
 +                      total_files--;
 +              } else if (!data->files[i]->is_binary) { /* don't count bytes */
 +                      adds += added;
 +                      dels += deleted;
                }
        }
        if (options->output_prefix) {
@@@ -2160,6 -2016,20 +2160,6 @@@ static void emit_binary_diff(FILE *file
        emit_binary_diff_body(file, two, one, prefix);
  }
  
 -static void diff_filespec_load_driver(struct diff_filespec *one)
 -{
 -      /* Use already-loaded driver */
 -      if (one->driver)
 -              return;
 -
 -      if (S_ISREG(one->mode))
 -              one->driver = userdiff_find_by_path(one->path);
 -
 -      /* Fallback to default settings */
 -      if (!one->driver)
 -              one->driver = userdiff_find_by_name("default");
 -}
 -
  int diff_filespec_is_binary(struct diff_filespec *one)
  {
        if (one->is_binary == -1) {
@@@ -2185,6 -2055,12 +2185,6 @@@ static const struct userdiff_funcname *
        return one->driver->funcname.pattern ? &one->driver->funcname : NULL;
  }
  
 -static const char *userdiff_word_regex(struct diff_filespec *one)
 -{
 -      diff_filespec_load_driver(one);
 -      return one->driver->word_regex;
 -}
 -
  void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
  {
        if (!options->a_prefix)
@@@ -2371,8 -2247,42 +2371,8 @@@ static void builtin_diff(const char *na
                        xecfg.ctxlen = strtoul(diffopts + 10, NULL, 10);
                else if (!prefixcmp(diffopts, "-u"))
                        xecfg.ctxlen = strtoul(diffopts + 2, NULL, 10);
 -              if (o->word_diff) {
 -                      int i;
 -
 -                      ecbdata.diff_words =
 -                              xcalloc(1, sizeof(struct diff_words_data));
 -                      ecbdata.diff_words->type = o->word_diff;
 -                      ecbdata.diff_words->opt = o;
 -                      if (!o->word_regex)
 -                              o->word_regex = userdiff_word_regex(one);
 -                      if (!o->word_regex)
 -                              o->word_regex = userdiff_word_regex(two);
 -                      if (!o->word_regex)
 -                              o->word_regex = diff_word_regex_cfg;
 -                      if (o->word_regex) {
 -                              ecbdata.diff_words->word_regex = (regex_t *)
 -                                      xmalloc(sizeof(regex_t));
 -                              if (regcomp(ecbdata.diff_words->word_regex,
 -                                              o->word_regex,
 -                                              REG_EXTENDED | REG_NEWLINE))
 -                                      die ("Invalid regular expression: %s",
 -                                                      o->word_regex);
 -                      }
 -                      for (i = 0; i < ARRAY_SIZE(diff_words_styles); i++) {
 -                              if (o->word_diff == diff_words_styles[i].type) {
 -                                      ecbdata.diff_words->style =
 -                                              &diff_words_styles[i];
 -                                      break;
 -                              }
 -                      }
 -                      if (want_color(o->use_color)) {
 -                              struct diff_words_style *st = ecbdata.diff_words->style;
 -                              st->old.color = diff_get_color_opt(o, DIFF_FILE_OLD);
 -                              st->new.color = diff_get_color_opt(o, DIFF_FILE_NEW);
 -                              st->ctx.color = diff_get_color_opt(o, DIFF_PLAIN);
 -                      }
 -              }
 +              if (o->word_diff)
 +                      init_diff_words_data(&ecbdata, o, one, two);
                xdi_diff_outf(&mf1, &mf2, fn_out_consume, &ecbdata,
                              &xpp, &xecfg);
                if (o->word_diff)
@@@ -2402,7 -2312,6 +2402,7 @@@ static void builtin_diffstat(const cha
  {
        mmfile_t mf1, mf2;
        struct diffstat_file *data;
 +      int same_contents;
  
        data = diffstat_add(diffstat, name_a, name_b);
  
                return;
        }
  
 +      same_contents = !hashcmp(one->sha1, two->sha1);
 +
        if (diff_filespec_is_binary(one) || diff_filespec_is_binary(two)) {
                data->is_binary = 1;
 -              data->added = diff_filespec_size(two);
 -              data->deleted = diff_filespec_size(one);
 +              if (same_contents) {
 +                      data->added = 0;
 +                      data->deleted = 0;
 +              } else {
 +                      data->added = diff_filespec_size(two);
 +                      data->deleted = diff_filespec_size(one);
 +              }
        }
  
        else if (complete_rewrite) {
                data->added = count_lines(two->data, two->size);
        }
  
 -      else {
 +      else if (!same_contents) {
                /* Crazy xdl interfaces.. */
                xpparam_t xpp;
                xdemitconf_t xecfg;
@@@ -2541,12 -2443,12 +2541,12 @@@ void free_filespec(struct diff_filespe
  }
  
  void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1,
-                  unsigned short mode)
+                  int sha1_valid, unsigned short mode)
  {
        if (mode) {
                spec->mode = canon_mode(mode);
                hashcpy(spec->sha1, sha1);
-               spec->sha1_valid = !is_null_sha1(sha1);
+               spec->sha1_valid = sha1_valid;
        }
  }
  
@@@ -2619,6 -2521,22 +2619,6 @@@ static int reuse_worktree_file(const ch
        return 0;
  }
  
 -static int populate_from_stdin(struct diff_filespec *s)
 -{
 -      struct strbuf buf = STRBUF_INIT;
 -      size_t size = 0;
 -
 -      if (strbuf_read(&buf, 0, 0) < 0)
 -              return error("error while reading from stdin %s",
 -                                   strerror(errno));
 -
 -      s->should_munmap = 0;
 -      s->data = strbuf_detach(&buf, &size);
 -      s->size = size;
 -      s->should_free = 1;
 -      return 0;
 -}
 -
  static int diff_populate_gitlink(struct diff_filespec *s, int size_only)
  {
        int len;
@@@ -2668,6 -2586,9 +2668,6 @@@ int diff_populate_filespec(struct diff_
                struct stat st;
                int fd;
  
 -              if (!strcmp(s->path, "-"))
 -                      return populate_from_stdin(s);
 -
                if (lstat(s->path, &st) < 0) {
                        if (errno == ENOENT) {
                        err_empty:
@@@ -2992,8 -2913,9 +2992,8 @@@ static void run_diff_cmd(const char *pg
        int complete_rewrite = (p->status == DIFF_STATUS_MODIFIED) && p->score;
        int must_show_header = 0;
  
 -      if (!DIFF_OPT_TST(o, ALLOW_EXTERNAL))
 -              pgm = NULL;
 -      else {
 +
 +      if (DIFF_OPT_TST(o, ALLOW_EXTERNAL)) {
                struct userdiff_driver *drv = userdiff_find_by_path(attr_path);
                if (drv && drv->external)
                        pgm = drv->external;
@@@ -3028,7 -2950,7 +3028,7 @@@ static void diff_fill_sha1_info(struct 
        if (DIFF_FILE_VALID(one)) {
                if (!one->sha1_valid) {
                        struct stat st;
 -                      if (!strcmp(one->path, "-")) {
 +                      if (one->is_stdin) {
                                hashcpy(one->sha1, null_sha1);
                                return;
                        }
@@@ -3073,9 -2995,6 +3073,9 @@@ static void run_diff(struct diff_filepa
        if (o->prefix_length)
                strip_prefix(o->prefix_length, &name, &other);
  
 +      if (!DIFF_OPT_TST(o, ALLOW_EXTERNAL))
 +              pgm = NULL;
 +
        if (DIFF_PAIR_UNMERGED(p)) {
                run_diff_cmd(pgm, name, NULL, attr_path,
                             NULL, NULL, NULL, o, p);
@@@ -3172,7 -3091,6 +3172,7 @@@ void diff_setup(struct diff_options *op
        options->rename_limit = -1;
        options->dirstat_permille = diff_dirstat_permille_default;
        options->context = 3;
 +      DIFF_OPT_SET(options, RENAME_EMPTY);
  
        options->change = diff_change;
        options->add_remove = diff_addremove;
@@@ -3384,7 -3302,6 +3384,7 @@@ static int stat_opt(struct diff_option
        char *end;
        int width = options->stat_width;
        int name_width = options->stat_name_width;
 +      int graph_width = options->stat_graph_width;
        int count = options->stat_count;
        int argcount = 1;
  
                                name_width = strtoul(av[1], &end, 10);
                                argcount = 2;
                        }
 +              } else if (!prefixcmp(arg, "-graph-width")) {
 +                      arg += strlen("-graph-width");
 +                      if (*arg == '=')
 +                              graph_width = strtoul(arg + 1, &end, 10);
 +                      else if (!*arg && !av[1])
 +                              die("Option '--stat-graph-width' requires a value");
 +                      else if (!*arg) {
 +                              graph_width = strtoul(av[1], &end, 10);
 +                              argcount = 2;
 +                      }
                } else if (!prefixcmp(arg, "-count")) {
                        arg += strlen("-count");
                        if (*arg == '=')
                return 0;
        options->output_format |= DIFF_FORMAT_DIFFSTAT;
        options->stat_name_width = name_width;
 +      options->stat_graph_width = graph_width;
        options->stat_width = width;
        options->stat_count = count;
        return argcount;
@@@ -3543,10 -3449,6 +3543,10 @@@ int diff_opt_parse(struct diff_options 
        }
        else if (!strcmp(arg, "--no-renames"))
                options->detect_rename = 0;
 +      else if (!strcmp(arg, "--rename-empty"))
 +              DIFF_OPT_SET(options, RENAME_EMPTY);
 +      else if (!strcmp(arg, "--no-rename-empty"))
 +              DIFF_OPT_CLR(options, RENAME_EMPTY);
        else if (!strcmp(arg, "--relative"))
                DIFF_OPT_SET(options, RELATIVE_NAME);
        else if (!prefixcmp(arg, "--relative=")) {
        else if (!strcmp(arg, "--ignore-space-at-eol"))
                DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
        else if (!strcmp(arg, "--patience"))
 -              DIFF_XDL_SET(options, PATIENCE_DIFF);
 +              options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
        else if (!strcmp(arg, "--histogram"))
 -              DIFF_XDL_SET(options, HISTOGRAM_DIFF);
 +              options->xdl_opts = DIFF_WITH_ALG(options, HISTOGRAM_DIFF);
  
        /* flags options */
        else if (!strcmp(arg, "--binary")) {
@@@ -4440,12 -4342,6 +4440,12 @@@ void diff_flush(struct diff_options *op
  
        if (output_format & DIFF_FORMAT_PATCH) {
                if (separator) {
 +                      if (options->output_prefix) {
 +                              struct strbuf *msg = NULL;
 +                              msg = options->output_prefix(options,
 +                                      options->output_prefix_data);
 +                              fwrite(msg->buf, msg->len, 1, stdout);
 +                      }
                        putc(options->line_termination, options->file);
                        if (options->stat_sep) {
                                /* attach patch instead of inline */
@@@ -4693,6 -4589,7 +4693,7 @@@ static int is_submodule_ignored(const c
  void diff_addremove(struct diff_options *options,
                    int addremove, unsigned mode,
                    const unsigned char *sha1,
+                   int sha1_valid,
                    const char *concatpath, unsigned dirty_submodule)
  {
        struct diff_filespec *one, *two;
        two = alloc_filespec(concatpath);
  
        if (addremove != '+')
-               fill_filespec(one, sha1, mode);
+               fill_filespec(one, sha1, sha1_valid, mode);
        if (addremove != '-') {
-               fill_filespec(two, sha1, mode);
+               fill_filespec(two, sha1, sha1_valid, mode);
                two->dirty_submodule = dirty_submodule;
        }
  
@@@ -4739,6 -4636,7 +4740,7 @@@ void diff_change(struct diff_options *o
                 unsigned old_mode, unsigned new_mode,
                 const unsigned char *old_sha1,
                 const unsigned char *new_sha1,
+                int old_sha1_valid, int new_sha1_valid,
                 const char *concatpath,
                 unsigned old_dirty_submodule, unsigned new_dirty_submodule)
  {
                const unsigned char *tmp_c;
                tmp = old_mode; old_mode = new_mode; new_mode = tmp;
                tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
+               tmp = old_sha1_valid; old_sha1_valid = new_sha1_valid;
+                       new_sha1_valid = tmp;
                tmp = old_dirty_submodule; old_dirty_submodule = new_dirty_submodule;
                        new_dirty_submodule = tmp;
        }
  
        one = alloc_filespec(concatpath);
        two = alloc_filespec(concatpath);
-       fill_filespec(one, old_sha1, old_mode);
-       fill_filespec(two, new_sha1, new_mode);
+       fill_filespec(one, old_sha1, old_sha1_valid, old_mode);
+       fill_filespec(two, new_sha1, new_sha1_valid, new_mode);
        one->dirty_submodule = old_dirty_submodule;
        two->dirty_submodule = new_dirty_submodule;
  
diff --combined diff.h
index e027650cb0ff2651e2e890e7f00753c15f5b3cff,b5ba1402aa5d1ce65de56d024764564bbd473413..815dd7af5766937ae1b553631a8e7be4d609d8d3
--- 1/diff.h
--- 2/diff.h
+++ b/diff.h
@@@ -19,12 -19,14 +19,14 @@@ typedef void (*change_fn_t)(struct diff
                 unsigned old_mode, unsigned new_mode,
                 const unsigned char *old_sha1,
                 const unsigned char *new_sha1,
+                int old_sha1_valid, int new_sha1_valid,
                 const char *fullpath,
                 unsigned old_dirty_submodule, unsigned new_dirty_submodule);
  
  typedef void (*add_remove_fn_t)(struct diff_options *options,
                    int addremove, unsigned mode,
                    const unsigned char *sha1,
+                   int sha1_valid,
                    const char *fullpath, unsigned dirty_submodule);
  
  typedef void (*diff_format_fn_t)(struct diff_queue_struct *q,
@@@ -60,7 -62,7 +62,7 @@@ typedef struct strbuf *(*diff_prefix_fn
  #define DIFF_OPT_SILENT_ON_REMOVE    (1 <<  5)
  #define DIFF_OPT_FIND_COPIES_HARDER  (1 <<  6)
  #define DIFF_OPT_FOLLOW_RENAMES      (1 <<  7)
 -/* (1 <<  8) unused */
 +#define DIFF_OPT_RENAME_EMPTY        (1 <<  8)
  /* (1 <<  9) unused */
  #define DIFF_OPT_HAS_CHANGES         (1 << 10)
  #define DIFF_OPT_QUICK               (1 << 11)
@@@ -82,7 -84,6 +84,7 @@@
  #define DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG (1 << 27)
  #define DIFF_OPT_DIRSTAT_BY_LINE     (1 << 28)
  #define DIFF_OPT_FUNCCONTEXT         (1 << 29)
 +#define DIFF_OPT_PICKAXE_IGNORE_CASE (1 << 30)
  
  #define DIFF_OPT_TST(opts, flag)    ((opts)->flags & DIFF_OPT_##flag)
  #define DIFF_OPT_SET(opts, flag)    ((opts)->flags |= DIFF_OPT_##flag)
@@@ -91,8 -92,6 +93,8 @@@
  #define DIFF_XDL_SET(opts, flag)    ((opts)->xdl_opts |= XDF_##flag)
  #define DIFF_XDL_CLR(opts, flag)    ((opts)->xdl_opts &= ~XDF_##flag)
  
 +#define DIFF_WITH_ALG(opts, flag)   (((opts)->xdl_opts & ~XDF_DIFF_ALGORITHM_MASK) | XDF_##flag)
 +
  enum diff_words_type {
        DIFF_WORDS_NONE = 0,
        DIFF_WORDS_PORCELAIN,
@@@ -132,7 -131,6 +134,7 @@@ struct diff_options 
  
        int stat_width;
        int stat_name_width;
 +      int stat_graph_width;
        int stat_count;
        const char *word_regex;
        enum diff_words_type word_diff;
        diff_format_fn_t format_callback;
        void *format_callback_data;
        diff_prefix_fn_t output_prefix;
 +      int output_prefix_length;
        void *output_prefix_data;
  };
  
@@@ -214,12 -211,15 +216,15 @@@ extern void diff_addremove(struct diff_
                           int addremove,
                           unsigned mode,
                           const unsigned char *sha1,
+                          int sha1_valid,
                           const char *fullpath, unsigned dirty_submodule);
  
  extern void diff_change(struct diff_options *,
                        unsigned mode1, unsigned mode2,
                        const unsigned char *sha1,
                        const unsigned char *sha2,
+                       int sha1_valid,
+                       int sha2_valid,
                        const char *fullpath,
                        unsigned dirty_submodule1, unsigned dirty_submodule2);
  
diff --combined diffcore-rename.c
index 216a7a4bbcab189b5c3d1b7f58728b94b8d6aec8,e6f9be64cfed2984f1ebd0364ae1e9bfabc35de7..512d0ac5fd2bc0acfb57147a6eb77f61f92b7c7e
@@@ -48,7 -48,7 +48,7 @@@ static struct diff_rename_dst *locate_r
                memmove(rename_dst + first + 1, rename_dst + first,
                        (rename_dst_nr - first - 1) * sizeof(*rename_dst));
        rename_dst[first].two = alloc_filespec(two->path);
-       fill_filespec(rename_dst[first].two, two->sha1, two->mode);
+       fill_filespec(rename_dst[first].two, two->sha1, two->sha1_valid, two->mode);
        rename_dst[first].pair = NULL;
        return &(rename_dst[first]);
  }
@@@ -512,15 -512,9 +512,15 @@@ void diffcore_rename(struct diff_option
                        else if (options->single_follow &&
                                 strcmp(options->single_follow, p->two->path))
                                continue; /* not interested */
 +                      else if (!DIFF_OPT_TST(options, RENAME_EMPTY) &&
 +                               is_empty_blob_sha1(p->two->sha1))
 +                              continue;
                        else
                                locate_rename_dst(p->two, 1);
                }
 +              else if (!DIFF_OPT_TST(options, RENAME_EMPTY) &&
 +                       is_empty_blob_sha1(p->one->sha1))
 +                      continue;
                else if (!DIFF_PAIR_UNMERGED(p) && !DIFF_FILE_VALID(p->two)) {
                        /*
                         * If the source is a broken "delete", and
diff --combined diffcore.h
index be0739c5c401c059a0d3030acbc99a7744f17065,c964ec114a56cb0e42f4164bdabf15e9e15a4ecb..1c16c8595b21c2712259041c01d4b58d76a60222
@@@ -43,7 -43,6 +43,7 @@@ struct diff_filespec 
        unsigned should_free : 1; /* data should be free()'ed */
        unsigned should_munmap : 1; /* data should be munmap()'ed */
        unsigned dirty_submodule : 2;  /* For submodules: its work tree is dirty */
 +      unsigned is_stdin : 1;
  #define DIRTY_SUBMODULE_UNTRACKED 1
  #define DIRTY_SUBMODULE_MODIFIED  2
        unsigned has_more_entries : 1; /* only appear in combined diff */
@@@ -55,7 -54,7 +55,7 @@@
  extern struct diff_filespec *alloc_filespec(const char *);
  extern void free_filespec(struct diff_filespec *);
  extern void fill_filespec(struct diff_filespec *, const unsigned char *,
-                         unsigned short);
+                         int, unsigned short);
  
  extern int diff_populate_filespec(struct diff_filespec *, int);
  extern void diff_free_filespec_data(struct diff_filespec *);
diff --combined fsck.c
index 4c63b2cc41eec4f568ee6f0d18a51c97304d5d96,da53cf41f9c58f9779d00c28b9735b2b965dfaa9..7395ef6a425f5c7725767f34a0383f61907fce93
--- 1/fsck.c
--- 2/fsck.c
+++ b/fsck.c
@@@ -27,7 -27,7 +27,7 @@@ static int fsck_walk_tree(struct tree *
                else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode))
                        result = walk(&lookup_blob(entry.sha1)->object, OBJ_BLOB, data);
                else {
 -                      result = error("in tree %s: entry %s has bad mode %.6o\n",
 +                      result = error("in tree %s: entry %s has bad mode %.6o",
                                        sha1_to_hex(tree->object.sha1), entry.path, entry.mode);
                }
                if (result < 0)
@@@ -139,6 -139,7 +139,7 @@@ static int verify_ordered(unsigned mode
  static int fsck_tree(struct tree *item, int strict, fsck_error error_func)
  {
        int retval;
+       int has_null_sha1 = 0;
        int has_full_path = 0;
        int has_empty_name = 0;
        int has_zero_pad = 0;
        while (desc.size) {
                unsigned mode;
                const char *name;
+               const unsigned char *sha1;
  
-               tree_entry_extract(&desc, &name, &mode);
+               sha1 = tree_entry_extract(&desc, &name, &mode);
  
+               if (is_null_sha1(sha1))
+                       has_null_sha1 = 1;
                if (strchr(name, '/'))
                        has_full_path = 1;
                if (!*name)
        }
  
        retval = 0;
+       if (has_null_sha1)
+               retval += error_func(&item->object, FSCK_WARN, "contains entries pointing to null sha1");
        if (has_full_path)
                retval += error_func(&item->object, FSCK_WARN, "contains full pathnames");
        if (has_empty_name)
diff --combined read-cache.c
index b645827c06a268ee4721fda5fc8cdcd84775c9ec,5ae7f2b680d8788f4c806d9c6ec111b6ba339aaa..7d4bf68e33d35ebeb5e9f985ac5c9dc2962fc8ba
@@@ -12,8 -12,6 +12,8 @@@
  #include "commit.h"
  #include "blob.h"
  #include "resolve-undo.h"
 +#include "strbuf.h"
 +#include "varint.h"
  
  static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really);
  
@@@ -159,6 -157,16 +159,6 @@@ static int ce_modified_check_fs(struct 
        return 0;
  }
  
 -static int is_empty_blob_sha1(const unsigned char *sha1)
 -{
 -      static const unsigned char empty_blob_sha1[20] = {
 -              0xe6,0x9d,0xe2,0x9b,0xb2,0xd1,0xd6,0x43,0x4b,0x8b,
 -              0x29,0xae,0x77,0x5a,0xd8,0xc2,0xe4,0x8c,0x53,0x91
 -      };
 -
 -      return !hashcmp(sha1, empty_blob_sha1);
 -}
 -
  static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st)
  {
        unsigned int changed = 0;
@@@ -397,15 -405,10 +397,15 @@@ int df_name_compare(const char *name1, 
  
  int cache_name_compare(const char *name1, int flags1, const char *name2, int flags2)
  {
 -      int len1 = flags1 & CE_NAMEMASK;
 -      int len2 = flags2 & CE_NAMEMASK;
 -      int len = len1 < len2 ? len1 : len2;
 -      int cmp;
 +      int len1, len2, len, cmp;
 +
 +      len1 = flags1 & CE_NAMEMASK;
 +      if (CE_NAMEMASK <= len1)
 +              len1 = strlen(name1 + CE_NAMEMASK) + CE_NAMEMASK;
 +      len2 = flags2 & CE_NAMEMASK;
 +      if (CE_NAMEMASK <= len2)
 +              len2 = strlen(name2 + CE_NAMEMASK) + CE_NAMEMASK;
 +      len = len1 < len2 ? len1 : len2;
  
        cmp = memcmp(name1, name2, len);
        if (cmp)
@@@ -1186,74 -1189,15 +1186,74 @@@ static struct cache_entry *refresh_cach
        return refresh_cache_ent(&the_index, ce, really, NULL, NULL);
  }
  
 +
 +/*****************************************************************
 + * Index File I/O
 + *****************************************************************/
 +
 +#define INDEX_FORMAT_DEFAULT 3
 +
 +/*
 + * dev/ino/uid/gid/size are also just tracked to the low 32 bits
 + * Again - this is just a (very strong in practice) heuristic that
 + * the inode hasn't changed.
 + *
 + * We save the fields in big-endian order to allow using the
 + * index file over NFS transparently.
 + *
 + * This is the layout of an entry whose flags do not have
 + * CE_EXTENDED set; see ondisk_ce_size() for how the two layouts
 + * are told apart.
 + */
 +struct ondisk_cache_entry {
 +      struct cache_time ctime; /* sec and nsec are each converted with ntoh_l() on read */
 +      struct cache_time mtime; /* likewise */
 +      unsigned int dev; /* dev through size: converted with ntoh_l() on read */
 +      unsigned int ino;
 +      unsigned int mode;
 +      unsigned int uid;
 +      unsigned int gid;
 +      unsigned int size;
 +      unsigned char sha1[20]; /* copied with hashcpy(), no byte swapping */
 +      unsigned short flags; /* converted with ntoh_s(); the CE_NAMEMASK bits carry the name length */
 +      char name[FLEX_ARRAY]; /* path bytes start here (variable length) */
 +};
 +
 +/*
 + * This struct is used when CE_EXTENDED bit is 1
 + * The struct must match ondisk_cache_entry exactly from
 + * ctime till flags
 + *
 + * create_from_disk() and copy_cache_entry_to_ondisk() depend on
 + * that match: both cast a struct ondisk_cache_entry pointer to
 + * this type in order to reach flags2 and name.
 + */
 +struct ondisk_cache_entry_extended {
 +      struct cache_time ctime;
 +      struct cache_time mtime;
 +      unsigned int dev;
 +      unsigned int ino;
 +      unsigned int mode;
 +      unsigned int uid;
 +      unsigned int gid;
 +      unsigned int size;
 +      unsigned char sha1[20];
 +      unsigned short flags;
 +      unsigned short flags2; /* extended flag bits, stored shifted down by 16: read as ntoh_s(flags2) << 16 */
 +      char name[FLEX_ARRAY]; /* path bytes start here (variable length) */
 +};
 +
 +/*
 + * These are only used for v3 or lower
 + *
 + * align_flex_name() evaluates to the smallest multiple of 8 that is
 + * strictly greater than offsetof(name) + len, so the name is always
 + * followed by at least one and at most eight bytes of padding.
 + * ondisk_ce_size() picks the extended or the plain layout depending
 + * on whether CE_EXTENDED is set in ce->ce_flags.
 + */
 +#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7)
 +#define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
 +#define ondisk_cache_entry_extended_size(len) align_flex_name(ondisk_cache_entry_extended,len)
 +#define ondisk_ce_size(ce) (((ce)->ce_flags & CE_EXTENDED) ? \
 +                          ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
 +                          ondisk_cache_entry_size(ce_namelen(ce)))
 +
  static int verify_hdr(struct cache_header *hdr, unsigned long size)
  {
        git_SHA_CTX c;
        unsigned char sha1[20];
 +      int hdr_version;
  
        if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
                return error("bad signature");
 -      if (hdr->hdr_version != htonl(2) && hdr->hdr_version != htonl(3))
 -              return error("bad index version");
 +      hdr_version = ntohl(hdr->hdr_version);
 +      if (hdr_version < 2 || 4 < hdr_version)
 +              return error("bad index version %d", hdr_version);
        git_SHA1_Init(&c);
        git_SHA1_Update(&c, hdr, size - 20);
        git_SHA1_Final(sha1, &c);
@@@ -1287,74 -1231,7 +1287,74 @@@ int read_index(struct index_state *ista
        return read_index_from(istate, get_index_file());
  }
  
 -static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk)
 +#ifndef NEEDS_ALIGNED_ACCESS /* default: hand the field straight to ntohs()/ntohl() */
 +#define ntoh_s(var) ntohs(var)
 +#define ntoh_l(var) ntohl(var)
 +#else /* NEEDS_ALIGNED_ACCESS: copy the bytes into a local before converting */
 +static inline uint16_t ntoh_s_force_align(void *p) /* p: address of a 16-bit value in network byte order */
 +{
 +      uint16_t x;
 +      memcpy(&x, p, sizeof(x)); /* byte copy, so p is never dereferenced as a uint16_t */
 +      return ntohs(x);
 +}
 +static inline uint32_t ntoh_l_force_align(void *p) /* p: address of a 32-bit value in network byte order */
 +{
 +      uint32_t x;
 +      memcpy(&x, p, sizeof(x)); /* byte copy, so p is never dereferenced as a uint32_t */
 +      return ntohl(x);
 +}
 +#define ntoh_s(var) ntoh_s_force_align(&(var)) /* takes &(var), so var must be an lvalue */
 +#define ntoh_l(var) ntoh_l_force_align(&(var)) /* takes &(var), so var must be an lvalue */
 +#endif
 +/*
 + * Build an in-core cache_entry from the fixed-size part of one
 + * on-disk entry plus a name supplied separately.
 + *
 + * ondisk: source of the stat-like fields and the sha1.  The numeric
 + *         fields go through ntoh_l(); the sha1 is copied as-is.
 + * flags:  stored into ce_flags unchanged (ondisk->flags is not read
 + *         here).
 + * name, len: path bytes to copy.  Exactly len bytes are copied and
 + *         a terminating NUL is written after them, so name itself
 + *         need not be NUL-terminated.
 + *
 + * Returns the entry allocated with xmalloc(cache_entry_size(len)).
 + * NOTE(review): xmalloc() is defined elsewhere; if it does not zero
 + * the allocation, members not assigned below hold indeterminate
 + * values - confirm that callers set them.
 + */
 +static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *ondisk,
 +                                                 unsigned int flags,
 +                                                 const char *name,
 +                                                 size_t len)
 +{
 +      struct cache_entry *ce = xmalloc(cache_entry_size(len));
 +
 +      ce->ce_ctime.sec = ntoh_l(ondisk->ctime.sec);
 +      ce->ce_mtime.sec = ntoh_l(ondisk->mtime.sec);
 +      ce->ce_ctime.nsec = ntoh_l(ondisk->ctime.nsec);
 +      ce->ce_mtime.nsec = ntoh_l(ondisk->mtime.nsec);
 +      ce->ce_dev   = ntoh_l(ondisk->dev);
 +      ce->ce_ino   = ntoh_l(ondisk->ino);
 +      ce->ce_mode  = ntoh_l(ondisk->mode);
 +      ce->ce_uid   = ntoh_l(ondisk->uid);
 +      ce->ce_gid   = ntoh_l(ondisk->gid);
 +      ce->ce_size  = ntoh_l(ondisk->size);
 +      ce->ce_flags = flags;
 +      hashcpy(ce->sha1, ondisk->sha1);
 +      memcpy(ce->name, name, len);
 +      ce->name[len] = '\0';
 +      return ce;
 +}
 +
 +/*
 + * Adjacent cache entries tend to share the leading paths, so it makes
 + * sense to only store the differences in later entries.  In the v4
 + * on-disk format of the index, each on-disk cache entry stores the
 + * number of bytes to be stripped from the end of the previous name,
 + * and the bytes to append to the result, to come up with its name.
 + *
 + * name: holds the previous entry's path on entry and the current
 + *       entry's path on return.
 + * cp_:  points at the varint strip count, which is followed by the
 + *       NUL-terminated bytes to append.  decode_varint() is given
 + *       &cp, and the code after it treats cp as pointing just past
 + *       the varint.
 + *
 + * Dies if the strip count is larger than name->len.  Returns the
 + * number of bytes consumed starting at cp_, including the
 + * terminating NUL.
 + *
 + * NOTE(review): the scan for the terminating NUL has no upper
 + * bound; it relies on the data at cp_ being NUL-terminated.
 + */
 +static unsigned long expand_name_field(struct strbuf *name, const char *cp_)
 +{
 +      const unsigned char *ep, *cp = (const unsigned char *)cp_;
 +      size_t len = decode_varint(&cp);
 +
 +      if (name->len < len)
 +              die("malformed name field in the index");
 +      strbuf_remove(name, name->len - len, len); /* drop the last len bytes of the previous name */
 +      for (ep = cp; *ep; ep++)
 +              ; /* find the end */
 +      strbuf_add(name, cp, ep - cp); /* append the suffix, without its NUL */
 +      return (const char *)ep + 1 - cp_; /* +1 steps over the NUL */
 +}
 +
 +static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
 +                                          unsigned long *ent_size,
 +                                          struct strbuf *previous_name)
  {
        struct cache_entry *ce;
        size_t len;
        unsigned int flags;
  
        /* On-disk flags are just 16 bits */
 -      flags = ntohs(ondisk->flags);
 +      flags = ntoh_s(ondisk->flags);
        len = flags & CE_NAMEMASK;
  
        if (flags & CE_EXTENDED) {
                struct ondisk_cache_entry_extended *ondisk2;
                int extended_flags;
                ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
 -              extended_flags = ntohs(ondisk2->flags2) << 16;
 +              extended_flags = ntoh_s(ondisk2->flags2) << 16;
                /* We do not yet understand any bit out of CE_EXTENDED_FLAGS */
                if (extended_flags & ~CE_EXTENDED_FLAGS)
                        die("Unknown index entry format %08x", extended_flags);
        else
                name = ondisk->name;
  
 -      if (len == CE_NAMEMASK)
 -              len = strlen(name);
 -
 -      ce = xmalloc(cache_entry_size(len));
 -
 -      ce->ce_ctime.sec = ntohl(ondisk->ctime.sec);
 -      ce->ce_mtime.sec = ntohl(ondisk->mtime.sec);
 -      ce->ce_ctime.nsec = ntohl(ondisk->ctime.nsec);
 -      ce->ce_mtime.nsec = ntohl(ondisk->mtime.nsec);
 -      ce->ce_dev   = ntohl(ondisk->dev);
 -      ce->ce_ino   = ntohl(ondisk->ino);
 -      ce->ce_mode  = ntohl(ondisk->mode);
 -      ce->ce_uid   = ntohl(ondisk->uid);
 -      ce->ce_gid   = ntohl(ondisk->gid);
 -      ce->ce_size  = ntohl(ondisk->size);
 -      ce->ce_flags = flags;
 -
 -      hashcpy(ce->sha1, ondisk->sha1);
 -
 -      memcpy(ce->name, name, len);
 -      ce->name[len] = '\0';
 +      if (!previous_name) {
 +              /* v3 and earlier */
 +              if (len == CE_NAMEMASK)
 +                      len = strlen(name);
 +              ce = cache_entry_from_ondisk(ondisk, flags, name, len);
 +
 +              *ent_size = ondisk_ce_size(ce);
 +      } else {
 +              unsigned long consumed;
 +              consumed = expand_name_field(previous_name, name);
 +              ce = cache_entry_from_ondisk(ondisk, flags,
 +                                           previous_name->buf,
 +                                           previous_name->len);
 +
 +              *ent_size = (name - ((char *)ondisk)) + consumed;
 +      }
        return ce;
  }
  
@@@ -1407,7 -1289,6 +1407,7 @@@ int read_index_from(struct index_state 
        struct cache_header *hdr;
        void *mmap;
        size_t mmap_size;
 +      struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
  
        errno = EBUSY;
        if (istate->initialized)
        if (verify_hdr(hdr, mmap_size) < 0)
                goto unmap;
  
 +      istate->version = ntohl(hdr->hdr_version);
        istate->cache_nr = ntohl(hdr->hdr_entries);
        istate->cache_alloc = alloc_nr(istate->cache_nr);
        istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
        istate->initialized = 1;
  
 +      if (istate->version == 4)
 +              previous_name = &previous_name_buf;
 +      else
 +              previous_name = NULL;
 +
        src_offset = sizeof(*hdr);
        for (i = 0; i < istate->cache_nr; i++) {
                struct ondisk_cache_entry *disk_ce;
                struct cache_entry *ce;
 +              unsigned long consumed;
  
                disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
 -              ce = create_from_disk(disk_ce);
 +              ce = create_from_disk(disk_ce, &consumed, previous_name);
                set_index_entry(istate, i, ce);
  
 -              src_offset += ondisk_ce_size(ce);
 +              src_offset += consumed;
        }
 +      strbuf_release(&previous_name_buf);
        istate->timestamp.sec = st.st_mtime;
        istate->timestamp.nsec = ST_MTIME_NSEC(st);
  
@@@ -1647,10 -1520,13 +1647,10 @@@ static void ce_smudge_racily_clean_entr
        }
  }
  
 -static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce)
 +/* Copy miscellaneous fields but not the name */
 +static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
 +                                     struct cache_entry *ce)
  {
 -      int size = ondisk_ce_size(ce);
 -      struct ondisk_cache_entry *ondisk = xcalloc(1, size);
 -      char *name;
 -      int result;
 -
        ondisk->ctime.sec = htonl(ce->ce_ctime.sec);
        ondisk->mtime.sec = htonl(ce->ce_mtime.sec);
        ondisk->ctime.nsec = htonl(ce->ce_ctime.nsec);
                struct ondisk_cache_entry_extended *ondisk2;
                ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
                ondisk2->flags2 = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16);
 -              name = ondisk2->name;
 +              return ondisk2->name;
 +      }
 +      else {
 +              return ondisk->name;
 +      }
 +}
 +
 +static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
 +                        struct strbuf *previous_name)
 +{
 +      int size;
 +      struct ondisk_cache_entry *ondisk;
 +      char *name;
 +      int result;
 +
 +      if (!previous_name) {
 +              size = ondisk_ce_size(ce);
 +              ondisk = xcalloc(1, size);
 +              name = copy_cache_entry_to_ondisk(ondisk, ce);
 +              memcpy(name, ce->name, ce_namelen(ce));
 +      } else {
 +              int common, to_remove, prefix_size;
 +              unsigned char to_remove_vi[16];
 +              for (common = 0;
 +                   (ce->name[common] &&
 +                    common < previous_name->len &&
 +                    ce->name[common] == previous_name->buf[common]);
 +                   common++)
 +                      ; /* still matching */
 +              to_remove = previous_name->len - common;
 +              prefix_size = encode_varint(to_remove, to_remove_vi);
 +
 +              if (ce->ce_flags & CE_EXTENDED)
 +                      size = offsetof(struct ondisk_cache_entry_extended, name);
 +              else
 +                      size = offsetof(struct ondisk_cache_entry, name);
 +              size += prefix_size + (ce_namelen(ce) - common + 1);
 +
 +              ondisk = xcalloc(1, size);
 +              name = copy_cache_entry_to_ondisk(ondisk, ce);
 +              memcpy(name, to_remove_vi, prefix_size);
 +              memcpy(name + prefix_size, ce->name + common, ce_namelen(ce) - common);
 +
 +              strbuf_splice(previous_name, common, to_remove,
 +                            ce->name + common, ce_namelen(ce) - common);
        }
 -      else
 -              name = ondisk->name;
 -      memcpy(name, ce->name, ce_namelen(ce));
  
        result = ce_write(c, fd, ondisk, size);
        free(ondisk);
@@@ -1748,11 -1583,10 +1748,11 @@@ int write_index(struct index_state *ist
  {
        git_SHA_CTX c;
        struct cache_header hdr;
 -      int i, err, removed, extended;
 +      int i, err, removed, extended, hdr_version;
        struct cache_entry **cache = istate->cache;
        int entries = istate->cache_nr;
        struct stat st;
 +      struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
  
        for (i = removed = extended = 0; i < entries; i++) {
                if (cache[i]->ce_flags & CE_REMOVE)
                }
        }
  
 +      if (!istate->version)
 +              istate->version = INDEX_FORMAT_DEFAULT;
 +
 +      /* demote version 3 to version 2 when the latter suffices */
 +      if (istate->version == 3 || istate->version == 2)
 +              istate->version = extended ? 3 : 2;
 +
 +      hdr_version = istate->version;
 +
        hdr.hdr_signature = htonl(CACHE_SIGNATURE);
 -      /* for extended format, increase version so older git won't try to read it */
 -      hdr.hdr_version = htonl(extended ? 3 : 2);
 +      hdr.hdr_version = htonl(hdr_version);
        hdr.hdr_entries = htonl(entries - removed);
  
        git_SHA1_Init(&c);
        if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
                return -1;
  
 +      previous_name = (hdr_version == 4) ? &previous_name_buf : NULL;
        for (i = 0; i < entries; i++) {
                struct cache_entry *ce = cache[i];
                if (ce->ce_flags & CE_REMOVE)
                        continue;
                if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
                        ce_smudge_racily_clean_entry(ce);
 -              if (ce_write_entry(&c, newfd, ce) < 0)
+               if (is_null_sha1(ce->sha1))
+                       return error("cache entry has null sha1: %s", ce->name);
 +              if (ce_write_entry(&c, newfd, ce, previous_name) < 0)
                        return -1;
        }
 +      strbuf_release(&previous_name_buf);
  
        /* Write extension data here */
        if (istate->cache_tree) {
diff --combined revision.c
index 5b81a92e3ac65ab0295f25198511fc83b4bbf1c9,21ef729cfaa4e675bae3c73a7a09d1cfc21aad05..74c484ca84867c5edfb1ac569ac63efef1a893e6
@@@ -139,32 -139,11 +139,32 @@@ void mark_tree_uninteresting(struct tre
  
  void mark_parents_uninteresting(struct commit *commit)
  {
 -      struct commit_list *parents = commit->parents;
 +      struct commit_list *parents = NULL, *l;
 +
 +      for (l = commit->parents; l; l = l->next)
 +              commit_list_insert(l->item, &parents);
  
        while (parents) {
                struct commit *commit = parents->item;
 -              if (!(commit->object.flags & UNINTERESTING)) {
 +              l = parents;
 +              parents = parents->next;
 +              free(l);
 +
 +              while (commit) {
 +                      /*
 +                       * A missing commit is ok iff its parent is marked
 +                       * uninteresting.
 +                       *
 +                       * We just mark such a thing parsed, so that when
 +                       * it is popped next time around, we won't be trying
 +                       * to parse it and get an error.
 +                       */
 +                      if (!has_sha1_file(commit->object.sha1))
 +                              commit->object.parsed = 1;
 +
 +                      if (commit->object.flags & UNINTERESTING)
 +                              break;
 +
                        commit->object.flags |= UNINTERESTING;
  
                        /*
                         * wasn't uninteresting), in which case we need
                         * to mark its parents recursively too..
                         */
 -                      if (commit->parents)
 -                              mark_parents_uninteresting(commit);
 -              }
 +                      if (!commit->parents)
 +                              break;
  
 -              /*
 -               * A missing commit is ok iff its parent is marked
 -               * uninteresting.
 -               *
 -               * We just mark such a thing parsed, so that when
 -               * it is popped next time around, we won't be trying
 -               * to parse it and get an error.
 -               */
 -              if (!has_sha1_file(commit->object.sha1))
 -                      commit->object.parsed = 1;
 -              parents = parents->next;
 +                      for (l = commit->parents->next; l; l = l->next)
 +                              commit_list_insert(l->item, &parents);
 +                      commit = commit->parents->item;
 +              }
        }
  }
  
@@@ -345,6 -332,7 +345,7 @@@ static int tree_difference = REV_TREE_S
  static void file_add_remove(struct diff_options *options,
                    int addremove, unsigned mode,
                    const unsigned char *sha1,
+                   int sha1_valid,
                    const char *fullpath, unsigned dirty_submodule)
  {
        int diff = addremove == '+' ? REV_TREE_NEW : REV_TREE_OLD;
@@@ -358,6 -346,7 +359,7 @@@ static void file_change(struct diff_opt
                 unsigned old_mode, unsigned new_mode,
                 const unsigned char *old_sha1,
                 const unsigned char *new_sha1,
+                int old_sha1_valid, int new_sha1_valid,
                 const char *fullpath,
                 unsigned old_dirty_submodule, unsigned new_dirty_submodule)
  {
@@@ -1358,13 -1347,11 +1360,13 @@@ static int handle_revision_opt(struct r
                revs->topo_order = 1;
        } else if (!strcmp(arg, "--simplify-merges")) {
                revs->simplify_merges = 1;
 +              revs->topo_order = 1;
                revs->rewrite_parents = 1;
                revs->simplify_history = 0;
                revs->limited = 1;
        } else if (!strcmp(arg, "--simplify-by-decoration")) {
                revs->simplify_merges = 1;
 +              revs->topo_order = 1;
                revs->rewrite_parents = 1;
                revs->simplify_history = 0;
                revs->simplify_by_decoration = 1;
                revs->grep_filter.regflags |= REG_EXTENDED;
        } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) {
                revs->grep_filter.regflags |= REG_ICASE;
 +              DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE);
        } else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) {
                revs->grep_filter.fixed = 1;
        } else if (!strcmp(arg, "--all-match")) {
@@@ -1717,21 -1703,17 +1719,21 @@@ int setup_revisions(int argc, const cha
                submodule = opt->submodule;
  
        /* First, search for "--" */
 -      seen_dashdash = 0;
 -      for (i = 1; i < argc; i++) {
 -              const char *arg = argv[i];
 -              if (strcmp(arg, "--"))
 -                      continue;
 -              argv[i] = NULL;
 -              argc = i;
 -              if (argv[i + 1])
 -                      append_prune_data(&prune_data, argv + i + 1);
 +      if (opt && opt->assume_dashdash) {
                seen_dashdash = 1;
 -              break;
 +      } else {
 +              seen_dashdash = 0;
 +              for (i = 1; i < argc; i++) {
 +                      const char *arg = argv[i];
 +                      if (strcmp(arg, "--"))
 +                              continue;
 +                      argv[i] = NULL;
 +                      argc = i;
 +                      if (argv[i + 1])
 +                              append_prune_data(&prune_data, argv + i + 1);
 +                      seen_dashdash = 1;
 +                      break;
 +              }
        }
  
        /* Second, deal with arguments and options */
                         * but the latter we have checked in the main loop.
                         */
                        for (j = i; j < argc; j++)
 -                              verify_filename(revs->prefix, argv[j]);
 +                              verify_filename(revs->prefix, argv[j], j == i);
  
                        append_prune_data(&prune_data, argv + i);
                        break;
@@@ -1949,9 -1931,8 +1951,9 @@@ static struct commit_list **simplify_on
        }
  
        /*
 -       * Do we know what commit all of our parents should be rewritten to?
 -       * Otherwise we are not ready to rewrite this one yet.
 +       * Do we know what commit all of our parents that matter
 +       * should be rewritten to?  Otherwise we are not ready to
 +       * rewrite this one yet.
         */
        for (cnt = 0, p = commit->parents; p; p = p->next) {
                pst = locate_simplify_state(revs, p->item);
                        tail = &commit_list_insert(p->item, tail)->next;
                        cnt++;
                }
 +              if (revs->first_parent_only)
 +                      break;
        }
        if (cnt) {
                tail = &commit_list_insert(commit, tail)->next;
        for (p = commit->parents; p; p = p->next) {
                pst = locate_simplify_state(revs, p->item);
                p->item = pst->simplified;
 +              if (revs->first_parent_only)
 +                      break;
        }
 -      cnt = remove_duplicate_parents(commit);
 +      if (!revs->first_parent_only)
 +              cnt = remove_duplicate_parents(commit);
 +      else
 +              cnt = 1;
  
        /*
         * It is possible that we are a merge and one side branch
  
  static void simplify_merges(struct rev_info *revs)
  {
 -      struct commit_list *list;
 +      struct commit_list *list, *next;
        struct commit_list *yet_to_do, **tail;
 +      struct commit *commit;
  
 -      if (!revs->topo_order)
 -              sort_in_topological_order(&revs->commits, revs->lifo);
        if (!revs->prune)
                return;
  
        /* feed the list reversed */
        yet_to_do = NULL;
 -      for (list = revs->commits; list; list = list->next)
 -              commit_list_insert(list->item, &yet_to_do);
 +      for (list = revs->commits; list; list = next) {
 +              commit = list->item;
 +              next = list->next;
 +              /*
 +               * Do not free(list) here yet; the original list
 +               * is used later in this function.
 +               */
 +              commit_list_insert(commit, &yet_to_do);
 +      }
        while (yet_to_do) {
                list = yet_to_do;
                yet_to_do = NULL;
                tail = &yet_to_do;
                while (list) {
 -                      struct commit *commit = list->item;
 -                      struct commit_list *next = list->next;
 +                      commit = list->item;
 +                      next = list->next;
                        free(list);
                        list = next;
                        tail = simplify_one(revs, commit, tail);
        revs->commits = NULL;
        tail = &revs->commits;
        while (list) {
 -              struct commit *commit = list->item;
 -              struct commit_list *next = list->next;
                struct merge_simplify_state *st;
 +
 +              commit = list->item;
 +              next = list->next;
                free(list);
                list = next;
                st = locate_simplify_state(revs, commit);
@@@ -2083,16 -2050,10 +2085,16 @@@ static void set_children(struct rev_inf
        }
  }
  
 +void reset_revision_walk(void)
 +{
 +      clear_object_flags(SEEN | ADDED | SHOWN);
 +}
 +
  int prepare_revision_walk(struct rev_info *revs)
  {
        int nr = revs->pending.nr;
        struct object_array_entry *e, *list;
 +      struct commit_list **next = &revs->commits;
  
        e = list = revs->pending.objects;
        revs->pending.nr = 0;
                if (commit) {
                        if (!(commit->object.flags & SEEN)) {
                                commit->object.flags |= SEEN;
 -                              commit_list_insert_by_date(commit, &revs->commits);
 +                              next = commit_list_append(commit, next);
                        }
                }
                e++;
        }
 +      commit_list_sort_by_date(&revs->commits);
        if (!revs->leak_pending)
                free(list);
  
diff --combined t/t1450-fsck.sh
index 5b79c51b8c51b3fc62823e1b51f9087fbd7f30e3,5e36cc71b4cc5f60b3c3a1782f95ae36448020c5..bf7a2cd6fb649e3b210b537d4d25312ba2921f16
@@@ -27,8 -27,12 +27,8 @@@ test_expect_success 'loose objects borr
                git init &&
                echo ../../../.git/objects >.git/objects/info/alternates &&
                test_commit C fileC one &&
 -              git fsck >../out 2>&1
 +              git fsck --no-dangling >../actual 2>&1
        ) &&
 -      {
 -              grep -v dangling out >actual ||
 -              :
 -      } &&
        test_cmp empty actual
  '
  
@@@ -213,4 -217,30 +213,30 @@@ test_expect_success 'rev-list --verify-
        grep -q "error: sha1 mismatch 63ffffffffffffffffffffffffffffffffffffff" out
  '
  
+ _bz='\0' # two characters, backslash and 0; becomes a NUL byte once expanded inside a printf format string
+ _bz5="$_bz$_bz$_bz$_bz$_bz" # five of them
+ _bz20="$_bz5$_bz5$_bz5$_bz5" # twenty of them: the 20 bytes of an all-zero sha1 in a raw tree entry
+ test_expect_success 'fsck notices blob entry pointing to null sha1' '
+       (git init null-blob &&
+        cd null-blob &&
+        sha=$(printf "100644 file$_bz$_bz20" |
+              git hash-object -w --stdin -t tree) &&
+         git fsck 2>out &&
+         cat out &&
+         grep "warning.*null sha1" out
+       )
+ '
+ test_expect_success 'fsck notices submodule entry pointing to null sha1' '
+       (git init null-commit &&
+        cd null-commit &&
+        sha=$(printf "160000 submodule$_bz$_bz20" |
+              git hash-object -w --stdin -t tree) &&
+         git fsck 2>out &&
+         cat out &&
+         grep "warning.*null sha1" out
+       )
+ '
  test_done