perf: add test for writing the index
[gitweb.git] / diff.c
diff --git a/diff.c b/diff.c
index 3cdf920672dc4f7f19eef5f4ff16a4ce66464756..9c382580306e340ed6333f96bc4919c4c507a7b9 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -2,6 +2,7 @@
  * Copyright (C) 2005 Junio C Hamano
  */
 #include "cache.h"
+#include "config.h"
 #include "tempfile.h"
 #include "quote.h"
 #include "diff.h"
 #endif
 
 static int diff_detect_rename_default;
-static int diff_indent_heuristic; /* experimental */
-static int diff_compaction_heuristic; /* experimental */
+static int diff_indent_heuristic = 1;
 static int diff_rename_limit_default = 400;
 static int diff_suppress_blank_empty;
 static int diff_use_color_default = -1;
 static int diff_context_default = 3;
+static int diff_interhunk_context_default;
 static const char *diff_word_regex_cfg;
 static const char *external_diff_cmd_cfg;
 static const char *diff_order_file_cfg;
@@ -223,16 +224,8 @@ void init_diff_ui_defaults(void)
 
 int git_diff_heuristic_config(const char *var, const char *value, void *cb)
 {
-       if (!strcmp(var, "diff.indentheuristic")) {
+       if (!strcmp(var, "diff.indentheuristic"))
                diff_indent_heuristic = git_config_bool(var, value);
-               if (diff_indent_heuristic)
-                       diff_compaction_heuristic = 0;
-       }
-       if (!strcmp(var, "diff.compactionheuristic")) {
-               diff_compaction_heuristic = git_config_bool(var, value);
-               if (diff_compaction_heuristic)
-                       diff_indent_heuristic = 0;
-       }
        return 0;
 }
 
@@ -248,6 +241,12 @@ int git_diff_ui_config(const char *var, const char *value, void *cb)
                        return -1;
                return 0;
        }
+       if (!strcmp(var, "diff.interhunkcontext")) {
+               diff_interhunk_context_default = git_config_int(var, value);
+               if (diff_interhunk_context_default < 0)
+                       return -1;
+               return 0;
+       }
        if (!strcmp(var, "diff.renames")) {
                diff_detect_rename_default = git_config_rename(var, value);
                return 0;
@@ -292,9 +291,6 @@ int git_diff_ui_config(const char *var, const char *value, void *cb)
                return 0;
        }
 
-       if (git_diff_heuristic_config(var, value, cb) < 0)
-               return -1;
-
        if (!strcmp(var, "diff.wserrorhighlight")) {
                int val = parse_ws_error_highlight(value);
                if (val < 0)
@@ -303,9 +299,6 @@ int git_diff_ui_config(const char *var, const char *value, void *cb)
                return 0;
        }
 
-       if (git_color_config(var, value, cb) < 0)
-               return -1;
-
        return git_diff_basic_config(var, value, cb);
 }
 
@@ -353,6 +346,9 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
        if (starts_with(var, "submodule."))
                return parse_submodule_config_option(var, value);
 
+       if (git_diff_heuristic_config(var, value, cb) < 0)
+               return -1;
+
        return git_default_config(var, value, cb);
 }
 
@@ -400,7 +396,7 @@ static struct diff_tempfile {
         */
        const char *name;
 
-       char hex[GIT_SHA1_HEXSZ + 1];
+       char hex[GIT_MAX_HEXSZ + 1];
        char mode[10];
 
        /*
@@ -913,7 +909,7 @@ static int fn_out_diff_words_write_helper(FILE *fp,
 /*
  * '--color-words' algorithm can be described as:
  *
- *   1. collect the minus/plus lines of a diff hunk, divided into
+ *   1. collect the minus/plus lines of a diff hunk, divided into
  *      minus-lines and plus-lines;
  *
  *   2. break both minus-lines and plus-lines into words and
@@ -1220,8 +1216,7 @@ static void free_diff_words_data(struct emit_callback *ecbdata)
                        regfree(ecbdata->diff_words->word_regex);
                        free(ecbdata->diff_words->word_regex);
                }
-               free(ecbdata->diff_words);
-               ecbdata->diff_words = NULL;
+               FREE_AND_NULL(ecbdata->diff_words);
        }
 }
 
@@ -2023,7 +2018,7 @@ static void show_dirstat(struct diff_options *options)
                if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) {
                        diff_populate_filespec(p->one, 0);
                        diff_populate_filespec(p->two, 0);
-                       diffcore_count_changes(p->one, p->two, NULL, NULL, 0,
+                       diffcore_count_changes(p->one, p->two, NULL, NULL,
                                               &copied, &added);
                        diff_free_filespec_data(p->one);
                        diff_free_filespec_data(p->two);
@@ -2097,7 +2092,7 @@ static void show_dirstat_by_line(struct diffstat_t *data, struct diff_options *o
                         * bytes per "line".
                         * This is stupid and ugly, but very cheap...
                         */
-                       damage = (damage + 63) / 64;
+                       damage = DIV_ROUND_UP(damage, 64);
                ALLOC_GROW(dir.files, dir.nr + 1, dir.alloc);
                dir.files[dir.nr].name = file->name;
                dir.files[dir.nr].changed = damage;
@@ -2704,13 +2699,13 @@ void free_filespec(struct diff_filespec *spec)
        }
 }
 
-void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1,
-                  int sha1_valid, unsigned short mode)
+void fill_filespec(struct diff_filespec *spec, const struct object_id *oid,
+                  int oid_valid, unsigned short mode)
 {
        if (mode) {
                spec->mode = canon_mode(mode);
-               hashcpy(spec->oid.hash, sha1);
-               spec->oid_valid = sha1_valid;
+               oidcpy(&spec->oid, oid);
+               spec->oid_valid = oid_valid;
        }
 }
 
@@ -2719,7 +2714,7 @@ void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1,
  * the work tree has that object contents, return true, so that
  * prepare_temp_file() does not have to inflate and extract.
  */
-static int reuse_worktree_file(const char *name, const unsigned char *sha1, int want_file)
+static int reuse_worktree_file(const char *name, const struct object_id *oid, int want_file)
 {
        const struct cache_entry *ce;
        struct stat st;
@@ -2750,14 +2745,14 @@ static int reuse_worktree_file(const char *name, const unsigned char *sha1, int
         * objects however would tend to be slower as they need
         * to be individually opened and inflated.
         */
-       if (!FAST_WORKING_DIRECTORY && !want_file && has_sha1_pack(sha1))
+       if (!FAST_WORKING_DIRECTORY && !want_file && has_sha1_pack(oid->hash))
                return 0;
 
        /*
         * Similarly, if we'd have to convert the file contents anyway, that
         * makes the optimization not worthwhile.
         */
-       if (!want_file && would_convert_to_git(name))
+       if (!want_file && would_convert_to_git(&the_index, name))
                return 0;
 
        len = strlen(name);
@@ -2770,7 +2765,7 @@ static int reuse_worktree_file(const char *name, const unsigned char *sha1, int
         * This is not the sha1 we are looking for, or
         * unreusable because it is not a regular file.
         */
-       if (hashcmp(sha1, ce->oid.hash) || !S_ISREG(ce->ce_mode))
+       if (oidcmp(oid, &ce->oid) || !S_ISREG(ce->ce_mode))
                return 0;
 
        /*
@@ -2844,7 +2839,7 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
                return diff_populate_gitlink(s, size_only);
 
        if (!s->oid_valid ||
-           reuse_worktree_file(s->path, s->oid.hash, 0)) {
+           reuse_worktree_file(s->path, &s->oid, 0)) {
                struct strbuf buf = STRBUF_INIT;
                struct stat st;
                int fd;
@@ -2872,8 +2867,25 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
                        s->should_free = 1;
                        return 0;
                }
-               if (size_only)
+
+               /*
+                * Even if the caller would be happy with getting
+                * only the size, we cannot return early at this
+                * point if the path requires us to run the content
+                * conversion.
+                */
+               if (size_only && !would_convert_to_git(&the_index, s->path))
                        return 0;
+
+               /*
+                * Note: this check uses xsize_t(st.st_size) that may
+                * not be the true size of the blob after it goes
+                * through convert_to_git().  This may not strictly be
+                * correct, but the whole point of big_file_threshold
+                * and is_binary check being that we want to avoid
+                * opening the file and inspecting the contents, this
+                * is probably fine.
+                */
                if ((flags & CHECK_BINARY) &&
                    s->size > big_file_threshold && s->is_binary == -1) {
                        s->is_binary = 1;
@@ -2889,7 +2901,7 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
                /*
                 * Convert from working tree format to canonical git format
                 */
-               if (convert_to_git(s->path, s->data, s->size, &buf, crlf_warn)) {
+               if (convert_to_git(&the_index, s->path, s->data, s->size, &buf, crlf_warn)) {
                        size_t size = 0;
                        munmap(s->data, s->size);
                        s->should_munmap = 0;
@@ -2936,8 +2948,7 @@ void diff_free_filespec_blob(struct diff_filespec *s)
 void diff_free_filespec_data(struct diff_filespec *s)
 {
        diff_free_filespec_blob(s);
-       free(s->cnt_data);
-       s->cnt_data = NULL;
+       FREE_AND_NULL(s->cnt_data);
 }
 
 static void prep_temp_blob(const char *path, struct diff_tempfile *temp,
@@ -2993,7 +3004,7 @@ static struct diff_tempfile *prepare_temp_file(const char *name,
 
        if (!S_ISGITLINK(one->mode) &&
            (!one->oid_valid ||
-            reuse_worktree_file(name, one->oid.hash, 1))) {
+            reuse_worktree_file(name, &one->oid, 1))) {
                struct stat st;
                if (lstat(name, &st) < 0) {
                        if (errno == ENOENT)
@@ -3015,13 +3026,13 @@ static struct diff_tempfile *prepare_temp_file(const char *name,
                        /* we can borrow from the file in the work tree */
                        temp->name = name;
                        if (!one->oid_valid)
-                               sha1_to_hex_r(temp->hex, null_sha1);
+                               oid_to_hex_r(temp->hex, &null_oid);
                        else
-                               sha1_to_hex_r(temp->hex, one->oid.hash);
+                               oid_to_hex_r(temp->hex, &one->oid);
                        /* Even though we may sometimes borrow the
                         * contents from the work tree, we always want
                         * one->mode.  mode is trustworthy even when
-                        * !(one->sha1_valid), as long as
+                        * !(one->oid_valid), as long as
                         * DIFF_FILE_VALID(one).
                         */
                        xsnprintf(temp->mode, sizeof(temp->mode), "%06o", one->mode);
@@ -3096,6 +3107,22 @@ static int similarity_index(struct diff_filepair *p)
        return p->score * 100 / MAX_SCORE;
 }
 
+static const char *diff_abbrev_oid(const struct object_id *oid, int abbrev)
+{
+       if (startup_info->have_repository)
+               return find_unique_abbrev(oid->hash, abbrev);
+       else {
+               char *hex = oid_to_hex(oid);
+               if (abbrev < 0)
+                       abbrev = FALLBACK_DEFAULT_ABBREV;
+               if (abbrev > GIT_SHA1_HEXSZ)
+                       die("BUG: oid abbreviation out of range: %d", abbrev);
+               if (abbrev)
+                       hex[abbrev] = '\0';
+               return hex;
+       }
+}
+
 static void fill_metainfo(struct strbuf *msg,
                          const char *name,
                          const char *other,
@@ -3154,9 +3181,9 @@ static void fill_metainfo(struct strbuf *msg,
                            (!fill_mmfile(&mf, two) && diff_filespec_is_binary(two)))
                                abbrev = 40;
                }
-               strbuf_addf(msg, "%s%sindex %s..", line_prefix, set,
-                           find_unique_abbrev(one->oid.hash, abbrev));
-               strbuf_add_unique_abbrev(msg, two->oid.hash, abbrev);
+               strbuf_addf(msg, "%s%sindex %s..%s", line_prefix, set,
+                           diff_abbrev_oid(&one->oid, abbrev),
+                           diff_abbrev_oid(&two->oid, abbrev));
                if (one->mode == two->mode)
                        strbuf_addf(msg, " %06o", one->mode);
                strbuf_addf(msg, "%s\n", reset);
@@ -3208,7 +3235,7 @@ static void run_diff_cmd(const char *pgm,
                fprintf(o->file, "* Unmerged path %s\n", name);
 }
 
-static void diff_fill_sha1_info(struct diff_filespec *one)
+static void diff_fill_oid_info(struct diff_filespec *one)
 {
        if (DIFF_FILE_VALID(one)) {
                if (!one->oid_valid) {
@@ -3267,8 +3294,8 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o)
                return;
        }
 
-       diff_fill_sha1_info(one);
-       diff_fill_sha1_info(two);
+       diff_fill_oid_info(one);
+       diff_fill_oid_info(two);
 
        if (!pgm &&
            DIFF_FILE_VALID(one) && DIFF_FILE_VALID(two) &&
@@ -3313,8 +3340,8 @@ static void run_diffstat(struct diff_filepair *p, struct diff_options *o,
        if (o->prefix_length)
                strip_prefix(o->prefix_length, &name, &other);
 
-       diff_fill_sha1_info(p->one);
-       diff_fill_sha1_info(p->two);
+       diff_fill_oid_info(p->one);
+       diff_fill_oid_info(p->two);
 
        builtin_diffstat(name, other, p->one, p->two, diffstat, o, p);
 }
@@ -3337,8 +3364,8 @@ static void run_checkdiff(struct diff_filepair *p, struct diff_options *o)
        if (o->prefix_length)
                strip_prefix(o->prefix_length, &name, &other);
 
-       diff_fill_sha1_info(p->one);
-       diff_fill_sha1_info(p->two);
+       diff_fill_oid_info(p->one);
+       diff_fill_oid_info(p->two);
 
        builtin_checkdiff(name, other, attr_path, p->one, p->two, o);
 }
@@ -3349,11 +3376,13 @@ void diff_setup(struct diff_options *options)
 
        options->file = stdout;
 
+       options->abbrev = DEFAULT_ABBREV;
        options->line_termination = '\n';
        options->break_opt = -1;
        options->rename_limit = -1;
        options->dirstat_permille = diff_dirstat_permille_default;
        options->context = diff_context_default;
+       options->interhunkcontext = diff_interhunk_context_default;
        options->ws_error_highlight = ws_error_highlight_default;
        DIFF_OPT_SET(options, RENAME_EMPTY);
 
@@ -3365,8 +3394,6 @@ void diff_setup(struct diff_options *options)
        options->xdl_opts |= diff_algorithm;
        if (diff_indent_heuristic)
                DIFF_XDL_SET(options, INDENT_HEURISTIC);
-       else if (diff_compaction_heuristic)
-               DIFF_XDL_SET(options, COMPACTION_HEURISTIC);
 
        options->orderfile = diff_order_file_cfg;
 
@@ -3468,7 +3495,7 @@ void diff_setup_done(struct diff_options *options)
                         */
                        read_cache();
        }
-       if (options->abbrev <= 0 || 40 < options->abbrev)
+       if (40 < options->abbrev)
                options->abbrev = 40; /* full */
 
        /*
@@ -3861,16 +3888,10 @@ int diff_opt_parse(struct diff_options *options,
                DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
        else if (!strcmp(arg, "--ignore-blank-lines"))
                DIFF_XDL_SET(options, IGNORE_BLANK_LINES);
-       else if (!strcmp(arg, "--indent-heuristic")) {
+       else if (!strcmp(arg, "--indent-heuristic"))
                DIFF_XDL_SET(options, INDENT_HEURISTIC);
-               DIFF_XDL_CLR(options, COMPACTION_HEURISTIC);
-       } else if (!strcmp(arg, "--no-indent-heuristic"))
-               DIFF_XDL_CLR(options, INDENT_HEURISTIC);
-       else if (!strcmp(arg, "--compaction-heuristic")) {
-               DIFF_XDL_SET(options, COMPACTION_HEURISTIC);
+       else if (!strcmp(arg, "--no-indent-heuristic"))
                DIFF_XDL_CLR(options, INDENT_HEURISTIC);
-       } else if (!strcmp(arg, "--no-compaction-heuristic"))
-               DIFF_XDL_CLR(options, COMPACTION_HEURISTIC);
        else if (!strcmp(arg, "--patience"))
                options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
        else if (!strcmp(arg, "--histogram"))
@@ -3972,6 +3993,10 @@ int diff_opt_parse(struct diff_options *options,
                return parse_submodule_opt(options, arg);
        else if (skip_prefix(arg, "--ws-error-highlight=", &arg))
                return parse_ws_error_highlight_opt(options, arg);
+       else if (!strcmp(arg, "--ita-invisible-in-index"))
+               options->ita_invisible_in_index = 1;
+       else if (!strcmp(arg, "--ita-visible-in-index"))
+               options->ita_invisible_in_index = 0;
 
        /* misc options */
        else if (!strcmp(arg, "-z"))
@@ -3994,8 +4019,7 @@ int diff_opt_parse(struct diff_options *options,
        else if (!strcmp(arg, "--pickaxe-regex"))
                options->pickaxe_opts |= DIFF_PICKAXE_REGEX;
        else if ((argcount = short_opt('O', av, &optarg))) {
-               const char *path = prefix_filename(prefix, strlen(prefix), optarg);
-               options->orderfile = xstrdup(path);
+               options->orderfile = prefix_filename(prefix, optarg);
                return argcount;
        }
        else if ((argcount = parse_long_opt("diff-filter", av, &optarg))) {
@@ -4005,6 +4029,8 @@ int diff_opt_parse(struct diff_options *options,
                            offending, optarg);
                return argcount;
        }
+       else if (!strcmp(arg, "--no-abbrev"))
+               options->abbrev = 0;
        else if (!strcmp(arg, "--abbrev"))
                options->abbrev = DEFAULT_ABBREV;
        else if (skip_prefix(arg, "--abbrev=", &arg)) {
@@ -4040,13 +4066,12 @@ int diff_opt_parse(struct diff_options *options,
        else if (!strcmp(arg, "--no-function-context"))
                DIFF_OPT_CLR(options, FUNCCONTEXT);
        else if ((argcount = parse_long_opt("output", av, &optarg))) {
-               const char *path = prefix_filename(prefix, strlen(prefix), optarg);
-               options->file = fopen(path, "w");
-               if (!options->file)
-                       die_errno("Could not open '%s'", path);
+               char *path = prefix_filename(prefix, optarg);
+               options->file = xfopen(path, "w");
                options->close_file = 1;
                if (options->use_color != GIT_COLOR_ALWAYS)
                        options->use_color = GIT_COLOR_NEVER;
+               free(path);
                return argcount;
        } else
                return 0;
@@ -4157,14 +4182,15 @@ void diff_free_filepair(struct diff_filepair *p)
        free(p);
 }
 
-const char *diff_aligned_abbrev(const unsigned char *sha1, int len)
+const char *diff_aligned_abbrev(const struct object_id *oid, int len)
 {
        int abblen;
        const char *abbrev;
-       if (len == 40)
-               return sha1_to_hex(sha1);
 
-       abbrev = find_unique_abbrev(sha1, len);
+       if (len == GIT_SHA1_HEXSZ)
+               return oid_to_hex(oid);
+
+       abbrev = diff_abbrev_oid(oid, len);
        abblen = strlen(abbrev);
 
        /*
@@ -4186,15 +4212,16 @@ const char *diff_aligned_abbrev(const unsigned char *sha1, int len)
         * the automatic sizing is supposed to give abblen that ensures
         * uniqueness across all objects (statistically speaking).
         */
-       if (abblen < 37) {
-               static char hex[41];
+       if (abblen < GIT_SHA1_HEXSZ - 3) {
+               static char hex[GIT_MAX_HEXSZ + 1];
                if (len < abblen && abblen <= len + 2)
                        xsnprintf(hex, sizeof(hex), "%s%.*s", abbrev, len+3-abblen, "..");
                else
                        xsnprintf(hex, sizeof(hex), "%s...", abbrev);
                return hex;
        }
-       return sha1_to_hex(sha1);
+
+       return oid_to_hex(oid);
 }
 
 static void diff_flush_raw(struct diff_filepair *p, struct diff_options *opt)
@@ -4205,9 +4232,9 @@ static void diff_flush_raw(struct diff_filepair *p, struct diff_options *opt)
        fprintf(opt->file, "%s", diff_line_prefix(opt));
        if (!(opt->output_format & DIFF_FORMAT_NAME_STATUS)) {
                fprintf(opt->file, ":%06o %06o %s ", p->one->mode, p->two->mode,
-                       diff_aligned_abbrev(p->one->oid.hash, opt->abbrev));
+                       diff_aligned_abbrev(&p->one->oid, opt->abbrev));
                fprintf(opt->file, "%s ",
-                       diff_aligned_abbrev(p->two->oid.hash, opt->abbrev));
+                       diff_aligned_abbrev(&p->two->oid, opt->abbrev));
        }
        if (p->score) {
                fprintf(opt->file, "%c%03d%c", p->status, similarity_index(p),
@@ -4434,6 +4461,7 @@ static void flush_one_pair(struct diff_filepair *p, struct diff_options *opt)
                name_a = p->two->path;
                name_b = NULL;
                strip_prefix(opt->prefix_length, &name_a, &name_b);
+               fprintf(opt->file, "%s", diff_line_prefix(opt));
                write_name_quoted(name_a, opt->file, opt->line_termination);
        }
 }
@@ -4536,14 +4564,26 @@ static void patch_id_consume(void *priv, char *line, unsigned long len)
        data->patchlen += new_len;
 }
 
+static void patch_id_add_string(git_SHA_CTX *ctx, const char *str)
+{
+       git_SHA1_Update(ctx, str, strlen(str));
+}
+
+static void patch_id_add_mode(git_SHA_CTX *ctx, unsigned mode)
+{
+       /* large enough for 2^32 in octal */
+       char buf[12];
+       int len = xsnprintf(buf, sizeof(buf), "%06o", mode);
+       git_SHA1_Update(ctx, buf, len);
+}
+
 /* returns 0 upon success, and writes result into sha1 */
-static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1, int diff_header_only)
+static int diff_get_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only)
 {
        struct diff_queue_struct *q = &diff_queued_diff;
        int i;
        git_SHA_CTX ctx;
        struct patch_id_t data;
-       char buffer[PATH_MAX * 4 + 20];
 
        git_SHA1_Init(&ctx);
        memset(&data, 0, sizeof(struct patch_id_t));
@@ -4570,41 +4610,35 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1,
                if (DIFF_PAIR_UNMERGED(p))
                        continue;
 
-               diff_fill_sha1_info(p->one);
-               diff_fill_sha1_info(p->two);
+               diff_fill_oid_info(p->one);
+               diff_fill_oid_info(p->two);
 
                len1 = remove_space(p->one->path, strlen(p->one->path));
                len2 = remove_space(p->two->path, strlen(p->two->path));
-               if (p->one->mode == 0)
-                       len1 = snprintf(buffer, sizeof(buffer),
-                                       "diff--gita/%.*sb/%.*s"
-                                       "newfilemode%06o"
-                                       "---/dev/null"
-                                       "+++b/%.*s",
-                                       len1, p->one->path,
-                                       len2, p->two->path,
-                                       p->two->mode,
-                                       len2, p->two->path);
-               else if (p->two->mode == 0)
-                       len1 = snprintf(buffer, sizeof(buffer),
-                                       "diff--gita/%.*sb/%.*s"
-                                       "deletedfilemode%06o"
-                                       "---a/%.*s"
-                                       "+++/dev/null",
-                                       len1, p->one->path,
-                                       len2, p->two->path,
-                                       p->one->mode,
-                                       len1, p->one->path);
-               else
-                       len1 = snprintf(buffer, sizeof(buffer),
-                                       "diff--gita/%.*sb/%.*s"
-                                       "---a/%.*s"
-                                       "+++b/%.*s",
-                                       len1, p->one->path,
-                                       len2, p->two->path,
-                                       len1, p->one->path,
-                                       len2, p->two->path);
-               git_SHA1_Update(&ctx, buffer, len1);
+               patch_id_add_string(&ctx, "diff--git");
+               patch_id_add_string(&ctx, "a/");
+               git_SHA1_Update(&ctx, p->one->path, len1);
+               patch_id_add_string(&ctx, "b/");
+               git_SHA1_Update(&ctx, p->two->path, len2);
+
+               if (p->one->mode == 0) {
+                       patch_id_add_string(&ctx, "newfilemode");
+                       patch_id_add_mode(&ctx, p->two->mode);
+                       patch_id_add_string(&ctx, "---/dev/null");
+                       patch_id_add_string(&ctx, "+++b/");
+                       git_SHA1_Update(&ctx, p->two->path, len2);
+               } else if (p->two->mode == 0) {
+                       patch_id_add_string(&ctx, "deletedfilemode");
+                       patch_id_add_mode(&ctx, p->one->mode);
+                       patch_id_add_string(&ctx, "---a/");
+                       git_SHA1_Update(&ctx, p->one->path, len1);
+                       patch_id_add_string(&ctx, "+++/dev/null");
+               } else {
+                       patch_id_add_string(&ctx, "---a/");
+                       git_SHA1_Update(&ctx, p->one->path, len1);
+                       patch_id_add_string(&ctx, "+++b/");
+                       git_SHA1_Update(&ctx, p->two->path, len2);
+               }
 
                if (diff_header_only)
                        continue;
@@ -4616,9 +4650,9 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1,
                if (diff_filespec_is_binary(p->one) ||
                    diff_filespec_is_binary(p->two)) {
                        git_SHA1_Update(&ctx, oid_to_hex(&p->one->oid),
-                                       40);
+                                       GIT_SHA1_HEXSZ);
                        git_SHA1_Update(&ctx, oid_to_hex(&p->two->oid),
-                                       40);
+                                       GIT_SHA1_HEXSZ);
                        continue;
                }
 
@@ -4631,15 +4665,15 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1,
                                     p->one->path);
        }
 
-       git_SHA1_Final(sha1, &ctx);
+       git_SHA1_Final(oid->hash, &ctx);
        return 0;
 }
 
-int diff_flush_patch_id(struct diff_options *options, unsigned char *sha1, int diff_header_only)
+int diff_flush_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only)
 {
        struct diff_queue_struct *q = &diff_queued_diff;
        int i;
-       int result = diff_get_patch_id(options, sha1, diff_header_only);
+       int result = diff_get_patch_id(options, oid, diff_header_only);
 
        for (i = 0; i < q->nr; i++)
                diff_free_filepair(q->queue[i]);
@@ -4767,9 +4801,7 @@ void diff_flush(struct diff_options *options)
                 */
                if (options->close_file)
                        fclose(options->file);
-               options->file = fopen("/dev/null", "w");
-               if (!options->file)
-                       die_errno("Could not open /dev/null");
+               options->file = xfopen("/dev/null", "w");
                options->close_file = 1;
                for (i = 0; i < q->nr; i++) {
                        struct diff_filepair *p = q->queue[i];
@@ -5041,8 +5073,8 @@ static int is_submodule_ignored(const char *path, struct diff_options *options)
 
 void diff_addremove(struct diff_options *options,
                    int addremove, unsigned mode,
-                   const unsigned char *sha1,
-                   int sha1_valid,
+                   const struct object_id *oid,
+                   int oid_valid,
                    const char *concatpath, unsigned dirty_submodule)
 {
        struct diff_filespec *one, *two;
@@ -5074,9 +5106,9 @@ void diff_addremove(struct diff_options *options,
        two = alloc_filespec(concatpath);
 
        if (addremove != '+')
-               fill_filespec(one, sha1, sha1_valid, mode);
+               fill_filespec(one, oid, oid_valid, mode);
        if (addremove != '-') {
-               fill_filespec(two, sha1, sha1_valid, mode);
+               fill_filespec(two, oid, oid_valid, mode);
                two->dirty_submodule = dirty_submodule;
        }
 
@@ -5087,9 +5119,9 @@ void diff_addremove(struct diff_options *options,
 
 void diff_change(struct diff_options *options,
                 unsigned old_mode, unsigned new_mode,
-                const unsigned char *old_sha1,
-                const unsigned char *new_sha1,
-                int old_sha1_valid, int new_sha1_valid,
+                const struct object_id *old_oid,
+                const struct object_id *new_oid,
+                int old_oid_valid, int new_oid_valid,
                 const char *concatpath,
                 unsigned old_dirty_submodule, unsigned new_dirty_submodule)
 {
@@ -5101,14 +5133,10 @@ void diff_change(struct diff_options *options,
                return;
 
        if (DIFF_OPT_TST(options, REVERSE_DIFF)) {
-               unsigned tmp;
-               const unsigned char *tmp_c;
-               tmp = old_mode; old_mode = new_mode; new_mode = tmp;
-               tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
-               tmp = old_sha1_valid; old_sha1_valid = new_sha1_valid;
-                       new_sha1_valid = tmp;
-               tmp = old_dirty_submodule; old_dirty_submodule = new_dirty_submodule;
-                       new_dirty_submodule = tmp;
+               SWAP(old_mode, new_mode);
+               SWAP(old_oid, new_oid);
+               SWAP(old_oid_valid, new_oid_valid);
+               SWAP(old_dirty_submodule, new_dirty_submodule);
        }
 
        if (options->prefix &&
@@ -5117,8 +5145,8 @@ void diff_change(struct diff_options *options,
 
        one = alloc_filespec(concatpath);
        two = alloc_filespec(concatpath);
-       fill_filespec(one, old_sha1, old_sha1_valid, old_mode);
-       fill_filespec(two, new_sha1, new_sha1_valid, new_mode);
+       fill_filespec(one, old_oid, old_oid_valid, old_mode);
+       fill_filespec(two, new_oid, new_oid_valid, new_mode);
        one->dirty_submodule = old_dirty_submodule;
        two->dirty_submodule = new_dirty_submodule;
        p = diff_queue(&diff_queued_diff, one, two);
@@ -5208,7 +5236,7 @@ size_t fill_textconv(struct userdiff_driver *driver,
 
        if (driver->textconv_cache && df->oid_valid) {
                *outbuf = notes_cache_get(driver->textconv_cache,
-                                         df->oid.hash,
+                                         &df->oid,
                                          &size);
                if (*outbuf)
                        return size;
@@ -5220,7 +5248,7 @@ size_t fill_textconv(struct userdiff_driver *driver,
 
        if (driver->textconv_cache && df->oid_valid) {
                /* ignore errors, as we might be in a readonly repository */
-               notes_cache_put(driver->textconv_cache, df->oid.hash, *outbuf,
+               notes_cache_put(driver->textconv_cache, &df->oid, *outbuf,
                                size);
                /*
                 * we could save up changes and flush them all at the end,
@@ -5234,6 +5262,29 @@ size_t fill_textconv(struct userdiff_driver *driver,
        return size;
 }
 
+int textconv_object(const char *path,
+                   unsigned mode,
+                   const struct object_id *oid,
+                   int oid_valid,
+                   char **buf,
+                   unsigned long *buf_size)
+{
+       struct diff_filespec *df;
+       struct userdiff_driver *textconv;
+
+       df = alloc_filespec(path);
+       fill_filespec(df, oid, oid_valid, mode);
+       textconv = get_textconv(df);
+       if (!textconv) {
+               free_filespec(df);
+               return 0;
+       }
+
+       *buf_size = fill_textconv(textconv, df, buf);
+       free_filespec(df);
+       return 1;
+}
+
 void setup_diff_pager(struct diff_options *opt)
 {
        /*