Merge branch 'jk/diff-compact-heuristic'
authorJunio C Hamano <gitster@pobox.com>
Fri, 6 May 2016 21:45:46 +0000 (14:45 -0700)
committerJunio C Hamano <gitster@pobox.com>
Fri, 6 May 2016 21:45:46 +0000 (14:45 -0700)
Patch output from "git diff" and friends has been tweaked to be
more readable by using a blank line as a strong hint that the
contents before and after it belong to a logically separate unit.

* jk/diff-compact-heuristic:
diff: undocument the compaction heuristic knobs for experimentation
xdiff: implement empty line chunk heuristic
xdiff: add recs_match helper function

1  2 
diff.c
xdiff/xdiff.h
diff --combined diff.c
index 4dfe6609d059b56c5ab4dd31c8d2139c7fa90abb,05ca3ce08e21082f66ea9a8c6d0b51d1e56c6815..d3734d3181b7eebedf80bf6f3d534c61961dca9f
--- 1/diff.c
--- 2/diff.c
+++ b/diff.c
@@@ -2,7 -2,6 +2,7 @@@
   * Copyright (C) 2005 Junio C Hamano
   */
  #include "cache.h"
 +#include "tempfile.h"
  #include "quote.h"
  #include "diff.h"
  #include "diffcore.h"
@@@ -13,7 -12,7 +13,7 @@@
  #include "run-command.h"
  #include "utf8.h"
  #include "userdiff.h"
 -#include "sigchain.h"
 +#include "submodule-config.h"
  #include "submodule.h"
  #include "ll-merge.h"
  #include "string-list.h"
@@@ -26,6 -25,7 +26,7 @@@
  #endif
  
  static int diff_detect_rename_default;
+ static int diff_compaction_heuristic = 1;
  static int diff_rename_limit_default = 400;
  static int diff_suppress_blank_empty;
  static int diff_use_color_default = -1;
@@@ -168,11 -168,6 +169,11 @@@ long parse_algorithm_value(const char *
   * never be affected by the setting of diff.renames
   * the user happens to have in the configuration file.
   */
 +void init_diff_ui_defaults(void)
 +{
 +      diff_detect_rename_default = 1;
 +}
 +
  int git_diff_ui_config(const char *var, const char *value, void *cb)
  {
        if (!strcmp(var, "diff.color") || !strcmp(var, "color.diff")) {
                diff_detect_rename_default = git_config_rename(var, value);
                return 0;
        }
+       if (!strcmp(var, "diff.compactionheuristic")) {
+               diff_compaction_heuristic = git_config_bool(var, value);
+               return 0;
+       }
        if (!strcmp(var, "diff.autorefreshindex")) {
                diff_auto_refresh_index = git_config_bool(var, value);
                return 0;
@@@ -314,26 -313,11 +319,26 @@@ static const char *external_diff(void
        return external_diff_cmd;
  }
  
 +/*
 + * Keep track of files used for diffing. Sometimes such an entry
 + * refers to a temporary file, sometimes to an existing file, and
 + * sometimes to "/dev/null".
 + */
  static struct diff_tempfile {
 -      const char *name; /* filename external diff should read from */
 -      char hex[41];
 +      /*
 +       * filename external diff should read from, or NULL if this
 +       * entry is currently not in use:
 +       */
 +      const char *name;
 +
 +      char hex[GIT_SHA1_HEXSZ + 1];
        char mode[10];
 -      char tmp_path[PATH_MAX];
 +
 +      /*
 +       * If this diff_tempfile instance refers to a temporary file,
 +       * this tempfile object is used to manage its lifetime.
 +       */
 +      struct tempfile tempfile;
  } diff_temp[2];
  
  typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
@@@ -499,59 -483,26 +504,59 @@@ static int new_blank_line_at_eof(struc
        return ws_blank_line(line, len, ecbdata->ws_rule);
  }
  
 -static void emit_add_line(const char *reset,
 -                        struct emit_callback *ecbdata,
 -                        const char *line, int len)
 +static void emit_line_checked(const char *reset,
 +                            struct emit_callback *ecbdata,
 +                            const char *line, int len,
 +                            enum color_diff color,
 +                            unsigned ws_error_highlight,
 +                            char sign)
  {
 -      const char *ws = diff_get_color(ecbdata->color_diff, DIFF_WHITESPACE);
 -      const char *set = diff_get_color(ecbdata->color_diff, DIFF_FILE_NEW);
 +      const char *set = diff_get_color(ecbdata->color_diff, color);
 +      const char *ws = NULL;
 +
 +      if (ecbdata->opt->ws_error_highlight & ws_error_highlight) {
 +              ws = diff_get_color(ecbdata->color_diff, DIFF_WHITESPACE);
 +              if (!*ws)
 +                      ws = NULL;
 +      }
  
 -      if (!*ws)
 -              emit_line_0(ecbdata->opt, set, reset, '+', line, len);
 -      else if (new_blank_line_at_eof(ecbdata, line, len))
 +      if (!ws)
 +              emit_line_0(ecbdata->opt, set, reset, sign, line, len);
 +      else if (sign == '+' && new_blank_line_at_eof(ecbdata, line, len))
                /* Blank line at EOF - paint '+' as well */
 -              emit_line_0(ecbdata->opt, ws, reset, '+', line, len);
 +              emit_line_0(ecbdata->opt, ws, reset, sign, line, len);
        else {
                /* Emit just the prefix, then the rest. */
 -              emit_line_0(ecbdata->opt, set, reset, '+', "", 0);
 +              emit_line_0(ecbdata->opt, set, reset, sign, "", 0);
                ws_check_emit(line, len, ecbdata->ws_rule,
                              ecbdata->opt->file, set, reset, ws);
        }
  }
  
 +static void emit_add_line(const char *reset,
 +                        struct emit_callback *ecbdata,
 +                        const char *line, int len)
 +{
 +      emit_line_checked(reset, ecbdata, line, len,
 +                        DIFF_FILE_NEW, WSEH_NEW, '+');
 +}
 +
 +static void emit_del_line(const char *reset,
 +                        struct emit_callback *ecbdata,
 +                        const char *line, int len)
 +{
 +      emit_line_checked(reset, ecbdata, line, len,
 +                        DIFF_FILE_OLD, WSEH_OLD, '-');
 +}
 +
 +static void emit_context_line(const char *reset,
 +                            struct emit_callback *ecbdata,
 +                            const char *line, int len)
 +{
 +      emit_line_checked(reset, ecbdata, line, len,
 +                        DIFF_CONTEXT, WSEH_CONTEXT, ' ');
 +}
 +
  static void emit_hunk_header(struct emit_callback *ecbdata,
                             const char *line, int len)
  {
@@@ -618,16 -569,25 +623,16 @@@ static struct diff_tempfile *claim_diff
        die("BUG: diff is failing to clean up its tempfiles");
  }
  
 -static int remove_tempfile_installed;
 -
  static void remove_tempfile(void)
  {
        int i;
        for (i = 0; i < ARRAY_SIZE(diff_temp); i++) {
 -              if (diff_temp[i].name == diff_temp[i].tmp_path)
 -                      unlink_or_warn(diff_temp[i].name);
 +              if (is_tempfile_active(&diff_temp[i].tempfile))
 +                      delete_tempfile(&diff_temp[i].tempfile);
                diff_temp[i].name = NULL;
        }
  }
  
 -static void remove_tempfile_on_signal(int signo)
 -{
 -      remove_tempfile();
 -      sigchain_pop(signo);
 -      raise(signo);
 -}
 -
  static void print_line_count(FILE *file, int count)
  {
        switch (count) {
@@@ -648,6 -608,7 +653,6 @@@ static void emit_rewrite_lines(struct e
  {
        const char *endp = NULL;
        static const char *nneof = " No newline at end of file\n";
 -      const char *old = diff_get_color(ecb->color_diff, DIFF_FILE_OLD);
        const char *reset = diff_get_color(ecb->color_diff, DIFF_RESET);
  
        while (0 < size) {
                len = endp ? (endp - data + 1) : size;
                if (prefix != '+') {
                        ecb->lno_in_preimage++;
 -                      emit_line_0(ecb->opt, old, reset, '-',
 -                                  data, len);
 +                      emit_del_line(reset, ecb, data, len);
                } else {
                        ecb->lno_in_postimage++;
                        emit_add_line(reset, ecb, data, len);
@@@ -1294,27 -1256,17 +1299,27 @@@ static void fn_out_consume(void *priv, 
                return;
        }
  
 -      if (line[0] != '+') {
 -              const char *color =
 -                      diff_get_color(ecbdata->color_diff,
 -                                     line[0] == '-' ? DIFF_FILE_OLD : DIFF_CONTEXT);
 -              ecbdata->lno_in_preimage++;
 -              if (line[0] == ' ')
 -                      ecbdata->lno_in_postimage++;
 -              emit_line(ecbdata->opt, color, reset, line, len);
 -      } else {
 +      switch (line[0]) {
 +      case '+':
                ecbdata->lno_in_postimage++;
                emit_add_line(reset, ecbdata, line + 1, len - 1);
 +              break;
 +      case '-':
 +              ecbdata->lno_in_preimage++;
 +              emit_del_line(reset, ecbdata, line + 1, len - 1);
 +              break;
 +      case ' ':
 +              ecbdata->lno_in_postimage++;
 +              ecbdata->lno_in_preimage++;
 +              emit_context_line(reset, ecbdata, line + 1, len - 1);
 +              break;
 +      default:
 +              /* incomplete line at the end */
 +              ecbdata->lno_in_preimage++;
 +              emit_line(ecbdata->opt,
 +                        diff_get_color(ecbdata->color_diff, DIFF_CONTEXT),
 +                        reset, line, len);
 +              break;
        }
  }
  
@@@ -2612,9 -2564,12 +2617,9 @@@ static void builtin_checkdiff(const cha
  
  struct diff_filespec *alloc_filespec(const char *path)
  {
 -      int namelen = strlen(path);
 -      struct diff_filespec *spec = xmalloc(sizeof(*spec) + namelen + 1);
 +      struct diff_filespec *spec;
  
 -      memset(spec, 0, sizeof(*spec));
 -      spec->path = (char *)(spec + 1);
 -      memcpy(spec->path, path, namelen+1);
 +      FLEXPTR_ALLOC_STR(spec, path, path);
        spec->count = 1;
        spec->is_binary = -1;
        return spec;
@@@ -2709,21 -2664,21 +2714,21 @@@ static int reuse_worktree_file(const ch
  
  static int diff_populate_gitlink(struct diff_filespec *s, int size_only)
  {
 -      int len;
 -      char *data = xmalloc(100), *dirty = "";
 +      struct strbuf buf = STRBUF_INIT;
 +      char *dirty = "";
  
        /* Are we looking at the work tree? */
        if (s->dirty_submodule)
                dirty = "-dirty";
  
 -      len = snprintf(data, 100,
 -                     "Subproject commit %s%s\n", sha1_to_hex(s->sha1), dirty);
 -      s->data = data;
 -      s->size = len;
 -      s->should_free = 1;
 +      strbuf_addf(&buf, "Subproject commit %s%s\n", sha1_to_hex(s->sha1), dirty);
 +      s->size = buf.len;
        if (size_only) {
                s->data = NULL;
 -              free(data);
 +              strbuf_release(&buf);
 +      } else {
 +              s->data = strbuf_detach(&buf, NULL);
 +              s->should_free = 1;
        }
        return 0;
  }
@@@ -2871,7 -2826,8 +2876,7 @@@ static void prep_temp_blob(const char *
        strbuf_addstr(&template, "XXXXXX_");
        strbuf_addstr(&template, base);
  
 -      fd = git_mkstemps(temp->tmp_path, PATH_MAX, template.buf,
 -                      strlen(base) + 1);
 +      fd = mks_tempfile_ts(&temp->tempfile, template.buf, strlen(base) + 1);
        if (fd < 0)
                die_errno("unable to create temp-file");
        if (convert_to_working_tree(path,
        }
        if (write_in_full(fd, blob, size) != size)
                die_errno("unable to write temp-file");
 -      close(fd);
 -      temp->name = temp->tmp_path;
 -      strcpy(temp->hex, sha1_to_hex(sha1));
 -      temp->hex[40] = 0;
 -      sprintf(temp->mode, "%06o", mode);
 +      close_tempfile(&temp->tempfile);
 +      temp->name = get_tempfile_path(&temp->tempfile);
 +      sha1_to_hex_r(temp->hex, sha1);
 +      xsnprintf(temp->mode, sizeof(temp->mode), "%06o", mode);
        strbuf_release(&buf);
        strbuf_release(&template);
        free(path_dup);
@@@ -2901,11 -2858,17 +2906,11 @@@ static struct diff_tempfile *prepare_te
                 * a '+' entry produces this for file-1.
                 */
                temp->name = "/dev/null";
 -              strcpy(temp->hex, ".");
 -              strcpy(temp->mode, ".");
 +              xsnprintf(temp->hex, sizeof(temp->hex), ".");
 +              xsnprintf(temp->mode, sizeof(temp->mode), ".");
                return temp;
        }
  
 -      if (!remove_tempfile_installed) {
 -              atexit(remove_tempfile);
 -              sigchain_push_common(remove_tempfile_on_signal);
 -              remove_tempfile_installed = 1;
 -      }
 -
        if (!S_ISGITLINK(one->mode) &&
            (!one->sha1_valid ||
             reuse_worktree_file(name, one->sha1, 1))) {
                        /* we can borrow from the file in the work tree */
                        temp->name = name;
                        if (!one->sha1_valid)
 -                              strcpy(temp->hex, sha1_to_hex(null_sha1));
 +                              sha1_to_hex_r(temp->hex, null_sha1);
                        else
 -                              strcpy(temp->hex, sha1_to_hex(one->sha1));
 +                              sha1_to_hex_r(temp->hex, one->sha1);
                        /* Even though we may sometimes borrow the
                         * contents from the work tree, we always want
                         * one->mode.  mode is trustworthy even when
                         * !(one->sha1_valid), as long as
                         * DIFF_FILE_VALID(one).
                         */
 -                      sprintf(temp->mode, "%06o", one->mode);
 +                      xsnprintf(temp->mode, sizeof(temp->mode), "%06o", one->mode);
                }
                return temp;
        }
@@@ -3269,7 -3232,6 +3274,7 @@@ void diff_setup(struct diff_options *op
        options->rename_limit = -1;
        options->dirstat_permille = diff_dirstat_permille_default;
        options->context = diff_context_default;
 +      options->ws_error_highlight = WSEH_NEW;
        DIFF_OPT_SET(options, RENAME_EMPTY);
  
        /* pathchange left =NULL by default */
        options->use_color = diff_use_color_default;
        options->detect_rename = diff_detect_rename_default;
        options->xdl_opts |= diff_algorithm;
+       if (diff_compaction_heuristic)
+               DIFF_XDL_SET(options, COMPACTION_HEURISTIC);
  
        options->orderfile = diff_order_file_cfg;
  
@@@ -3656,55 -3620,12 +3663,55 @@@ static void enable_patch_output(int *fm
        *fmt |= DIFF_FORMAT_PATCH;
  }
  
 -int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 +static int parse_one_token(const char **arg, const char *token)
 +{
 +      const char *rest;
 +      if (skip_prefix(*arg, token, &rest) && (!*rest || *rest == ',')) {
 +              *arg = rest;
 +              return 1;
 +      }
 +      return 0;
 +}
 +
 +static int parse_ws_error_highlight(struct diff_options *opt, const char *arg)
 +{
 +      const char *orig_arg = arg;
 +      unsigned val = 0;
 +      while (*arg) {
 +              if (parse_one_token(&arg, "none"))
 +                      val = 0;
 +              else if (parse_one_token(&arg, "default"))
 +                      val = WSEH_NEW;
 +              else if (parse_one_token(&arg, "all"))
 +                      val = WSEH_NEW | WSEH_OLD | WSEH_CONTEXT;
 +              else if (parse_one_token(&arg, "new"))
 +                      val |= WSEH_NEW;
 +              else if (parse_one_token(&arg, "old"))
 +                      val |= WSEH_OLD;
 +              else if (parse_one_token(&arg, "context"))
 +                      val |= WSEH_CONTEXT;
 +              else {
 +                      error("unknown value after ws-error-highlight=%.*s",
 +                            (int)(arg - orig_arg), orig_arg);
 +                      return 0;
 +              }
 +              if (*arg)
 +                      arg++;
 +      }
 +      opt->ws_error_highlight = val;
 +      return 1;
 +}
 +
 +int diff_opt_parse(struct diff_options *options,
 +                 const char **av, int ac, const char *prefix)
  {
        const char *arg = av[0];
        const char *optarg;
        int argcount;
  
 +      if (!prefix)
 +              prefix = "";
 +
        /* Output format options */
        if (!strcmp(arg, "-p") || !strcmp(arg, "-u") || !strcmp(arg, "--patch")
            || opt_arg(arg, 'U', "unified", &options->context))
                DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
        else if (!strcmp(arg, "--ignore-blank-lines"))
                DIFF_XDL_SET(options, IGNORE_BLANK_LINES);
+       else if (!strcmp(arg, "--compaction-heuristic"))
+               DIFF_XDL_SET(options, COMPACTION_HEURISTIC);
+       else if (!strcmp(arg, "--no-compaction-heuristic"))
+               DIFF_XDL_CLR(options, COMPACTION_HEURISTIC);
        else if (!strcmp(arg, "--patience"))
                options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
        else if (!strcmp(arg, "--histogram"))
                DIFF_OPT_SET(options, FIND_COPIES_HARDER);
        else if (!strcmp(arg, "--follow"))
                DIFF_OPT_SET(options, FOLLOW_RENAMES);
 -      else if (!strcmp(arg, "--no-follow"))
 +      else if (!strcmp(arg, "--no-follow")) {
                DIFF_OPT_CLR(options, FOLLOW_RENAMES);
 -      else if (!strcmp(arg, "--color"))
 +              DIFF_OPT_CLR(options, DEFAULT_FOLLOW_RENAMES);
 +      } else if (!strcmp(arg, "--color"))
                options->use_color = 1;
        else if (skip_prefix(arg, "--color=", &arg)) {
                int value = git_config_colorbool(NULL, arg);
                DIFF_OPT_SET(options, SUBMODULE_LOG);
        else if (skip_prefix(arg, "--submodule=", &arg))
                return parse_submodule_opt(options, arg);
 +      else if (skip_prefix(arg, "--ws-error-highlight=", &arg))
 +              return parse_ws_error_highlight(options, arg);
  
        /* misc options */
        else if (!strcmp(arg, "-z"))
        else if (!strcmp(arg, "--pickaxe-regex"))
                options->pickaxe_opts |= DIFF_PICKAXE_REGEX;
        else if ((argcount = short_opt('O', av, &optarg))) {
 -              options->orderfile = optarg;
 +              const char *path = prefix_filename(prefix, strlen(prefix), optarg);
 +              options->orderfile = xstrdup(path);
                return argcount;
        }
        else if ((argcount = parse_long_opt("diff-filter", av, &optarg))) {
        else if (!strcmp(arg, "--no-function-context"))
                DIFF_OPT_CLR(options, FUNCCONTEXT);
        else if ((argcount = parse_long_opt("output", av, &optarg))) {
 -              options->file = fopen(optarg, "w");
 +              const char *path = prefix_filename(prefix, strlen(prefix), optarg);
 +              options->file = fopen(path, "w");
                if (!options->file)
 -                      die_errno("Could not open '%s'", optarg);
 +                      die_errno("Could not open '%s'", path);
                options->close_file = 1;
                return argcount;
        } else
@@@ -4091,9 -4011,9 +4102,9 @@@ const char *diff_unique_abbrev(const un
        if (abblen < 37) {
                static char hex[41];
                if (len < abblen && abblen <= len + 2)
 -                      sprintf(hex, "%s%.*s", abbrev, len+3-abblen, "..");
 +                      xsnprintf(hex, sizeof(hex), "%s%.*s", abbrev, len+3-abblen, "..");
                else
 -                      sprintf(hex, "%s...", abbrev);
 +                      xsnprintf(hex, sizeof(hex), "%s...", abbrev);
                return hex;
        }
        return sha1_to_hex(sha1);
@@@ -5087,7 -5007,7 +5098,7 @@@ size_t fill_textconv(struct userdiff_dr
  {
        size_t size;
  
 -      if (!driver || !driver->textconv) {
 +      if (!driver) {
                if (!DIFF_FILE_VALID(df)) {
                        *outbuf = "";
                        return 0;
                return df->size;
        }
  
 +      if (!driver->textconv)
 +              die("BUG: fill_textconv called with non-textconv driver");
 +
        if (driver->textconv_cache && df->sha1_valid) {
                *outbuf = notes_cache_get(driver->textconv_cache, df->sha1,
                                          &size);
diff --combined xdiff/xdiff.h
index 4fb7e79410c22fba1fb390af2e09008e932f5ea8,d1dbb2750acb9da8f6e61d71ce4e51e689e6ae88..7423f77fc8b2682c121e8d4adb855337e107b29e
@@@ -41,7 -41,10 +41,9 @@@ extern "C" 
  
  #define XDF_IGNORE_BLANK_LINES (1 << 7)
  
+ #define XDF_COMPACTION_HEURISTIC (1 << 8)
  #define XDL_EMIT_FUNCNAMES (1 << 0)
 -#define XDL_EMIT_COMMON (1 << 1)
  #define XDL_EMIT_FUNCCONTEXT (1 << 2)
  
  #define XDL_MMB_READONLY (1 << 0)