Merge branch 'js/diff-color-words'
author Junio C Hamano <gitster@pobox.com>
Mon, 26 Jan 2009 01:13:29 +0000 (17:13 -0800)
committer Junio C Hamano <gitster@pobox.com>
Mon, 26 Jan 2009 01:13:29 +0000 (17:13 -0800)
* js/diff-color-words:
Change the spelling of "wordregex".
color-words: Support diff.wordregex config option
color-words: make regex configurable via attributes
color-words: expand docs with precise semantics
color-words: enable REG_NEWLINE to help user
color-words: take an optional regular expression describing words
color-words: change algorithm to allow for 0-character word boundaries
color-words: refactor word splitting and use ALLOC_GROW()
Add color_fwrite_lines(), a function coloring each line individually

1  2 
Documentation/config.txt
Documentation/diff-options.txt
color.c
color.h
diff.c
diff --combined Documentation/config.txt
index 290cb48eb9dc959c380de94a787650c7d8b64436,332213e65d0f4972906f6804d4994842a1d7f6f3..e2b8775dd308d66027494d8705643c30581e2e65
@@@ -635,10 -635,16 +635,16 @@@ diff.renames:
        will enable basic rename detection.  If set to "copies" or
        "copy", it will detect copies, as well.
  
 -diff.suppress-blank-empty::
 +diff.suppressBlankEmpty::
        A boolean to inhibit the standard behavior of printing a space
        before each empty output line. Defaults to false.
  
+ diff.wordRegex::
+       A POSIX Extended Regular Expression used to determine what is a "word"
+       when performing word-by-word difference calculations.  Character
+       sequences that match the regular expression are "words", all other
+       characters are *ignorable* whitespace.
  fetch.unpackLimit::
        If the number of objects fetched over the git native
        transfer is below this
@@@ -702,9 -708,7 +708,9 @@@ gc.packrefs:
  
  gc.pruneexpire::
        When 'git-gc' is run, it will call 'prune --expire 2.weeks.ago'.
 -      Override the grace period with this config variable.
 +      Override the grace period with this config variable.  The value
 +      "now" may be used to disable this  grace period and always prune
 +      unreachable objects immediately.
  
  gc.reflogexpire::
        'git-reflog expire' removes reflog entries older than
@@@ -725,10 -729,6 +731,10 @@@ gc.rerereunresolved:
        kept for this many days when 'git-rerere gc' is run.
        The default is 15 days.  See linkgit:git-rerere[1].
  
 +gitcvs.commitmsgannotation::
 +      Append this string to each commit message. Set to empty string
 +      to disable this feature. Defaults to "via git-CVS emulator".
 +
  gitcvs.enabled::
        Whether the CVS server interface is enabled for this repository.
        See linkgit:git-cvsserver[1].
diff --combined Documentation/diff-options.txt
index a8ed23e7f8e6d58bb08bd2926e5d2114524c2c0a,164e2c5348cd0210a9848c7eb21947c7d0369bd4..813a7b11b99d51d3014f854770384ed4fc247c9f
@@@ -36,9 -36,6 +36,9 @@@ endif::git-format-patch[
  --patch-with-raw::
        Synonym for "-p --raw".
  
 +--patience::
 +      Generate a diff using the "patience diff" algorithm.
 +
  --stat[=width[,name-width]]::
        Generate a diffstat.  You can override the default
        output width for 80-column terminal by "--stat=width".
        Turn off colored diff, even when the configuration file
        gives the default to color output.
  
- --color-words::
-       Show colored word diff, i.e. color words which have changed.
+ --color-words[=<regex>]::
+       Show colored word diff, i.e., color words which have changed.
+       By default, words are separated by whitespace.
+ +
+ When a <regex> is specified, every non-overlapping match of the
+ <regex> is considered a word.  Anything between these matches is
+ considered whitespace and ignored(!) for the purposes of finding
+ differences.  You may want to append `|[^[:space:]]` to your regular
+ expression to make sure that it matches all non-whitespace characters.
+ A match that contains a newline is silently truncated(!) at the
+ newline.
+ +
+ The regex can also be set via a diff driver or configuration option, see
+ linkgit:gitattributes[1] or linkgit:git-config[1].  Giving it explicitly
+ overrides any diff driver or configuration setting.  Diff drivers
+ override configuration settings.
  
  --no-renames::
        Turn off rename detection, even when the configuration
diff --combined color.c
index 915d7a97f67dfd8459e2adc444acab777e503ddd,d4ae83f9b7b2075aed27e0709e4cc99a76755e13..db4dccfb77d80c9bd8981719fe2d0dc17c6772b6
+++ b/color.c
@@@ -40,41 -40,30 +40,41 @@@ static int parse_attr(const char *name
  }
  
  void color_parse(const char *value, const char *var, char *dst)
 +{
 +      color_parse_mem(value, strlen(value), var, dst);
 +}
 +
 +void color_parse_mem(const char *value, int value_len, const char *var,
 +              char *dst)
  {
        const char *ptr = value;
 +      int len = value_len;
        int attr = -1;
        int fg = -2;
        int bg = -2;
  
 -      if (!strcasecmp(value, "reset")) {
 +      if (!strncasecmp(value, "reset", len)) {
                strcpy(dst, "\033[m");
                return;
        }
  
        /* [fg [bg]] [attr] */
 -      while (*ptr) {
 +      while (len > 0) {
                const char *word = ptr;
 -              int val, len = 0;
 +              int val, wordlen = 0;
  
 -              while (word[len] && !isspace(word[len]))
 -                      len++;
 +              while (len > 0 && !isspace(word[wordlen])) {
 +                      wordlen++;
 +                      len--;
 +              }
  
 -              ptr = word + len;
 -              while (*ptr && isspace(*ptr))
 +              ptr = word + wordlen;
 +              while (len > 0 && isspace(*ptr)) {
                        ptr++;
 +                      len--;
 +              }
  
 -              val = parse_color(word, len);
 +              val = parse_color(word, wordlen);
                if (val >= -1) {
                        if (fg == -2) {
                                fg = val;
@@@ -86,7 -75,7 +86,7 @@@
                        }
                        goto bad;
                }
 -              val = parse_attr(word, len);
 +              val = parse_attr(word, wordlen);
                if (val < 0 || attr != -1)
                        goto bad;
                attr = val;
        *dst = 0;
        return;
  bad:
 -      die("bad config value '%s' for variable '%s'", value, var);
 +      die("bad color value '%.*s' for variable '%s'", value_len, value, var);
  }
  
  int git_config_colorbool(const char *var, const char *value, int stdout_is_tty)
@@@ -202,3 -191,31 +202,31 @@@ int color_fprintf_ln(FILE *fp, const ch
        va_end(args);
        return r;
  }
+ /*
+  * This function splits the buffer by newlines and colors the lines individually.
+  *
+  * Returns 0 on success.
+  */
+ int color_fwrite_lines(FILE *fp, const char *color,
+               size_t count, const char *buf)
+ {
+       if (!*color)
+               return fwrite(buf, count, 1, fp) != 1;
+       while (count) {
+               char *p = memchr(buf, '\n', count);
+               if (p != buf && (fputs(color, fp) < 0 ||
+                               fwrite(buf, p ? p - buf : count, 1, fp) != 1 ||
+                               fputs(COLOR_RESET, fp) < 0))
+                       return -1;
+               if (!p)
+                       return 0;
+               if (fputc('\n', fp) < 0)
+                       return -1;
+               count -= p + 1 - buf;
+               buf = p + 1;
+       }
+       return 0;
+ }
diff --combined color.h
index 70660999df4b937b542fff6163b798bf16841b7f,cd5c985eb7ee51c332020722fdf6282637822f37..5019df82f79f1888b3aa57b9752a6a55b13f475a
+++ b/color.h
@@@ -16,9 -16,9 +16,10 @@@ extern int git_use_color_default
  int git_color_default_config(const char *var, const char *value, void *cb);
  
  int git_config_colorbool(const char *var, const char *value, int stdout_is_tty);
 -void color_parse(const char *var, const char *value, char *dst);
 +void color_parse(const char *value, const char *var, char *dst);
 +void color_parse_mem(const char *value, int len, const char *var, char *dst);
  int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
  int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...);
+ int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
  
  #endif /* COLOR_H */
diff --combined diff.c
index 82cff975b3512b6b31d78a31cfab4cca3d575cc5,ed8b83c68f1304e116f2ca841d76d513e46ea0c4..972b3daa6578776ca2f262d8e4d3290bae64e234
--- 1/diff.c
--- 2/diff.c
+++ b/diff.c
@@@ -23,6 -23,7 +23,7 @@@ static int diff_detect_rename_default
  static int diff_rename_limit_default = 200;
  static int diff_suppress_blank_empty;
  int diff_use_color_default = -1;
+ static const char *diff_word_regex_cfg;
  static const char *external_diff_cmd_cfg;
  int diff_auto_refresh_index = 1;
  static int diff_mnemonic_prefix;
@@@ -92,6 -93,8 +93,8 @@@ int git_diff_ui_config(const char *var
        }
        if (!strcmp(var, "diff.external"))
                return git_config_string(&external_diff_cmd_cfg, var, value);
+       if (!strcmp(var, "diff.wordregex"))
+               return git_config_string(&diff_word_regex_cfg, var, value);
  
        return git_diff_basic_config(var, value, cb);
  }
@@@ -118,9 -121,7 +121,9 @@@ int git_diff_basic_config(const char *v
        }
  
        /* like GNU diff's --suppress-blank-empty option  */
 -      if (!strcmp(var, "diff.suppress-blank-empty")) {
 +      if (!strcmp(var, "diff.suppressblankempty") ||
 +                      /* for backwards compatibility */
 +                      !strcmp(var, "diff.suppress-blank-empty")) {
                diff_suppress_blank_empty = git_config_bool(var, value);
                return 0;
        }
@@@ -321,82 -322,138 +324,138 @@@ static int fill_mmfile(mmfile_t *mf, st
  struct diff_words_buffer {
        mmfile_t text;
        long alloc;
-       long current; /* output pointer */
-       int suppressed_newline;
+       struct diff_words_orig {
+               const char *begin, *end;
+       } *orig;
+       int orig_nr, orig_alloc;
  };
  
  static void diff_words_append(char *line, unsigned long len,
                struct diff_words_buffer *buffer)
  {
-       if (buffer->text.size + len > buffer->alloc) {
-               buffer->alloc = (buffer->text.size + len) * 3 / 2;
-               buffer->text.ptr = xrealloc(buffer->text.ptr, buffer->alloc);
-       }
+       ALLOC_GROW(buffer->text.ptr, buffer->text.size + len, buffer->alloc);
        line++;
        len--;
        memcpy(buffer->text.ptr + buffer->text.size, line, len);
        buffer->text.size += len;
+       buffer->text.ptr[buffer->text.size] = '\0';
  }
  
  struct diff_words_data {
        struct diff_words_buffer minus, plus;
+       const char *current_plus;
        FILE *file;
+       regex_t *word_regex;
  };
  
- static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color,
-               int suppress_newline)
+ static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
  {
-       const char *ptr;
-       int eol = 0;
+       struct diff_words_data *diff_words = priv;
+       int minus_first, minus_len, plus_first, plus_len;
+       const char *minus_begin, *minus_end, *plus_begin, *plus_end;
  
-       if (len == 0)
+       if (line[0] != '@' || parse_hunk_header(line, len,
+                       &minus_first, &minus_len, &plus_first, &plus_len))
                return;
  
-       ptr  = buffer->text.ptr + buffer->current;
-       buffer->current += len;
-       if (ptr[len - 1] == '\n') {
-               eol = 1;
-               len--;
+       /* POSIX requires that first be decremented by one if len == 0... */
+       if (minus_len) {
+               minus_begin = diff_words->minus.orig[minus_first].begin;
+               minus_end =
+                       diff_words->minus.orig[minus_first + minus_len - 1].end;
+       } else
+               minus_begin = minus_end =
+                       diff_words->minus.orig[minus_first].end;
+       if (plus_len) {
+               plus_begin = diff_words->plus.orig[plus_first].begin;
+               plus_end = diff_words->plus.orig[plus_first + plus_len - 1].end;
+       } else
+               plus_begin = plus_end = diff_words->plus.orig[plus_first].end;
+       if (diff_words->current_plus != plus_begin)
+               fwrite(diff_words->current_plus,
+                               plus_begin - diff_words->current_plus, 1,
+                               diff_words->file);
+       if (minus_begin != minus_end)
+               color_fwrite_lines(diff_words->file,
+                               diff_get_color(1, DIFF_FILE_OLD),
+                               minus_end - minus_begin, minus_begin);
+       if (plus_begin != plus_end)
+               color_fwrite_lines(diff_words->file,
+                               diff_get_color(1, DIFF_FILE_NEW),
+                               plus_end - plus_begin, plus_begin);
+       diff_words->current_plus = plus_end;
+ }
+ /* This function starts looking at *begin, and returns 0 iff a word was found. */
+ static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex,
+               int *begin, int *end)
+ {
+       if (word_regex && *begin < buffer->size) {
+               regmatch_t match[1];
+               if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) {
+                       char *p = memchr(buffer->ptr + *begin + match[0].rm_so,
+                                       '\n', match[0].rm_eo - match[0].rm_so);
+                       *end = p ? p - buffer->ptr : match[0].rm_eo + *begin;
+                       *begin += match[0].rm_so;
+                       return *begin >= *end;
+               }
+               return -1;
        }
  
-       fputs(diff_get_color(1, color), file);
-       fwrite(ptr, len, 1, file);
-       fputs(diff_get_color(1, DIFF_RESET), file);
+       /* find the next word */
+       while (*begin < buffer->size && isspace(buffer->ptr[*begin]))
+               (*begin)++;
+       if (*begin >= buffer->size)
+               return -1;
  
-       if (eol) {
-               if (suppress_newline)
-                       buffer->suppressed_newline = 1;
-               else
-                       putc('\n', file);
-       }
+       /* find the end of the word */
+       *end = *begin + 1;
+       while (*end < buffer->size && !isspace(buffer->ptr[*end]))
+               (*end)++;
+       return 0;
  }
  
- static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
+ /*
+  * This function splits the words in buffer->text, stores the list with
+  * newline separator into out, and saves the offsets of the original words
+  * in buffer->orig.
+  */
+ static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out,
+               regex_t *word_regex)
  {
-       struct diff_words_data *diff_words = priv;
+       int i, j;
+       long alloc = 0;
  
-       if (diff_words->minus.suppressed_newline) {
-               if (line[0] != '+')
-                       putc('\n', diff_words->file);
-               diff_words->minus.suppressed_newline = 0;
-       }
+       out->size = 0;
+       out->ptr = NULL;
  
-       len--;
-       switch (line[0]) {
-               case '-':
-                       print_word(diff_words->file,
-                                  &diff_words->minus, len, DIFF_FILE_OLD, 1);
-                       break;
-               case '+':
-                       print_word(diff_words->file,
-                                  &diff_words->plus, len, DIFF_FILE_NEW, 0);
-                       break;
-               case ' ':
-                       print_word(diff_words->file,
-                                  &diff_words->plus, len, DIFF_PLAIN, 0);
-                       diff_words->minus.current += len;
-                       break;
+       /* fake an empty "0th" word */
+       ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc);
+       buffer->orig[0].begin = buffer->orig[0].end = buffer->text.ptr;
+       buffer->orig_nr = 1;
+       for (i = 0; i < buffer->text.size; i++) {
+               if (find_word_boundaries(&buffer->text, word_regex, &i, &j))
+                       return;
+               /* store original boundaries */
+               ALLOC_GROW(buffer->orig, buffer->orig_nr + 1,
+                               buffer->orig_alloc);
+               buffer->orig[buffer->orig_nr].begin = buffer->text.ptr + i;
+               buffer->orig[buffer->orig_nr].end = buffer->text.ptr + j;
+               buffer->orig_nr++;
+               /* store one word */
+               ALLOC_GROW(out->ptr, out->size + j - i + 1, alloc);
+               memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i);
+               out->ptr[out->size + j - i] = '\n';
+               out->size += j - i + 1;
+               i = j - 1;
        }
  }
  
@@@ -407,38 -464,36 +466,36 @@@ static void diff_words_show(struct diff
        xdemitconf_t xecfg;
        xdemitcb_t ecb;
        mmfile_t minus, plus;
-       int i;
+       /* special case: only removal */
+       if (!diff_words->plus.text.size) {
+               color_fwrite_lines(diff_words->file,
+                       diff_get_color(1, DIFF_FILE_OLD),
+                       diff_words->minus.text.size, diff_words->minus.text.ptr);
+               diff_words->minus.text.size = 0;
+               return;
+       }
+       diff_words->current_plus = diff_words->plus.text.ptr;
  
        memset(&xpp, 0, sizeof(xpp));
        memset(&xecfg, 0, sizeof(xecfg));
-       minus.size = diff_words->minus.text.size;
-       minus.ptr = xmalloc(minus.size);
-       memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size);
-       for (i = 0; i < minus.size; i++)
-               if (isspace(minus.ptr[i]))
-                       minus.ptr[i] = '\n';
-       diff_words->minus.current = 0;
-       plus.size = diff_words->plus.text.size;
-       plus.ptr = xmalloc(plus.size);
-       memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size);
-       for (i = 0; i < plus.size; i++)
-               if (isspace(plus.ptr[i]))
-                       plus.ptr[i] = '\n';
-       diff_words->plus.current = 0;
+       diff_words_fill(&diff_words->minus, &minus, diff_words->word_regex);
+       diff_words_fill(&diff_words->plus, &plus, diff_words->word_regex);
        xpp.flags = XDF_NEED_MINIMAL;
-       xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc;
+       /* as only the hunk header will be parsed, we need a 0-context */
+       xecfg.ctxlen = 0;
        xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words,
                      &xpp, &xecfg, &ecb);
        free(minus.ptr);
        free(plus.ptr);
+       if (diff_words->current_plus != diff_words->plus.text.ptr +
+                       diff_words->plus.text.size)
+               fwrite(diff_words->current_plus,
+                       diff_words->plus.text.ptr + diff_words->plus.text.size
+                       - diff_words->current_plus, 1,
+                       diff_words->file);
        diff_words->minus.text.size = diff_words->plus.text.size = 0;
-       if (diff_words->minus.suppressed_newline) {
-               putc('\n', diff_words->file);
-               diff_words->minus.suppressed_newline = 0;
-       }
  }
  
  typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
@@@ -462,7 -517,10 +519,10 @@@ static void free_diff_words_data(struc
                        diff_words_show(ecbdata->diff_words);
  
                free (ecbdata->diff_words->minus.text.ptr);
+               free (ecbdata->diff_words->minus.orig);
                free (ecbdata->diff_words->plus.text.ptr);
+               free (ecbdata->diff_words->plus.orig);
+               free(ecbdata->diff_words->word_regex);
                free(ecbdata->diff_words);
                ecbdata->diff_words = NULL;
        }
@@@ -1325,6 -1383,12 +1385,12 @@@ static const struct userdiff_funcname *
        return one->driver->funcname.pattern ? &one->driver->funcname : NULL;
  }
  
+ static const char *userdiff_word_regex(struct diff_filespec *one)
+ {
+       diff_filespec_load_driver(one);
+       return one->driver->word_regex;
+ }
  void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
  {
        if (!options->a_prefix)
@@@ -1485,6 -1549,21 +1551,21 @@@ static void builtin_diff(const char *na
                        ecbdata.diff_words =
                                xcalloc(1, sizeof(struct diff_words_data));
                        ecbdata.diff_words->file = o->file;
+                       if (!o->word_regex)
+                               o->word_regex = userdiff_word_regex(one);
+                       if (!o->word_regex)
+                               o->word_regex = userdiff_word_regex(two);
+                       if (!o->word_regex)
+                               o->word_regex = diff_word_regex_cfg;
+                       if (o->word_regex) {
+                               ecbdata.diff_words->word_regex = (regex_t *)
+                                       xmalloc(sizeof(regex_t));
+                               if (regcomp(ecbdata.diff_words->word_regex,
+                                               o->word_regex,
+                                               REG_EXTENDED | REG_NEWLINE))
+                                       die ("Invalid regular expression: %s",
+                                                       o->word_regex);
+                       }
                }
                xdi_diff_outf(&mf1, &mf2, fn_out_consume, &ecbdata,
                              &xpp, &xecfg, &ecb);
@@@ -2474,8 -2553,6 +2555,8 @@@ int diff_opt_parse(struct diff_options 
                options->xdl_opts |= XDF_IGNORE_WHITESPACE_CHANGE;
        else if (!strcmp(arg, "--ignore-space-at-eol"))
                options->xdl_opts |= XDF_IGNORE_WHITESPACE_AT_EOL;
 +      else if (!strcmp(arg, "--patience"))
 +              options->xdl_opts |= XDF_PATIENCE_DIFF;
  
        /* flags options */
        else if (!strcmp(arg, "--binary")) {
                DIFF_OPT_CLR(options, COLOR_DIFF);
        else if (!strcmp(arg, "--color-words"))
                options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
+       else if (!prefixcmp(arg, "--color-words=")) {
+               options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
+               options->word_regex = arg + 14;
+       }
        else if (!strcmp(arg, "--exit-code"))
                DIFF_OPT_SET(options, EXIT_WITH_STATUS);
        else if (!strcmp(arg, "--quiet"))