diff: add --ignore-blank-lines option
authorAntoine Pelisse <apelisse@gmail.com>
Wed, 19 Jun 2013 18:46:07 +0000 (20:46 +0200)
committerJunio C Hamano <gitster@pobox.com>
Wed, 19 Jun 2013 22:17:45 +0000 (15:17 -0700)
The goal of the patch is to mimic GNU diff's -B/--ignore-blank-lines
option as closely as possible. The short option is not available
because it's already used for "break-rewrites".

When this option is used, git-diff will not create hunks that simply
add or remove empty lines, but will still show added or removed empty
lines if they are close enough to "valuable" changes.

There are two differences between this option and the GNU diff -B option:
- GNU diff doesn't have "--inter-hunk-context", so git must handle the
interaction between the two options on its own
- GNU diff's output for the following sequence looks like a bug (context
is displayed twice):

$ seq 5 >file1
$ cat <<EOF >file2
change
1
2

3
4
5
change
EOF
$ diff -u -B file1 file2
--- file1 2013-06-08 22:13:04.471517834 +0200
+++ file2 2013-06-08 22:13:23.275517855 +0200
@@ -1,5 +1,7 @@
+change
1
2
+
3
4
5
@@ -3,3 +5,4 @@
3
4
5
+change

So here is a more thorough description of the option:
- real changes are interesting
- blank lines that are close enough (less than context size) to
interesting changes are considered interesting (recursive definition)
- "context" lines are used around each hunk of interesting changes
- If two hunks are separated by less than "inter-hunk-context", they
will be merged into one.

The implementation does the "interesting changes selection" in a single
pass.

Signed-off-by: Antoine Pelisse <apelisse@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/diff-options.txt
diff.c
t/t4015-diff-whitespace.sh
xdiff/xdiff.h
xdiff/xdiffi.c
xdiff/xdiffi.h
xdiff/xemit.c
xdiff/xemit.h
xdiff/xutils.c
xdiff/xutils.h
index 104579dc75128811e475d408035569ac09f3283b..80f06b771bd42d2696ba7643d8fb15f7e9ebd3d8 100644 (file)
@@ -439,6 +439,9 @@ endif::git-format-patch[]
        differences even if one line has whitespace where the other
        line has none.
 
+--ignore-blank-lines::
+       Ignore changes whose lines are all blank.
+
 --inter-hunk-context=<lines>::
        Show the context between diff hunks, up to the specified number
        of lines, thereby fusing hunks that are close to each other.
diff --git a/diff.c b/diff.c
index f0b3e7cfe34a99c0e5fea85fcd00ec54e9b578d8..208094f6b7d8b6e18459fc1cd57ac8554723d8d4 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -3593,6 +3593,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
                DIFF_XDL_SET(options, IGNORE_WHITESPACE_CHANGE);
        else if (!strcmp(arg, "--ignore-space-at-eol"))
                DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
+       else if (!strcmp(arg, "--ignore-blank-lines"))
+               DIFF_XDL_SET(options, IGNORE_BLANK_LINES);
        else if (!strcmp(arg, "--patience"))
                options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
        else if (!strcmp(arg, "--histogram"))
index cc3db1304ef202a376e84360986568bb7408c701..3fb4b976a277e69cdbf73172caf335748be609a3 100755 (executable)
@@ -142,6 +142,351 @@ EOF
 git diff --ignore-space-at-eol > out
 test_expect_success 'another test, with --ignore-space-at-eol' 'test_cmp expect out'
 
+test_expect_success 'ignore-blank-lines: only new lines' '
+       test_seq 5 >x &&
+       git update-index x &&
+       test_seq 5 | sed "/3/i \\
+" >x &&
+       git diff --ignore-blank-lines >out &&
+       >expect &&
+       test_cmp out expect
+'
+
+test_expect_success 'ignore-blank-lines: only new lines with space' '
+       test_seq 5 >x &&
+       git update-index x &&
+       test_seq 5 | sed "/3/i \ " >x &&
+       git diff -w --ignore-blank-lines >out &&
+       >expect &&
+       test_cmp out expect
+'
+
+test_expect_success 'ignore-blank-lines: after change' '
+       cat <<-\EOF >x &&
+       1
+       2
+
+       3
+       4
+       5
+
+       6
+       7
+       EOF
+       git update-index x &&
+       cat <<-\EOF >x &&
+       change
+
+       1
+       2
+       3
+       4
+       5
+       6
+
+       7
+       EOF
+       git diff --inter-hunk-context=100 --ignore-blank-lines >out.tmp &&
+       cat <<-\EOF >expected &&
+       diff --git a/x b/x
+       --- a/x
+       +++ b/x
+       @@ -1,6 +1,7 @@
+       +change
+       +
+        1
+        2
+       -
+        3
+        4
+        5
+       EOF
+       compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: before change' '
+       cat <<-\EOF >x &&
+       1
+       2
+
+       3
+       4
+       5
+       6
+       7
+       EOF
+       git update-index x &&
+       cat <<-\EOF >x &&
+
+       1
+       2
+       3
+       4
+       5
+
+       6
+       7
+       change
+       EOF
+       git diff --inter-hunk-context=100 --ignore-blank-lines >out.tmp &&
+       cat <<-\EOF >expected &&
+       diff --git a/x b/x
+       --- a/x
+       +++ b/x
+       @@ -4,5 +4,7 @@
+        3
+        4
+        5
+       +
+        6
+        7
+       +change
+       EOF
+       compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: between changes' '
+       cat <<-\EOF >x &&
+       1
+       2
+       3
+       4
+       5
+
+
+       6
+       7
+       8
+       9
+       10
+       EOF
+       git update-index x &&
+       cat <<-\EOF >x &&
+       change
+       1
+       2
+
+       3
+       4
+       5
+       6
+       7
+       8
+
+       9
+       10
+       change
+       EOF
+       git diff --ignore-blank-lines >out.tmp &&
+       cat <<-\EOF >expected &&
+       diff --git a/x b/x
+       --- a/x
+       +++ b/x
+       @@ -1,5 +1,7 @@
+       +change
+        1
+        2
+       +
+        3
+        4
+        5
+       @@ -8,5 +8,7 @@
+        6
+        7
+        8
+       +
+        9
+        10
+       +change
+       EOF
+       compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: between changes (with interhunkctx)' '
+       test_seq 10 >x &&
+       git update-index x &&
+       cat <<-\EOF >x &&
+       change
+       1
+       2
+
+       3
+       4
+       5
+
+       6
+       7
+       8
+       9
+
+       10
+       change
+       EOF
+       git diff --inter-hunk-context=2 --ignore-blank-lines >out.tmp &&
+       cat <<-\EOF >expected &&
+       diff --git a/x b/x
+       --- a/x
+       +++ b/x
+       @@ -1,10 +1,15 @@
+       +change
+        1
+        2
+       +
+        3
+        4
+        5
+       +
+        6
+        7
+        8
+        9
+       +
+        10
+       +change
+       EOF
+       compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: scattered spaces' '
+       test_seq 10 >x &&
+       git update-index x &&
+       cat <<-\EOF >x &&
+       change
+       1
+       2
+       3
+
+       4
+
+       5
+
+       6
+
+
+       7
+
+       8
+       9
+       10
+       change
+       EOF
+       git diff --inter-hunk-context=4 --ignore-blank-lines >out.tmp &&
+       cat <<-\EOF >expected &&
+       diff --git a/x b/x
+       --- a/x
+       +++ b/x
+       @@ -1,3 +1,4 @@
+       +change
+        1
+        2
+        3
+       @@ -8,3 +15,4 @@
+        8
+        9
+        10
+       +change
+       EOF
+       compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: spaces coalesce' '
+       test_seq 6 >x &&
+       git update-index x &&
+       cat <<-\EOF >x &&
+       change
+       1
+       2
+       3
+
+       4
+
+       5
+
+       6
+       change
+       EOF
+       git diff --inter-hunk-context=4 --ignore-blank-lines >out.tmp &&
+       cat <<-\EOF >expected &&
+       diff --git a/x b/x
+       --- a/x
+       +++ b/x
+       @@ -1,6 +1,11 @@
+       +change
+        1
+        2
+        3
+       +
+        4
+       +
+        5
+       +
+        6
+       +change
+       EOF
+       compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: mix changes and blank lines' '
+       test_seq 16 >x &&
+       git update-index x &&
+       cat <<-\EOF >x &&
+       change
+       1
+       2
+
+       3
+       4
+       5
+       change
+       6
+       7
+       8
+
+       9
+       10
+       11
+       change
+       12
+       13
+       14
+
+       15
+       16
+       change
+       EOF
+       git diff --ignore-blank-lines >out.tmp &&
+       cat <<-\EOF >expected &&
+       diff --git a/x b/x
+       --- a/x
+       +++ b/x
+       @@ -1,8 +1,11 @@
+       +change
+        1
+        2
+       +
+        3
+        4
+        5
+       +change
+        6
+        7
+        8
+       @@ -9,8 +13,11 @@
+        9
+        10
+        11
+       +change
+        12
+        13
+        14
+       +
+        15
+        16
+       +change
+       EOF
+       compare_diff_patch expected out.tmp
+'
+
 test_expect_success 'check mixed spaces and tabs in indent' '
 
        # This is indented with SP HT SP.
index 219a3bbca613192d8b127659e4bfc0d60f38cf10..c0339919ccf78c4fe7d78123f9c3e891075d0602 100644 (file)
@@ -39,6 +39,8 @@ extern "C" {
 #define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF)
 #define XDF_DIFF_ALG(x) ((x) & XDF_DIFF_ALGORITHM_MASK)
 
+#define XDF_IGNORE_BLANK_LINES (1 << 7)
+
 #define XDL_EMIT_FUNCNAMES (1 << 0)
 #define XDL_EMIT_COMMON (1 << 1)
 #define XDL_EMIT_FUNCCONTEXT (1 << 2)
index b2eb6db2c851aea7eb08b17d802155876bef211c..2358a2d6326e54308413cb8a5e6b61eba06324e9 100644 (file)
@@ -394,6 +394,7 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1,
        xch->i2 = i2;
        xch->chg1 = chg1;
        xch->chg2 = chg2;
+       xch->ignore = 0;
 
        return xch;
 }
@@ -544,7 +545,9 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
        xdchange_t *xch, *xche;
 
        for (xch = xscr; xch; xch = xche->next) {
-               xche = xdl_get_hunk(xch, xecfg);
+               xche = xdl_get_hunk(&xch, xecfg);
+               if (!xch)
+                       break;
                if (xecfg->hunk_func(xch->i1, xche->i1 + xche->chg1 - xch->i1,
                                     xch->i2, xche->i2 + xche->chg2 - xch->i2,
                                     ecb->priv) < 0)
@@ -553,6 +556,27 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
        return 0;
 }
 
+static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
+{
+       xdchange_t *xch;
+
+       for (xch = xscr; xch; xch = xch->next) {
+               int ignore = 1;
+               xrecord_t **rec;
+               long i;
+
+               rec = &xe->xdf1.recs[xch->i1];
+               for (i = 0; i < xch->chg1 && ignore; i++)
+                       ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
+
+               rec = &xe->xdf2.recs[xch->i2];
+               for (i = 0; i < xch->chg2 && ignore; i++)
+                       ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
+
+               xch->ignore = ignore;
+       }
+}
+
 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
             xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
        xdchange_t *xscr;
@@ -571,6 +595,9 @@ int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
                return -1;
        }
        if (xscr) {
+               if (xpp->flags & XDF_IGNORE_BLANK_LINES)
+                       xdl_mark_ignorable(xscr, &xe, xpp->flags);
+
                if (ef(&xe, xscr, ecb, xecfg) < 0) {
 
                        xdl_free_script(xscr);
index 7a92ea9c4d84a559ae1d0bd90ebe667828d8f9cb..8b81206c9af0767bd91c4b9e453f7c5c2bde47b1 100644 (file)
@@ -41,6 +41,7 @@ typedef struct s_xdchange {
        struct s_xdchange *next;
        long i1, i2;
        long chg1, chg2;
+       int ignore;
 } xdchange_t;
 
 
index d11dbf9f13c13db16fa05f7539170862d6da9795..4d8645867e7d0103d49882877a7e4a8abacd24ea 100644 (file)
@@ -56,16 +56,51 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *
 /*
  * Starting at the passed change atom, find the latest change atom to be included
  * inside the differential hunk according to the specified configuration.
+ * Also advance xscr if the first changes must be discarded.
  */
-xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg) {
-       xdchange_t *xch, *xchp;
+xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
+{
+       xdchange_t *xch, *xchp, *lxch;
        long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
+       long max_ignorable = xecfg->ctxlen;
+       unsigned long ignored = 0; /* number of ignored blank lines */
+
+       /* remove ignorable changes that are too far before other changes */
+       for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) {
+               xch = xchp->next;
+
+               if (xch == NULL ||
+                   xch->i1 - (xchp->i1 + xchp->chg1) >= max_ignorable)
+                       *xscr = xch;
+       }
+
+       if (*xscr == NULL)
+               return NULL;
+
+       lxch = *xscr;
 
-       for (xchp = xscr, xch = xscr->next; xch; xchp = xch, xch = xch->next)
-               if (xch->i1 - (xchp->i1 + xchp->chg1) > max_common)
+       for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) {
+               long distance = xch->i1 - (xchp->i1 + xchp->chg1);
+               if (distance > max_common)
                        break;
 
-       return xchp;
+               if (distance < max_ignorable && (!xch->ignore || lxch == xchp)) {
+                       lxch = xch;
+                       ignored = 0;
+               } else if (distance < max_ignorable && xch->ignore) {
+                       ignored += xch->chg2;
+               } else if (lxch != xchp &&
+                          xch->i1 + ignored - (lxch->i1 + lxch->chg1) > max_common) {
+                       break;
+               } else if (!xch->ignore) {
+                       lxch = xch;
+                       ignored = 0;
+               } else {
+                       ignored += xch->chg2;
+               }
+       }
+
+       return lxch;
 }
 
 
@@ -139,7 +174,9 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
                return xdl_emit_common(xe, xscr, ecb, xecfg);
 
        for (xch = xscr; xch; xch = xche->next) {
-               xche = xdl_get_hunk(xch, xecfg);
+               xche = xdl_get_hunk(&xch, xecfg);
+               if (!xch)
+                       break;
 
                s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
                s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
index c2e2e830273782dc597606ddbb0401c04dce8f8f..d29710770ce40bafa6e9eb2b2ea7c9c8ba43c727 100644 (file)
@@ -27,7 +27,7 @@
 typedef int (*emit_func_t)(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
                           xdemitconf_t const *xecfg);
 
-xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg);
+xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg);
 int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
                  xdemitconf_t const *xecfg);
 
index 9504eaecb8ac45248ea22b0779aa7492cd15d83e..62cb23dfd37743e4985655998ccabd56db160233 100644 (file)
@@ -143,6 +143,19 @@ long xdl_guess_lines(mmfile_t *mf, long sample) {
        return nl + 1;
 }
 
+int xdl_blankline(const char *line, long size, long flags)
+{
+       long i;
+
+       if (!(flags & XDF_WHITESPACE_FLAGS))
+               return (size <= 1);
+
+       for (i = 0; i < size && XDL_ISSPACE(line[i]); i++)
+               ;
+
+       return (i == size);
+}
+
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
 {
        int i1, i2;
index ad1428ed699383bf6f736b28033aeb9dbe114b76..4646ce575251b07053f20285be99422d6576603e 100644 (file)
@@ -32,6 +32,7 @@ int xdl_cha_init(chastore_t *cha, long isize, long icount);
 void xdl_cha_free(chastore_t *cha);
 void *xdl_cha_alloc(chastore_t *cha);
 long xdl_guess_lines(mmfile_t *mf, long sample);
+int xdl_blankline(const char *line, long size, long flags);
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
 unsigned long xdl_hash_record(char const **data, char const *top, long flags);
 unsigned int xdl_hashbits(unsigned int size);