diff: do not chomp hunk-header in the middle of a character
author Junio C Hamano <junio@pobox.com>
Wed, 2 Jan 2008 09:50:11 +0000 (01:50 -0800)
committer Junio C Hamano <gitster@pobox.com>
Mon, 7 Jan 2008 06:44:44 +0000 (22:44 -0800)
We truncate hunk-header line at 80 bytes, but that 80th byte
could be in the middle of a character, which is bad. This uses
pick_one_utf8_char() function to make sure we do not cut a character
in the middle.

This assumes that the internal representation of the text is
UTF-8. This needs to be extended in the future but the optimal
direction has not been decided yet.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff.c
t/t4025-hunk-header.sh [new file with mode: 0755]
diff --git a/diff.c b/diff.c
index 2c78d74a427aa04aba1403661c899083488e46f2..b18c140e3df7acc6e5bddd499d7ed7a4e672bf97 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -10,6 +10,7 @@
 #include "color.h"
 #include "attr.h"
 #include "run-command.h"
+#include "utf8.h"
 
 #ifdef NO_FAST_WORKING_DIRECTORY
 #define FAST_WORKING_DIRECTORY 0
@@ -469,10 +470,13 @@ static void diff_words_show(struct diff_words_data *diff_words)
        }
 }
 
+typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len); /* optional hook: given line and its length, returns the length to keep */
+
 struct emit_callback {
        struct xdiff_emit_state xm;
        int nparents, color_diff;
        unsigned ws_rule;
+       sane_truncate_fn truncate;
        const char **label_path;
        struct diff_words_data *diff_words;
        int *found_changesp;
@@ -525,6 +529,24 @@ static void emit_add_line(const char *reset, struct emit_callback *ecbdata, cons
        }
 }
 
+static unsigned long sane_truncate_line(struct emit_callback *ecb, char *line, unsigned long len)
+{ /* Returns how many leading bytes of line the caller should emit. */
+       const char *cp;
+       unsigned long allot;
+       size_t l = len; /* bytes not yet consumed; handed to utf8_width() by address */
+
+       if (ecb->truncate) /* a hook stored in the callback, when set, replaces the default logic */
+               return ecb->truncate(line, len);
+       cp = line;
+       allot = l; /* remember the starting length so the consumed count can be computed */
+       while (0 < l) {
+               (void) utf8_width(&cp, &l); /* presumably advances cp and shrinks l by one character -- confirm in utf8.h */
+               if (!cp) /* stop as soon as utf8_width() leaves cp NULL */
+                       break; /* truncated in the middle? */
+       }
+       return allot - l; /* starting length minus whatever was left unconsumed */
+}
+
 static void fn_out_consume(void *priv, char *line, unsigned long len)
 {
        int i;
@@ -555,8 +577,11 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
                ;
        if (2 <= i && i < len && line[i] == ' ') {
                ecbdata->nparents = i - 1;
+               len = sane_truncate_line(ecbdata, line, len);
                emit_line(diff_get_color(ecbdata->color_diff, DIFF_FRAGINFO),
                          reset, line, len);
+               if (line[len-1] != '\n')
+                       putchar('\n');
                return;
        }
 
diff --git a/t/t4025-hunk-header.sh b/t/t4025-hunk-header.sh
new file mode 100755 (executable)
index 0000000..9ba06b7
--- /dev/null
@@ -0,0 +1,44 @@
+#!/bin/sh
+
+test_description='diff hunk header truncation'
+
+. ./test-lib.sh
+
+N='日本語' # three-character non-ASCII sample used to build the long lines
+N1='日' # only the first character of N
+N2='日本' # only the first two characters of N
+NS="$N$N$N$N$N$N$N$N$N$N$N$N$N" # N repeated to form the overly long line content
+
+test_expect_success setup '
+
+       (
+               echo "A $NS"
+               for c in B C D E F G H I J K
+               do
+                       echo "  $c"
+               done
+               echo "L  $NS"
+               for c in M N O P Q R S T U V
+               do
+                       echo "  $c"
+               done
+       ) >file &&
+       git add file &&
+
+       sed -e "/^  [EP]/s/$/ modified/" <file >file+ &&
+       mv file+ file
+
+'
+
+test_expect_success 'hunk header truncation with an overly long line' '
+
+       git diff | sed -n -e "s/^.*@@//p" >actual &&
+       (
+               echo " A $N$N$N$N$N$N$N$N$N2"
+               echo " L  $N$N$N$N$N$N$N$N$N1"
+       ) >expected &&
+       diff -u actual expected
+
+'
+
+test_done