Merge branch 'jk/read-commit-buffer-data-after-free'
author Junio C Hamano <gitster@pobox.com>
Sun, 17 Feb 2013 23:23:20 +0000 (15:23 -0800)
committer Junio C Hamano <gitster@pobox.com>
Sun, 17 Feb 2013 23:23:20 +0000 (15:23 -0800)
"git log --grep=<pattern>" used to look for the pattern in literal
bytes of the commit log message and ignored the log-output encoding.

* jk/read-commit-buffer-data-after-free:
log: re-encode commit messages before grepping

revision.c
t/t4210-log-i18n.sh [new file with mode: 0755]
index d7562ee5004379774c6fe1a74b0c503475a34ab5..ef6020541282770b9edc4c921cadb7ab1506da56 100644 (file)
@@ -2268,7 +2268,10 @@ static int commit_rewrite_person(struct strbuf *buf, const char *what, struct st
 static int commit_match(struct commit *commit, struct rev_info *opt)
 {
        int retval;
+       const char *encoding;
+       char *message;
        struct strbuf buf = STRBUF_INIT;
+
        if (!opt->grep_filter.pattern_list && !opt->grep_filter.header_list)
                return 1;
 
@@ -2279,13 +2282,23 @@ static int commit_match(struct commit *commit, struct rev_info *opt)
                strbuf_addch(&buf, '\n');
        }
 
+       /*
+        * We grep in the user's output encoding, under the assumption that it
+        * is the encoding they are most likely to write their grep pattern
+        * for. In addition, it means we will match the "notes" encoding below,
+        * so we will not end up with a buffer that has two different encodings
+        * in it.
+        */
+       encoding = get_log_output_encoding();
+       message = logmsg_reencode(commit, encoding);
+
        /* Copy the commit to temporary if we are using "fake" headers */
        if (buf.len)
-               strbuf_addstr(&buf, commit->buffer);
+               strbuf_addstr(&buf, message);
 
        if (opt->grep_filter.header_list && opt->mailmap) {
                if (!buf.len)
-                       strbuf_addstr(&buf, commit->buffer);
+                       strbuf_addstr(&buf, message);
 
                commit_rewrite_person(&buf, "\nauthor ", opt->mailmap);
                commit_rewrite_person(&buf, "\ncommitter ", opt->mailmap);
@@ -2294,18 +2307,18 @@ static int commit_match(struct commit *commit, struct rev_info *opt)
        /* Append "fake" message parts as needed */
        if (opt->show_notes) {
                if (!buf.len)
-                       strbuf_addstr(&buf, commit->buffer);
-               format_display_notes(commit->object.sha1, &buf,
-                                    get_log_output_encoding(), 1);
+                       strbuf_addstr(&buf, message);
+               format_display_notes(commit->object.sha1, &buf, encoding, 1);
        }
 
-       /* Find either in the commit object, or in the temporary */
+       /* Find either in the original commit message, or in the temporary */
        if (buf.len)
                retval = grep_buffer(&opt->grep_filter, buf.buf, buf.len);
        else
                retval = grep_buffer(&opt->grep_filter,
-                                    commit->buffer, strlen(commit->buffer));
+                                    message, strlen(message));
        strbuf_release(&buf);
+       logmsg_free(message, commit);
        return retval;
 }
 
diff --git a/t/t4210-log-i18n.sh b/t/t4210-log-i18n.sh
new file mode 100755 (executable)
index 0000000..52a7472
--- /dev/null
@@ -0,0 +1,58 @@
+#!/bin/sh
+
+test_description='test log with i18n features'
+. ./test-lib.sh
+
+# two forms of é
+utf8_e=$(printf '\303\251')
+latin1_e=$(printf '\351')
+
+test_expect_success 'create commits in different encodings' '
+       test_tick &&
+       cat >msg <<-EOF &&
+       utf8
+
+       t${utf8_e}st
+       EOF
+       git add msg &&
+       git -c i18n.commitencoding=utf8 commit -F msg &&
+       cat >msg <<-EOF &&
+       latin1
+
+       t${latin1_e}st
+       EOF
+       git add msg &&
+       git -c i18n.commitencoding=ISO-8859-1 commit -F msg
+'
+
+test_expect_success 'log --grep searches in log output encoding (utf8)' '
+       cat >expect <<-\EOF &&
+       latin1
+       utf8
+       EOF
+       git log --encoding=utf8 --format=%s --grep=$utf8_e >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'log --grep searches in log output encoding (latin1)' '
+       cat >expect <<-\EOF &&
+       latin1
+       utf8
+       EOF
+       git log --encoding=ISO-8859-1 --format=%s --grep=$latin1_e >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'log --grep does not find non-reencoded values (utf8)' '
+       >expect &&
+       git log --encoding=utf8 --format=%s --grep=$latin1_e >actual &&
+       test_cmp expect actual
+'
+
+test_expect_success 'log --grep does not find non-reencoded values (latin1)' '
+       >expect &&
+       git log --encoding=ISO-8859-1 --format=%s --grep=$utf8_e >actual &&
+       test_cmp expect actual
+'
+
+test_done