Merge branch 'jc/int'
author Junio C Hamano <junkio@cox.net>
Sun, 14 Jan 2007 20:04:25 +0000 (12:04 -0800)
committer Junio C Hamano <junkio@cox.net>
Sun, 14 Jan 2007 20:04:25 +0000 (12:04 -0800)
* jc/int:
More tests in t3901.
Consistent message encoding while reusing log from an existing commit.
t3901: test "format-patch | am" pipe with i18n
Use log output encoding in --pretty=email headers.

commit.c
git-commit.sh
git-revert.sh
t/t3901-8859-1.txt [new file with mode: 0755]
t/t3901-i18n-patch.sh [new file with mode: 0755]
t/t3901-utf8.txt [new file with mode: 0755]
index 496d37aa020871aed111002c2be0380366a70baa..9b2b842e7dcc153a12b35394a2e3f88f146b6225 100644 (file)
--- a/commit.c
+++ b/commit.c
@@ -464,20 +464,29 @@ static int get_one_line(const char *msg, unsigned long len)
        return ret;
 }
 
+/* High bit set, or ISO-2022-INT */
+static int non_ascii(int ch)
+{
+       ch = (ch & 0xff);
+       return ((ch & 0x80) || (ch == 0x1b));
+}
+
 static int is_rfc2047_special(char ch)
 {
-       return ((ch & 0x80) || (ch == '=') || (ch == '?') || (ch == '_'));
+       return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
 }
 
-static int add_rfc2047(char *buf, const char *line, int len)
+static int add_rfc2047(char *buf, const char *line, int len,
+                      const char *encoding)
 {
        char *bp = buf;
        int i, needquote;
-       static const char q_utf8[] = "=?utf-8?q?";
+       char q_encoding[128];
+       const char *q_encoding_fmt = "=?%s?q?";
 
        for (i = needquote = 0; !needquote && i < len; i++) {
-               unsigned ch = line[i];
-               if (ch & 0x80)
+               int ch = line[i];
+               if (non_ascii(ch))
                        needquote++;
                if ((i + 1 < len) &&
                    (ch == '=' && line[i+1] == '?'))
@@ -486,8 +495,11 @@ static int add_rfc2047(char *buf, const char *line, int len)
        if (!needquote)
                return sprintf(buf, "%.*s", len, line);
 
-       memcpy(bp, q_utf8, sizeof(q_utf8)-1);
-       bp += sizeof(q_utf8)-1;
+       i = snprintf(q_encoding, sizeof(q_encoding), q_encoding_fmt, encoding);
+       if (sizeof(q_encoding) < i)
+               die("Insanely long encoding name %s", encoding);
+       memcpy(bp, q_encoding, i);
+       bp += i;
        for (i = 0; i < len; i++) {
                unsigned ch = line[i] & 0xFF;
                if (is_rfc2047_special(ch)) {
@@ -505,7 +517,8 @@ static int add_rfc2047(char *buf, const char *line, int len)
 }
 
 static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf,
-                        const char *line, int relative_date)
+                        const char *line, int relative_date,
+                        const char *encoding)
 {
        char *date;
        int namelen;
@@ -533,7 +546,8 @@ static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf,
                filler = "";
                strcpy(buf, "From: ");
                ret = strlen(buf);
-               ret += add_rfc2047(buf + ret, line, display_name_length);
+               ret += add_rfc2047(buf + ret, line, display_name_length,
+                                  encoding);
                memcpy(buf + ret, name_tail, namelen - display_name_length);
                ret += namelen - display_name_length;
                buf[ret++] = '\n';
@@ -668,21 +682,18 @@ static char *replace_encoding_header(char *buf, char *encoding)
        return buf;
 }
 
-static char *logmsg_reencode(const struct commit *commit)
+static char *logmsg_reencode(const struct commit *commit,
+                            char *output_encoding)
 {
        char *encoding;
        char *out;
-       char *output_encoding = (git_log_output_encoding
-                                ? git_log_output_encoding
-                                : git_commit_encoding);
+       char *utf8 = "utf-8";
 
-       if (!output_encoding)
-               output_encoding = "utf-8";
-       else if (!*output_encoding)
+       if (!*output_encoding)
                return NULL;
        encoding = get_header(commit, "encoding");
        if (!encoding)
-               return NULL;
+               encoding = utf8;
        if (!strcmp(encoding, output_encoding))
                out = strdup(commit->buffer);
        else
@@ -691,7 +702,8 @@ static char *logmsg_reencode(const struct commit *commit)
        if (out)
                out = replace_encoding_header(out, output_encoding);
 
-       free(encoding);
+       if (encoding != utf8)
+               free(encoding);
        if (!out)
                return NULL;
        return out;
@@ -711,8 +723,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
        int parents_shown = 0;
        const char *msg = commit->buffer;
        int plain_non_ascii = 0;
-       char *reencoded = logmsg_reencode(commit);
+       char *reencoded;
+       char *encoding;
 
+       encoding = (git_log_output_encoding
+                   ? git_log_output_encoding
+                   : git_commit_encoding);
+       if (!encoding)
+               encoding = "utf-8";
+       reencoded = logmsg_reencode(commit, encoding);
        if (reencoded)
                msg = reencoded;
 
@@ -738,7 +757,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
                                    i + 1 < len && msg[i+1] == '\n')
                                        in_body = 1;
                        }
-                       else if (ch & 0x80) {
+                       else if (non_ascii(ch)) {
                                plain_non_ascii = 1;
                                break;
                        }
@@ -797,13 +816,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
                                offset += add_user_info("Author", fmt,
                                                        buf + offset,
                                                        line + 7,
-                                                       relative_date);
+                                                       relative_date,
+                                                       encoding);
                        if (!memcmp(line, "committer ", 10) &&
                            (fmt == CMIT_FMT_FULL || fmt == CMIT_FMT_FULLER))
                                offset += add_user_info("Commit", fmt,
                                                        buf + offset,
                                                        line + 10,
-                                                       relative_date);
+                                                       relative_date,
+                                                       encoding);
                        continue;
                }
 
@@ -826,7 +847,8 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
                        int slen = strlen(subject);
                        memcpy(buf + offset, subject, slen);
                        offset += slen;
-                       offset += add_rfc2047(buf + offset, line, linelen);
+                       offset += add_rfc2047(buf + offset, line, linelen,
+                                             encoding);
                }
                else {
                        memset(buf + offset, ' ', indent);
@@ -837,11 +859,17 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
                if (fmt == CMIT_FMT_ONELINE)
                        break;
                if (subject && plain_non_ascii) {
-                       static const char header[] =
-                               "Content-Type: text/plain; charset=UTF-8\n"
+                       int sz;
+                       char header[512];
+                       const char *header_fmt =
+                               "Content-Type: text/plain; charset=%s\n"
                                "Content-Transfer-Encoding: 8bit\n";
-                       memcpy(buf + offset, header, sizeof(header)-1);
-                       offset += sizeof(header)-1;
+                       sz = snprintf(header, sizeof(header), header_fmt,
+                                     encoding);
+                       if (sizeof(header) < sz)
+                               die("Encoding name %s too long", encoding);
+                       memcpy(buf + offset, header, sz);
+                       offset += sz;
                }
                if (after_subject) {
                        int slen = strlen(after_subject);
diff --git a/git-commit.sh b/git-commit.sh
index 9fdf234b522322cf4e5d539f52debe76bebf6aa8..e23918cd6c515e193d3bc5d476c360001fdc1222 100755 (executable)
--- a/git-commit.sh
+++ b/git-commit.sh
@@ -429,7 +429,9 @@ then
        fi
 elif test "$use_commit" != ""
 then
-       git-cat-file commit "$use_commit" | sed -e '1,/^$/d'
+       encoding=$(git repo-config i18n.commitencoding || echo UTF-8)
+       git show -s --pretty=raw --encoding="$encoding" "$use_commit" |
+       sed -e '1,/^$/d' -e 's/^    //'
 elif test -f "$GIT_DIR/MERGE_MSG"
 then
        cat "$GIT_DIR/MERGE_MSG"
@@ -491,7 +493,8 @@ then
                q
        }
        '
-       set_author_env=`git-cat-file commit "$use_commit" |
+       encoding=$(git repo-config i18n.commitencoding || echo UTF-8)
+       set_author_env=`git show -s --pretty=raw --encoding="$encoding" "$use_commit" |
        LANG=C LC_ALL=C sed -ne "$pick_author_script"`
        eval "$set_author_env"
        export GIT_AUTHOR_NAME
diff --git a/git-revert.sh b/git-revert.sh
index 224e6540ca073da804332799660a8cdeacdfdaf7..71cbcbc2b886b2770c61c4b3a496db913ef56d9d 100755 (executable)
--- a/git-revert.sh
+++ b/git-revert.sh
@@ -81,6 +81,8 @@ prev=$(git-rev-parse --verify "$commit^1" 2>/dev/null) ||
 git-rev-parse --verify "$commit^2" >/dev/null 2>&1 &&
        die "Cannot run $me a multi-parent commit."
 
+encoding=$(git repo-config i18n.commitencoding || echo UTF-8)
+
 # "commit" is an existing commit.  We would want to apply
 # the difference it introduces since its first parent "prev"
 # on top of the current HEAD if we are cherry-pick.  Or the
@@ -88,10 +90,11 @@ git-rev-parse --verify "$commit^2" >/dev/null 2>&1 &&
 
 case "$me" in
 revert)
-       git-rev-list --pretty=oneline --max-count=1 $commit |
+       git show -s --pretty=oneline --encoding="$encoding" $commit |
        sed -e '
                s/^[^ ]* /Revert "/
-               s/$/"/'
+               s/$/"/
+       '
        echo
        echo "This reverts commit $commit."
        test "$rev" = "$commit" ||
@@ -120,14 +123,17 @@ cherry-pick)
 
                q
        }'
-       set_author_env=`git-cat-file commit "$commit" |
+
+       logmsg=`git show -s --pretty=raw --encoding="$encoding" "$commit"`
+       set_author_env=`echo "$logmsg" |
        LANG=C LC_ALL=C sed -ne "$pick_author_script"`
        eval "$set_author_env"
        export GIT_AUTHOR_NAME
        export GIT_AUTHOR_EMAIL
        export GIT_AUTHOR_DATE
 
-       git-cat-file commit $commit | sed -e '1,/^$/d'
+       echo "$logmsg" |
+       sed -e '1,/^$/d' -e 's/^    //'
        case "$replay" in
        '')
                echo "(cherry picked from commit $commit)"
diff --git a/t/t3901-8859-1.txt b/t/t3901-8859-1.txt
new file mode 100755 (executable)
index 0000000..38c21a6
--- /dev/null
@@ -0,0 +1,4 @@
+: to be sourced in t3901 -- this is latin-1
+GIT_AUTHOR_NAME="Áéí óú" &&
+GIT_COMMITTER_NAME=$GIT_AUTHOR_NAME &&
+export GIT_AUTHOR_NAME GIT_COMMITTER_NAME
diff --git a/t/t3901-i18n-patch.sh b/t/t3901-i18n-patch.sh
new file mode 100755 (executable)
index 0000000..eda0e2d
--- /dev/null
@@ -0,0 +1,255 @@
+#!/bin/sh
+#
+# Copyright (c) 2006 Junio C Hamano
+#
+
+test_description='i18n settings and format-patch | am pipe'
+
+. ./test-lib.sh
+
+check_encoding () {
+       # Make sure characters are not corrupted
+       cnt="$1" header="$2" i=1 j=0 bad=0
+       while test "$i" -le $cnt
+       do
+               git format-patch --encoding=UTF-8 --stdout HEAD~$i..HEAD~$j |
+               grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" &&
+               git-cat-file commit HEAD~$j |
+               case "$header" in
+               8859)
+                       grep "^encoding ISO-8859-1" ;;
+               *)
+                       ! grep "^encoding ISO-8859-1" ;;
+               esac || {
+                       bad=1
+                       break
+               }
+               j=$i
+               i=$(($i+1))
+       done
+       (exit $bad)
+}
+
+test_expect_success setup '
+       git-repo-config i18n.commitencoding UTF-8 &&
+
+       # use UTF-8 in author and committer name to match the
+       # i18n.commitencoding settings
+       . ../t3901-utf8.txt &&
+
+       test_tick &&
+       echo "$GIT_AUTHOR_NAME" >mine &&
+       git add mine &&
+       git commit -s -m "Initial commit" &&
+
+       test_tick &&
+       echo Hello world >mine &&
+       git add mine &&
+       git commit -s -m "Second on main" &&
+
+       # the first commit on the side branch is UTF-8
+       test_tick &&
+       git checkout -b side master^ &&
+       echo Another file >yours &&
+       git add yours &&
+       git commit -s -m "Second on side" &&
+
+       # the second one on the side branch is ISO-8859-1
+       git-repo-config i18n.commitencoding ISO-8859-1 &&
+       # use author and committer name in ISO-8859-1 to match it.
+       . ../t3901-8859-1.txt &&
+       test_tick &&
+       echo Yet another >theirs &&
+       git add theirs &&
+       git commit -s -m "Third on side" &&
+
+       # Back to default
+       git-repo-config i18n.commitencoding UTF-8
+'
+
+test_expect_success 'format-patch output (ISO-8859-1)' '
+       git-repo-config i18n.logoutputencoding ISO-8859-1 &&
+
+       git format-patch --stdout master..HEAD^ >out-l1 &&
+       git format-patch --stdout HEAD^ >out-l2 &&
+       grep "^Content-Type: text/plain; charset=ISO-8859-1" out-l1 &&
+       grep "^From: =?ISO-8859-1?q?=C1=E9=ED_=F3=FA?=" out-l1 &&
+       grep "^Content-Type: text/plain; charset=ISO-8859-1" out-l2 &&
+       grep "^From: =?ISO-8859-1?q?=C1=E9=ED_=F3=FA?=" out-l2
+'
+
+test_expect_success 'format-patch output (UTF-8)' '
+       git repo-config i18n.logoutputencoding UTF-8 &&
+
+       git format-patch --stdout master..HEAD^ >out-u1 &&
+       git format-patch --stdout HEAD^ >out-u2 &&
+       grep "^Content-Type: text/plain; charset=UTF-8" out-u1 &&
+       grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" out-u1 &&
+       grep "^Content-Type: text/plain; charset=UTF-8" out-u2 &&
+       grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" out-u2
+'
+
+test_expect_success 'rebase (U/U)' '
+       # We want the result of rebase in UTF-8
+       git-repo-config i18n.commitencoding UTF-8 &&
+
+       # The test is about logoutputencoding not affecting the
+       # final outcome -- it is used internally to generate the
+       # patch and the log.
+
+       git repo-config i18n.logoutputencoding UTF-8 &&
+
+       # The result will be committed by GIT_COMMITTER_NAME --
+       # we want UTF-8 encoded name.
+       . ../t3901-utf8.txt &&
+       git checkout -b test &&
+       git-rebase master &&
+
+       check_encoding 2
+'
+
+test_expect_success 'rebase (U/L)' '
+       git-repo-config i18n.commitencoding UTF-8 &&
+       git repo-config i18n.logoutputencoding ISO-8859-1 &&
+       . ../t3901-utf8.txt &&
+
+       git reset --hard side &&
+       git-rebase master &&
+
+       check_encoding 2
+'
+
+test_expect_success 'rebase (L/L)' '
+       # In this test we want ISO-8859-1 encoded commits as the result
+       git-repo-config i18n.commitencoding ISO-8859-1 &&
+       git repo-config i18n.logoutputencoding ISO-8859-1 &&
+       . ../t3901-8859-1.txt &&
+
+       git reset --hard side &&
+       git-rebase master &&
+
+       check_encoding 2 8859
+'
+
+test_expect_success 'rebase (L/U)' '
+       # This is pathological -- use UTF-8 as intermediate form
+       # to get ISO-8859-1 results.
+       git-repo-config i18n.commitencoding ISO-8859-1 &&
+       git repo-config i18n.logoutputencoding UTF-8 &&
+       . ../t3901-8859-1.txt &&
+
+       git reset --hard side &&
+       git-rebase master &&
+
+       check_encoding 2 8859
+'
+
+test_expect_success 'cherry-pick(U/U)' '
+       # Both the commitencoding and logoutputencoding is set to UTF-8.
+
+       git-repo-config i18n.commitencoding UTF-8 &&
+       git repo-config i18n.logoutputencoding UTF-8 &&
+       . ../t3901-utf8.txt &&
+
+       git reset --hard master &&
+       git cherry-pick side^ &&
+       git cherry-pick side &&
+       EDITOR=: VISUAL=: git revert HEAD &&
+
+       check_encoding 3
+'
+
+test_expect_success 'cherry-pick(L/L)' '
+       # Both the commitencoding and logoutputencoding is set to ISO-8859-1
+
+       git-repo-config i18n.commitencoding ISO-8859-1 &&
+       git repo-config i18n.logoutputencoding ISO-8859-1 &&
+       . ../t3901-8859-1.txt &&
+
+       git reset --hard master &&
+       git cherry-pick side^ &&
+       git cherry-pick side &&
+       EDITOR=: VISUAL=: git revert HEAD &&
+
+       check_encoding 3 8859
+'
+
+test_expect_success 'cherry-pick(U/L)' '
+       # Commitencoding is set to UTF-8 but logoutputencoding is ISO-8859-1
+
+       git-repo-config i18n.commitencoding UTF-8 &&
+       git repo-config i18n.logoutputencoding ISO-8859-1 &&
+       . ../t3901-utf8.txt &&
+
+       git reset --hard master &&
+       git cherry-pick side^ &&
+       git cherry-pick side &&
+       EDITOR=: VISUAL=: git revert HEAD &&
+
+       check_encoding 3
+'
+
+test_expect_success 'cherry-pick(L/U)' '
+       # Again, the commitencoding is set to ISO-8859-1 but
+       # logoutputencoding is set to UTF-8.
+
+       git-repo-config i18n.commitencoding ISO-8859-1 &&
+       git repo-config i18n.logoutputencoding UTF-8 &&
+       . ../t3901-8859-1.txt &&
+
+       git reset --hard master &&
+       git cherry-pick side^ &&
+       git cherry-pick side &&
+       EDITOR=: VISUAL=: git revert HEAD &&
+
+       check_encoding 3 8859
+'
+
+test_expect_success 'rebase --merge (U/U)' '
+       git-repo-config i18n.commitencoding UTF-8 &&
+       git repo-config i18n.logoutputencoding UTF-8 &&
+       . ../t3901-utf8.txt &&
+
+       git reset --hard side &&
+       git-rebase --merge master &&
+
+       check_encoding 2
+'
+
+test_expect_success 'rebase --merge (U/L)' '
+       git-repo-config i18n.commitencoding UTF-8 &&
+       git repo-config i18n.logoutputencoding ISO-8859-1 &&
+       . ../t3901-utf8.txt &&
+
+       git reset --hard side &&
+       git-rebase --merge master &&
+
+       check_encoding 2
+'
+
+test_expect_success 'rebase --merge (L/L)' '
+       # In this test we want ISO-8859-1 encoded commits as the result
+       git-repo-config i18n.commitencoding ISO-8859-1 &&
+       git repo-config i18n.logoutputencoding ISO-8859-1 &&
+       . ../t3901-8859-1.txt &&
+
+       git reset --hard side &&
+       git-rebase --merge master &&
+
+       check_encoding 2 8859
+'
+
+test_expect_success 'rebase --merge (L/U)' '
+       # This is pathological -- use UTF-8 as intermediate form
+       # to get ISO-8859-1 results.
+       git-repo-config i18n.commitencoding ISO-8859-1 &&
+       git repo-config i18n.logoutputencoding UTF-8 &&
+       . ../t3901-8859-1.txt &&
+
+       git reset --hard side &&
+       git-rebase --merge master &&
+
+       check_encoding 2 8859
+'
+
+test_done
diff --git a/t/t3901-utf8.txt b/t/t3901-utf8.txt
new file mode 100755 (executable)
index 0000000..5f5205c
--- /dev/null
@@ -0,0 +1,4 @@
+: to be sourced in t3901 -- this is utf8
+GIT_AUTHOR_NAME="Áéí óú" &&
+GIT_COMMITTER_NAME=$GIT_AUTHOR_NAME &&
+export GIT_AUTHOR_NAME GIT_COMMITTER_NAME