return ret;
}
+/*
+ * Tell whether the low byte of ch needs special treatment: either it
+ * is ESC (0x1b), the byte used by ISO-2022-INT, or its high bit is
+ * set.  Returns 1 if so, 0 otherwise.
+ */
+static int non_ascii(int ch)
+{
+	int byte = ch & 0xff;
+
+	if (byte == 0x1b)
+		return 1;
+	return (byte & 0x80) ? 1 : 0;
+}
+
static int is_rfc2047_special(char ch)
{
- return ((ch & 0x80) || (ch == '=') || (ch == '?') || (ch == '_'));
+ return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
}
-static int add_rfc2047(char *buf, const char *line, int len)
+/*
+ * Write the first len bytes of line into buf.  When the scan below finds
+ * nothing that needs quoting the text is copied verbatim and its length
+ * is returned; otherwise the output begins with an "=?<encoding>?q?"
+ * prefix naming the given encoding.  (The tail of the function lies
+ * outside this hunk.)
+ */
+static int add_rfc2047(char *buf, const char *line, int len,
+ const char *encoding)
{
char *bp = buf;
int i, needquote;
- static const char q_utf8[] = "=?utf-8?q?";
+ char q_encoding[128];
+ const char *q_encoding_fmt = "=?%s?q?";
for (i = needquote = 0; !needquote && i < len; i++) {
- unsigned ch = line[i];
- if (ch & 0x80)
+ int ch = line[i];
+ if (non_ascii(ch))
needquote++;
if ((i + 1 < len) &&
(ch == '=' && line[i+1] == '?'))
if (!needquote)
return sprintf(buf, "%.*s", len, line);
- memcpy(bp, q_utf8, sizeof(q_utf8)-1);
- bp += sizeof(q_utf8)-1;
+ i = snprintf(q_encoding, sizeof(q_encoding), q_encoding_fmt, encoding);
+ /*
+ * snprintf() returns the length the complete output would have had,
+ * so a value equal to the buffer size already means the prefix was
+ * truncated; a negative value reports an output error.
+ */
+ if (i < 0 || (int)sizeof(q_encoding) <= i)
+ die("Insanely long encoding name %s", encoding);
+ memcpy(bp, q_encoding, i);
+ bp += i;
for (i = 0; i < len; i++) {
unsigned ch = line[i] & 0xFF;
if (is_rfc2047_special(ch)) {
}
static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf,
- const char *line, int relative_date)
+ const char *line, int relative_date,
+ const char *encoding)
{
char *date;
int namelen;
filler = "";
strcpy(buf, "From: ");
ret = strlen(buf);
- ret += add_rfc2047(buf + ret, line, display_name_length);
+ ret += add_rfc2047(buf + ret, line, display_name_length,
+ encoding);
memcpy(buf + ret, name_tail, namelen - display_name_length);
ret += namelen - display_name_length;
buf[ret++] = '\n';
return buf;
}
-static char *logmsg_reencode(const struct commit *commit)
+static char *logmsg_reencode(const struct commit *commit,
+ char *output_encoding)
{
char *encoding;
char *out;
- char *output_encoding = (git_log_output_encoding
- ? git_log_output_encoding
- : git_commit_encoding);
+ char *utf8 = "utf-8";
- if (!output_encoding)
- output_encoding = "utf-8";
- else if (!*output_encoding)
+ if (!*output_encoding)
return NULL;
encoding = get_header(commit, "encoding");
if (!encoding)
- return NULL;
+ encoding = utf8;
if (!strcmp(encoding, output_encoding))
out = strdup(commit->buffer);
else
if (out)
out = replace_encoding_header(out, output_encoding);
- free(encoding);
+ if (encoding != utf8)
+ free(encoding);
if (!out)
return NULL;
return out;
int parents_shown = 0;
const char *msg = commit->buffer;
int plain_non_ascii = 0;
- char *reencoded = logmsg_reencode(commit);
+ char *reencoded;
+ char *encoding;
+ encoding = (git_log_output_encoding
+ ? git_log_output_encoding
+ : git_commit_encoding);
+ if (!encoding)
+ encoding = "utf-8";
+ reencoded = logmsg_reencode(commit, encoding);
if (reencoded)
msg = reencoded;
i + 1 < len && msg[i+1] == '\n')
in_body = 1;
}
- else if (ch & 0x80) {
+ else if (non_ascii(ch)) {
plain_non_ascii = 1;
break;
}
offset += add_user_info("Author", fmt,
buf + offset,
line + 7,
- relative_date);
+ relative_date,
+ encoding);
if (!memcmp(line, "committer ", 10) &&
(fmt == CMIT_FMT_FULL || fmt == CMIT_FMT_FULLER))
offset += add_user_info("Commit", fmt,
buf + offset,
line + 10,
- relative_date);
+ relative_date,
+ encoding);
continue;
}
int slen = strlen(subject);
memcpy(buf + offset, subject, slen);
offset += slen;
- offset += add_rfc2047(buf + offset, line, linelen);
+ offset += add_rfc2047(buf + offset, line, linelen,
+ encoding);
}
else {
memset(buf + offset, ' ', indent);
if (fmt == CMIT_FMT_ONELINE)
break;
if (subject && plain_non_ascii) {
- static const char header[] =
- "Content-Type: text/plain; charset=UTF-8\n"
+ /*
+ * The body contains non-ASCII bytes: emit MIME headers that
+ * name the output encoding as the charset.
+ */
+ int sz;
+ char header[512];
+ const char *header_fmt =
+ "Content-Type: text/plain; charset=%s\n"
"Content-Transfer-Encoding: 8bit\n";
- memcpy(buf + offset, header, sizeof(header)-1);
- offset += sizeof(header)-1;
+ sz = snprintf(header, sizeof(header), header_fmt,
+ encoding);
+ /*
+ * snprintf() returns the untruncated length, so sz equal to
+ * the buffer size already means truncation; negative means
+ * an output error.
+ */
+ if (sz < 0 || (int)sizeof(header) <= sz)
+ die("Encoding name %s too long", encoding);
+ memcpy(buf + offset, header, sz);
+ offset += sz;
}
if (after_subject) {
int slen = strlen(after_subject);
--- /dev/null
+#!/bin/sh
+#
+# Copyright (c) 2006 Junio C Hamano
+#
+
+test_description='i18n settings and format-patch | am pipe'
+
+. ./test-lib.sh
+
+# check_encoding <count> [8859]
+#
+# Walk the <count> most recent commits (HEAD, HEAD~1, ...) and verify,
+# for each of them, that
+#  - format-patch --encoding=UTF-8 emits the expected UTF-8 Q-encoded
+#    From: line, i.e. the name was not corrupted, and
+#  - the commit object carries an "encoding ISO-8859-1" header exactly
+#    when the second argument is "8859".
+# Returns non-zero as soon as one commit fails either check.
+check_encoding () {
+	# Make sure characters are not corrupted
+	cnt="$1" header="$2" i=1 j=0 bad=0
+	# Without a count the loop would never run and the check would
+	# pass vacuously; treat that as a failure instead.
+	test -n "$cnt" || return 1
+	while test "$i" -le "$cnt"
+	do
+		git format-patch --encoding=UTF-8 --stdout HEAD~$i..HEAD~$j |
+		grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" &&
+		git-cat-file commit HEAD~$j |
+		case "$header" in
+		8859)
+			grep "^encoding ISO-8859-1" ;;
+		*)
+			! grep "^encoding ISO-8859-1" ;;
+		esac || {
+			bad=1
+			break
+		}
+		j=$i
+		i=$(($i+1))
+	done
+	(exit $bad)
+}
+
+test_expect_success setup '
+ git-repo-config i18n.commitencoding UTF-8 &&
+
+ # use UTF-8 in author and committer name to match the
+ # i18n.commitencoding settings
+ . ../t3901-utf8.txt &&
+
+ test_tick &&
+ echo "$GIT_AUTHOR_NAME" >mine &&
+ git add mine &&
+ git commit -s -m "Initial commit" &&
+
+ test_tick &&
+ echo Hello world >mine &&
+ git add mine &&
+ git commit -s -m "Second on main" &&
+
+ # the first commit on the side branch is UTF-8
+ test_tick &&
+ git checkout -b side master^ &&
+ echo Another file >yours &&
+ git add yours &&
+ git commit -s -m "Second on side" &&
+
+ # the second one on the side branch is ISO-8859-1
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ # use author and committer name in ISO-8859-1 to match it.
+ . ../t3901-8859-1.txt &&
+ test_tick &&
+ echo Yet another >theirs &&
+ git add theirs &&
+ git commit -s -m "Third on side" &&
+
+ # Back to default
+ git-repo-config i18n.commitencoding UTF-8
+'
+
+test_expect_success 'format-patch output (ISO-8859-1)' '
+ git-repo-config i18n.logoutputencoding ISO-8859-1 &&
+
+ git format-patch --stdout master..HEAD^ >out-l1 &&
+ git format-patch --stdout HEAD^ >out-l2 &&
+ grep "^Content-Type: text/plain; charset=ISO-8859-1" out-l1 &&
+ grep "^From: =?ISO-8859-1?q?=C1=E9=ED_=F3=FA?=" out-l1 &&
+ grep "^Content-Type: text/plain; charset=ISO-8859-1" out-l2 &&
+ grep "^From: =?ISO-8859-1?q?=C1=E9=ED_=F3=FA?=" out-l2
+'
+
+test_expect_success 'format-patch output (UTF-8)' '
+ git repo-config i18n.logoutputencoding UTF-8 &&
+
+ git format-patch --stdout master..HEAD^ >out-u1 &&
+ git format-patch --stdout HEAD^ >out-u2 &&
+ grep "^Content-Type: text/plain; charset=UTF-8" out-u1 &&
+ grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" out-u1 &&
+ grep "^Content-Type: text/plain; charset=UTF-8" out-u2 &&
+ grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" out-u2
+'
+
+test_expect_success 'rebase (U/U)' '
+ # We want the result of rebase in UTF-8
+ git-repo-config i18n.commitencoding UTF-8 &&
+
+ # The test is about logoutputencoding not affecting the
+ # final outcome -- it is used internally to generate the
+ # patch and the log.
+
+ git repo-config i18n.logoutputencoding UTF-8 &&
+
+ # The result will be committed by GIT_COMMITTER_NAME --
+ # we want UTF-8 encoded name.
+ . ../t3901-utf8.txt &&
+ git checkout -b test &&
+ git-rebase master &&
+
+ check_encoding 2
+'
+
+test_expect_success 'rebase (U/L)' '
+ git-repo-config i18n.commitencoding UTF-8 &&
+ git repo-config i18n.logoutputencoding ISO-8859-1 &&
+ . ../t3901-utf8.txt &&
+
+ git reset --hard side &&
+ git-rebase master &&
+
+ check_encoding 2
+'
+
+test_expect_success 'rebase (L/L)' '
+ # In this test we want ISO-8859-1 encoded commits as the result
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ git repo-config i18n.logoutputencoding ISO-8859-1 &&
+ . ../t3901-8859-1.txt &&
+
+ git reset --hard side &&
+ git-rebase master &&
+
+ check_encoding 2 8859
+'
+
+test_expect_success 'rebase (L/U)' '
+ # This is pathological -- use UTF-8 as intermediate form
+ # to get ISO-8859-1 results.
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ git repo-config i18n.logoutputencoding UTF-8 &&
+ . ../t3901-8859-1.txt &&
+
+ git reset --hard side &&
+ git-rebase master &&
+
+ check_encoding 2 8859
+'
+
+test_expect_success 'cherry-pick(U/U)' '
+ # Both the commitencoding and logoutputencoding is set to UTF-8.
+
+ git-repo-config i18n.commitencoding UTF-8 &&
+ git repo-config i18n.logoutputencoding UTF-8 &&
+ . ../t3901-utf8.txt &&
+
+ git reset --hard master &&
+ git cherry-pick side^ &&
+ git cherry-pick side &&
+ EDITOR=: VISUAL=: git revert HEAD &&
+
+ check_encoding 3
+'
+
+test_expect_success 'cherry-pick(L/L)' '
+ # Both the commitencoding and logoutputencoding is set to ISO-8859-1
+
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ git repo-config i18n.logoutputencoding ISO-8859-1 &&
+ . ../t3901-8859-1.txt &&
+
+ git reset --hard master &&
+ git cherry-pick side^ &&
+ git cherry-pick side &&
+ EDITOR=: VISUAL=: git revert HEAD &&
+
+ check_encoding 3 8859
+'
+
+test_expect_success 'cherry-pick(U/L)' '
+ # Commitencoding is set to UTF-8 but logoutputencoding is ISO-8859-1
+
+ git-repo-config i18n.commitencoding UTF-8 &&
+ git repo-config i18n.logoutputencoding ISO-8859-1 &&
+ . ../t3901-utf8.txt &&
+
+ git reset --hard master &&
+ git cherry-pick side^ &&
+ git cherry-pick side &&
+ EDITOR=: VISUAL=: git revert HEAD &&
+
+ check_encoding 3
+'
+
+test_expect_success 'cherry-pick(L/U)' '
+ # Again, the commitencoding is set to ISO-8859-1 but
+ # logoutputencoding is set to UTF-8.
+
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ git repo-config i18n.logoutputencoding UTF-8 &&
+ . ../t3901-8859-1.txt &&
+
+ git reset --hard master &&
+ git cherry-pick side^ &&
+ git cherry-pick side &&
+ EDITOR=: VISUAL=: git revert HEAD &&
+
+ check_encoding 3 8859
+'
+
+test_expect_success 'rebase --merge (U/U)' '
+ git-repo-config i18n.commitencoding UTF-8 &&
+ git repo-config i18n.logoutputencoding UTF-8 &&
+ . ../t3901-utf8.txt &&
+
+ git reset --hard side &&
+ git-rebase --merge master &&
+
+ check_encoding 2
+'
+
+test_expect_success 'rebase --merge (U/L)' '
+ git-repo-config i18n.commitencoding UTF-8 &&
+ git repo-config i18n.logoutputencoding ISO-8859-1 &&
+ . ../t3901-utf8.txt &&
+
+ git reset --hard side &&
+ git-rebase --merge master &&
+
+ check_encoding 2
+'
+
+test_expect_success 'rebase --merge (L/L)' '
+ # In this test we want ISO-8859-1 encoded commits as the result
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ git repo-config i18n.logoutputencoding ISO-8859-1 &&
+ . ../t3901-8859-1.txt &&
+
+ git reset --hard side &&
+ git-rebase --merge master &&
+
+ check_encoding 2 8859
+'
+
+test_expect_success 'rebase --merge (L/U)' '
+ # This is pathological -- use UTF-8 as intermediate form
+ # to get ISO-8859-1 results.
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ git repo-config i18n.logoutputencoding UTF-8 &&
+ . ../t3901-8859-1.txt &&
+
+ git reset --hard side &&
+ git-rebase --merge master &&
+
+ check_encoding 2 8859
+'
+
+test_done