Merge branch 'rd/send-email-2047-fix'
authorJunio C Hamano <gitster@pobox.com>
Wed, 7 Jan 2015 21:06:47 +0000 (13:06 -0800)
committerJunio C Hamano <gitster@pobox.com>
Wed, 7 Jan 2015 21:06:47 +0000 (13:06 -0800)
"git send-email" did not handle RFC 2047 encoded headers quite
right.

* rd/send-email-2047-fix:
send-email: handle adjacent RFC 2047-encoded words properly
send-email: align RFC 2047 decoding more closely with the spec

git-send-email.perl
t/t9001-send-email.sh
index 82c6feaa4640e993051bfbe57df593c0f012a9ba..58c8bc2a59fa0eac8d8f97ca219417183d3725d5 100755 (executable)
@@ -146,6 +146,11 @@ sub format_2822_time {
 my $smtp;
 my $auth;
 
+# Regexes for RFC 2047 productions.
+my $re_token = qr/[^][()<>@,;:\\"\/?.= \000-\037\177-\377]+/;
+my $re_encoded_text = qr/[^? \000-\037\177-\377]+/;
+my $re_encoded_word = qr/=\?($re_token)\?($re_token)\?($re_encoded_text)\?=/;
+
 # Variables we fill in automatically, or via prompting:
 my (@to,$no_to,@initial_to,@cc,$no_cc,@initial_cc,@bcclist,$no_bcc,@xh,
        $initial_reply_to,$initial_subject,@files,
@@ -917,15 +922,26 @@ sub make_message_id {
 
 sub unquote_rfc2047 {
        local ($_) = @_;
-       my $encoding;
-       s{=\?([^?]+)\?q\?(.*?)\?=}{
-               $encoding = $1;
-               my $e = $2;
-               $e =~ s/_/ /g;
-               $e =~ s/=([0-9A-F]{2})/chr(hex($1))/eg;
-               $e;
+       my $charset;
+       my $sep = qr/[ \t]+/;
+       s{$re_encoded_word(?:$sep$re_encoded_word)*}{
+               my @words = split $sep, $&;
+               foreach (@words) {
+                       m/$re_encoded_word/;
+                       $charset = $1;
+                       my $encoding = $2;
+                       my $text = $3;
+                       if ($encoding eq 'q' || $encoding eq 'Q') {
+                               $_ = $text;
+                               s/_/ /g;
+                               s/=([0-9A-F]{2})/chr(hex($1))/egi;
+                       } else {
+                               # other encodings not supported yet
+                       }
+               }
+               join '', @words;
        }eg;
-       return wantarray ? ($_, $encoding) : $_;
+       return wantarray ? ($_, $charset) : $_;
 }
 
 sub quote_rfc2047 {
@@ -938,10 +954,8 @@ sub quote_rfc2047 {
 
 sub is_rfc2047_quoted {
        my $s = shift;
-       my $token = qr/[^][()<>@,;:"\/?.= \000-\037\177-\377]+/;
-       my $encoded_text = qr/[!->@-~]+/;
        length($s) <= 75 &&
-       $s =~ m/^(?:"[[:ascii:]]*"|=\?$token\?$token\?$encoded_text\?=)$/o;
+       $s =~ m/^(?:"[[:ascii:]]*"|$re_encoded_word)$/o;
 }
 
 sub subject_needs_rfc2047_quoting {
index e37efef5ca6fce69b0ec5e0103a2ca9f5f5cb234..a8773bdf379ad858b252be8045b42cec99af546b 100755 (executable)
@@ -242,6 +242,13 @@ test_expect_success $PREREQ 'non-ascii self name is suppressed' "
                'non_ascii_self_suppressed'
 "
 
+# This name is long enough to force format-patch to split it into multiple
+# encoded-words, assuming it uses UTF-8 with the "Q" encoding.
+test_expect_success $PREREQ 'long non-ascii self name is suppressed' "
+       test_suppress_self_quoted 'Ƒüñníęř €. Nâṁé' 'odd_?=mail@example.com' \
+               'long_non_ascii_self_suppressed'
+"
+
 test_expect_success $PREREQ 'sanitized self name is suppressed' "
        test_suppress_self_unquoted '\"A U. Thor\"' 'author@example.com' \
                'self_name_sanitized_suppressed'