send-email: align RFC 2047 decoding more closely with the spec
author Роман Донченко <dpb@corrigendum.ru>
Sun, 14 Dec 2014 15:59:46 +0000 (18:59 +0300)
committer Junio C Hamano <gitster@pobox.com>
Mon, 15 Dec 2014 17:06:39 +0000 (09:06 -0800)
More specifically:

* Add "\" to the list of characters not allowed in a token (see RFC 2047
errata).

* Share regexes between unquote_rfc2047 and is_rfc2047_quoted. Besides
removing duplication, this also makes unquote_rfc2047 more stringent.

* Allow both "q" and "Q" to identify the encoding.

* Allow lowercase hexadecimal digits in the "Q" encoding.

And, more on the cosmetic side:

* Change the "encoded-text" regex to exclude rather than include characters,
for clarity and consistency with "token".

Signed-off-by: Роман Донченко <dpb@corrigendum.ru>
Acked-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
git-send-email.perl
index fdb0029b597898559376b1b28d692450794251f3..106c2b065dfd6f29c5983d6944ec53cbb95490b0 100755 (executable)
@@ -143,6 +143,11 @@ sub format_2822_time {
 my $smtp;
 my $auth;
 
+# Regexes for RFC 2047 productions.
+my $re_token = qr/[^][()<>@,;:\\"\/?.= \000-\037\177-\377]+/;
+my $re_encoded_text = qr/[^? \000-\037\177-\377]+/;
+my $re_encoded_word = qr/=\?($re_token)\?($re_token)\?($re_encoded_text)\?=/;
+
 # Variables we fill in automatically, or via prompting:
 my (@to,$no_to,@initial_to,@cc,$no_cc,@initial_cc,@bcclist,$no_bcc,@xh,
        $initial_reply_to,$initial_subject,@files,
@@ -906,15 +911,20 @@ sub make_message_id {
 
 sub unquote_rfc2047 {
        local ($_) = @_;
-       my $encoding;
-       s{=\?([^?]+)\?q\?(.*?)\?=}{
-               $encoding = $1;
-               my $e = $2;
-               $e =~ s/_/ /g;
-               $e =~ s/=([0-9A-F]{2})/chr(hex($1))/eg;
-               $e;
+       my $charset;
+       s{$re_encoded_word}{
+               $charset = $1;
+               my $encoding = $2;
+               my $text = $3;
+               if ($encoding eq 'q' || $encoding eq 'Q') {
+                       $text =~ s/_/ /g;
+                       $text =~ s/=([0-9A-F]{2})/chr(hex($1))/egi;
+                       $text;
+               } else {
+                       $&; # other encodings not supported yet
+               }
        }eg;
-       return wantarray ? ($_, $encoding) : $_;
+       return wantarray ? ($_, $charset) : $_;
 }
 
 sub quote_rfc2047 {
@@ -927,10 +937,8 @@ sub quote_rfc2047 {
 
 sub is_rfc2047_quoted {
        my $s = shift;
-       my $token = qr/[^][()<>@,;:"\/?.= \000-\037\177-\377]+/;
-       my $encoded_text = qr/[!->@-~]+/;
        length($s) <= 75 &&
-       $s =~ m/^(?:"[[:ascii:]]*"|=\?$token\?$token\?$encoded_text\?=)$/o;
+       $s =~ m/^(?:"[[:ascii:]]*"|$re_encoded_word)$/o;
 }
 
 sub subject_needs_rfc2047_quoting {