send-email: fix garbage removal after address
authorMatthieu Moy <git@matthieu-moy.fr>
Wed, 23 Aug 2017 10:21:01 +0000 (12:21 +0200)
committerJunio C Hamano <gitster@pobox.com>
Thu, 24 Aug 2017 21:40:15 +0000 (14:40 -0700)
This is a followup over 9d33439 (send-email: only allow one address
per body tag, 2017-02-20). The first iteration did allow writting

Cc: <foo@example.com> # garbage
but did so by matching the regex ([^>]*>?), i.e. stop after the first
instance of '>'. However, it did not properly deal with

Cc: foo@example.com # garbage
Fix this using a new function strip_garbage_one_address, which does
essentially what the old ([^>]*>?) was doing, but dealing with more
corner-cases. Since we've allowed

Cc: "Foo # Bar" <foobar@example.com>
in previous versions, it makes sense to continue allowing it (but we
still remove any garbage after it). OTOH, when an address is given
without quoting, we just take the first word and ignore everything
after.

Signed-off-by: Matthieu Moy <git@matthieu-moy.fr>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
git-send-email.perl
t/t9001-send-email.sh
index fa6526986ed40143a12ffcd6563fd52b49d209e7..33a69ffe5d24e5cf22d8a9ea9160ba68c14a1e6a 100755 (executable)
@@ -1089,6 +1089,26 @@ sub sanitize_address {
 
 }
 
+sub strip_garbage_one_address {
+       my ($addr) = @_;
+       chomp $addr;
+       if ($addr =~ /^(("[^"]*"|[^"<]*)? *<[^>]*>).*/) {
+               # "Foo Bar" <foobar@example.com> [possibly garbage here]
+               # Foo Bar <foobar@example.com> [possibly garbage here]
+               return $1;
+       }
+       if ($addr =~ /^(<[^>]*>).*/) {
+               # <foo@example.com> [possibly garbage here]
+               # if garbage contains other addresses, they are ignored.
+               return $1;
+       }
+       if ($addr =~ /^([^"#,\s]*)/) {
+               # address without quoting: remove anything after the address
+               return $1;
+       }
+       return $addr;
+}
+
 sub sanitize_address_list {
        return (map { sanitize_address($_) } @_);
 }
@@ -1590,10 +1610,12 @@ sub send_message {
        # Now parse the message body
        while(<$fh>) {
                $message .=  $_;
-               if (/^(Signed-off-by|Cc): ([^>]*>?)/i) {
+               if (/^(Signed-off-by|Cc): (.*)/i) {
                        chomp;
                        my ($what, $c) = ($1, $2);
-                       chomp $c;
+                       # strip garbage for the address we'll use:
+                       $c = strip_garbage_one_address($c);
+                       # sanitize a bit more to decide whether to suppress the address:
                        my $sc = sanitize_address($c);
                        if ($sc eq $sender) {
                                next if ($suppress_cc{'self'});
index d1e4e8ad19f3d3e005e177b98e4eee16f4ef1461..f30980895c2f1b93317870e6b471d67bc99b0fd2 100755 (executable)
@@ -148,6 +148,8 @@ cat >expected-cc <<\EOF
 !two@example.com!
 !three@example.com!
 !four@example.com!
+!five@example.com!
+!six@example.com!
 EOF
 "
 
@@ -161,6 +163,8 @@ test_expect_success $PREREQ 'cc trailer with various syntax' '
        Cc: <two@example.com> # trailing comments are ignored
        Cc: <three@example.com>, <not.four@example.com> one address per line
        Cc: "Some # Body" <four@example.com> [ <also.a.comment> ]
+       Cc: five@example.com # not.six@example.com
+       Cc: six@example.com, not.seven@example.com
        EOF
        clean_fake_sendmail &&
        git send-email -1 --to=recipient@example.com \