Merge branch 'kd/mailinfo-quoted-string'
author Junio C Hamano <gitster@pobox.com>
Mon, 3 Oct 2016 20:30:38 +0000 (13:30 -0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 3 Oct 2016 20:30:38 +0000 (13:30 -0700)
An author name that contained a backslash-quoted double quote in its
human-readable part, e.g. "My \"double quoted\" name", was not unquoted
correctly when applying a patch from a piece of e-mail.

* kd/mailinfo-quoted-string:
mailinfo: unescape quoted-pair in header fields
t5100-mailinfo: replace common path prefix with variable

1  2 
mailinfo.c
t/t5100-mailinfo.sh
diff --combined mailinfo.c
index 2275b285f097d632e38d7f1fe59692df1f807e28,b4118a02757212871e3402532ab5c422a5ba043f..2fb3877ee44e9cdc83c43fa23971f02a94b5a49f
@@@ -54,6 -54,86 +54,86 @@@ static void parse_bogus_from(struct mai
        get_sane_name(&mi->name, &mi->name, &mi->email);
  }
  
+ static const char *unquote_comment(struct strbuf *outbuf, const char *in)
+ {
+       int c;
+       int take_next_litterally = 0;
+       strbuf_addch(outbuf, '(');
+       while ((c = *in++) != 0) {
+               if (take_next_litterally == 1) {
+                       take_next_litterally = 0;
+               } else {
+                       switch (c) {
+                       case '\\':
+                               take_next_litterally = 1;
+                               continue;
+                       case '(':
+                               in = unquote_comment(outbuf, in);
+                               continue;
+                       case ')':
+                               strbuf_addch(outbuf, ')');
+                               return in;
+                       }
+               }
+               strbuf_addch(outbuf, c);
+       }
+       return in;
+ }
+ static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in)
+ {
+       int c;
+       int take_next_litterally = 0;
+       while ((c = *in++) != 0) {
+               if (take_next_litterally == 1) {
+                       take_next_litterally = 0;
+               } else {
+                       switch (c) {
+                       case '\\':
+                               take_next_litterally = 1;
+                               continue;
+                       case '"':
+                               return in;
+                       }
+               }
+               strbuf_addch(outbuf, c);
+       }
+       return in;
+ }
+ static void unquote_quoted_pair(struct strbuf *line)
+ {
+       struct strbuf outbuf;
+       const char *in = line->buf;
+       int c;
+       strbuf_init(&outbuf, line->len);
+       while ((c = *in++) != 0) {
+               switch (c) {
+               case '"':
+                       in = unquote_quoted_string(&outbuf, in);
+                       continue;
+               case '(':
+                       in = unquote_comment(&outbuf, in);
+                       continue;
+               }
+               strbuf_addch(&outbuf, c);
+       }
+       strbuf_swap(&outbuf, line);
+       strbuf_release(&outbuf);
+ }
  static void handle_from(struct mailinfo *mi, const struct strbuf *from)
  {
        char *at;
        strbuf_init(&f, from->len);
        strbuf_addbuf(&f, from);
  
+       unquote_quoted_pair(&f);
        at = strchr(f.buf, '@');
        if (!at) {
                parse_bogus_from(mi, from);
@@@ -495,26 -577,26 +577,26 @@@ static int check_header(struct mailinf
                goto check_header_out;
        }
  
 -      /* for inbody stuff */
 -      if (starts_with(line->buf, ">From") && isspace(line->buf[5])) {
 -              ret = is_format_patch_separator(line->buf + 1, line->len - 1);
 -              goto check_header_out;
 -      }
 -      if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
 -              for (i = 0; header[i]; i++) {
 -                      if (!strcmp("Subject", header[i])) {
 -                              handle_header(&hdr_data[i], line);
 -                              ret = 1;
 -                              goto check_header_out;
 -                      }
 -              }
 -      }
 -
  check_header_out:
        strbuf_release(&sb);
        return ret;
  }
  
 +/*
 + * Returns 1 if the given line or any line beginning with the given line is an
 + * in-body header (that is, check_header will succeed when passed
 + * mi->s_hdr_data).
 + */
 +static int is_inbody_header(const struct mailinfo *mi,
 +                          const struct strbuf *line)
 +{
 +      int i;
 +      for (i = 0; header[i]; i++)
 +              if (!mi->s_hdr_data[i] && cmp_header(line, header[i]))
 +                      return 1;
 +      return 0;
 +}
 +
  static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line)
  {
        struct strbuf *ret;
@@@ -572,35 -654,37 +654,35 @@@ static inline int patchbreak(const stru
        return 0;
  }
  
 -static int is_scissors_line(const struct strbuf *line)
 +static int is_scissors_line(const char *line)
  {
 -      size_t i, len = line->len;
 +      const char *c;
        int scissors = 0, gap = 0;
 -      int first_nonblank = -1;
 -      int last_nonblank = 0, visible, perforation = 0, in_perforation = 0;
 -      const char *buf = line->buf;
 +      const char *first_nonblank = NULL, *last_nonblank = NULL;
 +      int visible, perforation = 0, in_perforation = 0;
  
 -      for (i = 0; i < len; i++) {
 -              if (isspace(buf[i])) {
 +      for (c = line; *c; c++) {
 +              if (isspace(*c)) {
                        if (in_perforation) {
                                perforation++;
                                gap++;
                        }
                        continue;
                }
 -              last_nonblank = i;
 -              if (first_nonblank < 0)
 -                      first_nonblank = i;
 -              if (buf[i] == '-') {
 +              last_nonblank = c;
 +              if (first_nonblank == NULL)
 +                      first_nonblank = c;
 +              if (*c == '-') {
                        in_perforation = 1;
                        perforation++;
                        continue;
                }
 -              if (i + 1 < len &&
 -                  (!memcmp(buf + i, ">8", 2) || !memcmp(buf + i, "8<", 2) ||
 -                   !memcmp(buf + i, ">%", 2) || !memcmp(buf + i, "%<", 2))) {
 +              if ((!memcmp(c, ">8", 2) || !memcmp(c, "8<", 2) ||
 +                   !memcmp(c, ">%", 2) || !memcmp(c, "%<", 2))) {
                        in_perforation = 1;
                        perforation += 2;
                        scissors += 2;
 -                      i++;
 +                      c++;
                        continue;
                }
                in_perforation = 0;
         * than half of the perforation.
         */
  
 -      visible = last_nonblank - first_nonblank + 1;
 +      if (first_nonblank && last_nonblank)
 +              visible = last_nonblank - first_nonblank + 1;
 +      else
 +              visible = 0;
        return (scissors && 8 <= visible &&
                visible < perforation * 3 &&
                gap * 2 < perforation);
  }
  
 +static void flush_inbody_header_accum(struct mailinfo *mi)
 +{
 +      if (!mi->inbody_header_accum.len)
 +              return;
 +      assert(check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0));
 +      strbuf_reset(&mi->inbody_header_accum);
 +}
 +
 +static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line)
 +{
 +      if (mi->inbody_header_accum.len &&
 +          (line->buf[0] == ' ' || line->buf[0] == '\t')) {
 +              if (mi->use_scissors && is_scissors_line(line->buf)) {
 +                      /*
 +                       * This is a scissors line; do not consider this line
 +                       * as a header continuation line.
 +                       */
 +                      flush_inbody_header_accum(mi);
 +                      return 0;
 +              }
 +              strbuf_strip_suffix(&mi->inbody_header_accum, "\n");
 +              strbuf_addbuf(&mi->inbody_header_accum, line);
 +              return 1;
 +      }
 +
 +      flush_inbody_header_accum(mi);
 +
 +      if (starts_with(line->buf, ">From") && isspace(line->buf[5]))
 +              return is_format_patch_separator(line->buf + 1, line->len - 1);
 +      if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
 +              int i;
 +              for (i = 0; header[i]; i++)
 +                      if (!strcmp("Subject", header[i])) {
 +                              handle_header(&mi->s_hdr_data[i], line);
 +                              return 1;
 +                      }
 +              return 0;
 +      }
 +      if (is_inbody_header(mi, line)) {
 +              strbuf_addbuf(&mi->inbody_header_accum, line);
 +              return 1;
 +      }
 +      return 0;
 +}
 +
  static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
  {
        assert(!mi->filter_stage);
        }
  
        if (mi->use_inbody_headers && mi->header_stage) {
 -              mi->header_stage = check_header(mi, line, mi->s_hdr_data, 0);
 +              mi->header_stage = check_inbody_header(mi, line);
                if (mi->header_stage)
                        return 0;
        } else
        if (convert_to_utf8(mi, line, mi->charset.buf))
                return 0; /* mi->input_error already set */
  
 -      if (mi->use_scissors && is_scissors_line(line)) {
 +      if (mi->use_scissors && is_scissors_line(line->buf)) {
                int i;
  
                strbuf_setlen(&mi->log_message, 0);
@@@ -932,8 -968,6 +1014,8 @@@ static void handle_body(struct mailinf
                        break;
        } while (!strbuf_getwholeline(line, mi->input, '\n'));
  
 +      flush_inbody_header_accum(mi);
 +
  handle_body_out:
        strbuf_release(&prev);
  }
@@@ -1049,7 -1083,6 +1131,7 @@@ void setup_mailinfo(struct mailinfo *mi
        strbuf_init(&mi->email, 0);
        strbuf_init(&mi->charset, 0);
        strbuf_init(&mi->log_message, 0);
 +      strbuf_init(&mi->inbody_header_accum, 0);
        mi->header_stage = 1;
        mi->use_inbody_headers = 1;
        mi->content_top = mi->content;
@@@ -1063,7 -1096,6 +1145,7 @@@ void clear_mailinfo(struct mailinfo *mi
        strbuf_release(&mi->name);
        strbuf_release(&mi->email);
        strbuf_release(&mi->charset);
 +      strbuf_release(&mi->inbody_header_accum);
        free(mi->message_id);
  
        for (i = 0; mi->p_hdr_data[i]; i++)
diff --combined t/t5100-mailinfo.sh
index e173c33f4b64480eaa948bc9f5e1178f82433f7f,45d228ebc81ed13c83d24f90895e9a374af5a87e..e6b995161e0fd15493bdaf4e5a5df2a3c2990798
@@@ -7,37 -7,39 +7,39 @@@ test_description='git mailinfo and git 
  
  . ./test-lib.sh
  
+ DATA="$TEST_DIRECTORY/t5100"
  test_expect_success 'split sample box' \
-       'git mailsplit -o. "$TEST_DIRECTORY"/t5100/sample.mbox >last &&
+       'git mailsplit -o. "$DATA/sample.mbox" >last &&
        last=$(cat last) &&
        echo total is $last &&
 -      test $(cat last) = 17'
 +      test $(cat last) = 18'
  
  check_mailinfo () {
        mail=$1 opt=$2
        mo="$mail$opt"
-       git mailinfo -u $opt msg$mo patch$mo <$mail >info$mo &&
-       test_cmp "$TEST_DIRECTORY"/t5100/msg$mo msg$mo &&
-       test_cmp "$TEST_DIRECTORY"/t5100/patch$mo patch$mo &&
-       test_cmp "$TEST_DIRECTORY"/t5100/info$mo info$mo
+       git mailinfo -u $opt "msg$mo" "patch$mo" <"$mail" >"info$mo" &&
+       test_cmp "$DATA/msg$mo" "msg$mo" &&
+       test_cmp "$DATA/patch$mo" "patch$mo" &&
+       test_cmp "$DATA/info$mo" "info$mo"
  }
  
  
  for mail in 00*
  do
        test_expect_success "mailinfo $mail" '
-               check_mailinfo $mail "" &&
-               if test -f "$TEST_DIRECTORY"/t5100/msg$mail--scissors
+               check_mailinfo "$mail" "" &&
+               if test -f "$DATA/msg$mail--scissors"
                then
-                       check_mailinfo $mail --scissors
+                       check_mailinfo "$mail" --scissors
                fi &&
-               if test -f "$TEST_DIRECTORY"/t5100/msg$mail--no-inbody-headers
+               if test -f "$DATA/msg$mail--no-inbody-headers"
                then
-                       check_mailinfo $mail --no-inbody-headers
+                       check_mailinfo "$mail" --no-inbody-headers
                fi &&
-               if test -f "$TEST_DIRECTORY"/t5100/msg$mail--message-id
+               if test -f "$DATA/msg$mail--message-id"
                then
-                       check_mailinfo $mail --message-id
+                       check_mailinfo "$mail" --message-id
                fi
        '
  done
@@@ -45,7 -47,7 +47,7 @@@
  
  test_expect_success 'split box with rfc2047 samples' \
        'mkdir rfc2047 &&
-       git mailsplit -orfc2047 "$TEST_DIRECTORY"/t5100/rfc2047-samples.mbox \
+       git mailsplit -orfc2047 "$DATA/rfc2047-samples.mbox" \
          >rfc2047/last &&
        last=$(cat rfc2047/last) &&
        echo total is $last &&
  for mail in rfc2047/00*
  do
        test_expect_success "mailinfo $mail" '
-               git mailinfo -u $mail-msg $mail-patch <$mail >$mail-info &&
+               git mailinfo -u "$mail-msg" "$mail-patch" <"$mail" >"$mail-info" &&
                echo msg &&
-               test_cmp "$TEST_DIRECTORY"/t5100/empty $mail-msg &&
+               test_cmp "$DATA/empty" "$mail-msg" &&
                echo patch &&
-               test_cmp "$TEST_DIRECTORY"/t5100/empty $mail-patch &&
+               test_cmp "$DATA/empty" "$mail-patch" &&
                echo info &&
-               test_cmp "$TEST_DIRECTORY"/t5100/rfc2047-info-$(basename $mail) $mail-info
+               test_cmp "$DATA/rfc2047-info-$(basename $mail)" "$mail-info"
        '
  done
  
  test_expect_success 'respect NULs' '
  
-       git mailsplit -d3 -o. "$TEST_DIRECTORY"/t5100/nul-plain &&
-       test_cmp "$TEST_DIRECTORY"/t5100/nul-plain 001 &&
+       git mailsplit -d3 -o. "$DATA/nul-plain" &&
+       test_cmp "$DATA/nul-plain" 001 &&
        (cat 001 | git mailinfo msg patch) &&
        test_line_count = 4 patch
  
  
  test_expect_success 'Preserve NULs out of MIME encoded message' '
  
-       git mailsplit -d5 -o. "$TEST_DIRECTORY"/t5100/nul-b64.in &&
-       test_cmp "$TEST_DIRECTORY"/t5100/nul-b64.in 00001 &&
+       git mailsplit -d5 -o. "$DATA/nul-b64.in" &&
+       test_cmp "$DATA/nul-b64.in" 00001 &&
        git mailinfo msg patch <00001 &&
-       test_cmp "$TEST_DIRECTORY"/t5100/nul-b64.expect patch
+       test_cmp "$DATA/nul-b64.expect" patch
  
  '
  
  test_expect_success 'mailinfo on from header without name works' '
  
        mkdir info-from &&
-       git mailsplit -oinfo-from "$TEST_DIRECTORY"/t5100/info-from.in &&
-       test_cmp "$TEST_DIRECTORY"/t5100/info-from.in info-from/0001 &&
+       git mailsplit -oinfo-from "$DATA/info-from.in" &&
+       test_cmp "$DATA/info-from.in" info-from/0001 &&
        git mailinfo info-from/msg info-from/patch \
          <info-from/0001 >info-from/out &&
-       test_cmp "$TEST_DIRECTORY"/t5100/info-from.expect info-from/out
+       test_cmp "$DATA/info-from.expect" info-from/out
  
  '
  
  test_expect_success 'mailinfo finds headers after embedded From line' '
        mkdir embed-from &&
-       git mailsplit -oembed-from "$TEST_DIRECTORY"/t5100/embed-from.in &&
-       test_cmp "$TEST_DIRECTORY"/t5100/embed-from.in embed-from/0001 &&
+       git mailsplit -oembed-from "$DATA/embed-from.in" &&
+       test_cmp "$DATA/embed-from.in" embed-from/0001 &&
        git mailinfo embed-from/msg embed-from/patch \
          <embed-from/0001 >embed-from/out &&
-       test_cmp "$TEST_DIRECTORY"/t5100/embed-from.expect embed-from/out
+       test_cmp "$DATA/embed-from.expect" embed-from/out
  '
  
  test_expect_success 'mailinfo on message with quoted >From' '
        mkdir quoted-from &&
-       git mailsplit -oquoted-from "$TEST_DIRECTORY"/t5100/quoted-from.in &&
-       test_cmp "$TEST_DIRECTORY"/t5100/quoted-from.in quoted-from/0001 &&
+       git mailsplit -oquoted-from "$DATA/quoted-from.in" &&
+       test_cmp "$DATA/quoted-from.in" quoted-from/0001 &&
        git mailinfo quoted-from/msg quoted-from/patch \
          <quoted-from/0001 >quoted-from/out &&
-       test_cmp "$TEST_DIRECTORY"/t5100/quoted-from.expect quoted-from/msg
+       test_cmp "$DATA/quoted-from.expect" quoted-from/msg
  '
  
  test_expect_success 'mailinfo unescapes with --mboxrd' '
        mkdir mboxrd &&
        git mailsplit -omboxrd --mboxrd \
-               "$TEST_DIRECTORY"/t5100/sample.mboxrd >last &&
+               "$DATA/sample.mboxrd" >last &&
        test x"$(cat last)" = x2 &&
        for i in 0001 0002
        do
                git mailinfo mboxrd/msg mboxrd/patch \
                  <mboxrd/$i >mboxrd/out &&
-               test_cmp "$TEST_DIRECTORY"/t5100/${i}mboxrd mboxrd/msg
+               test_cmp "$DATA/${i}mboxrd" mboxrd/msg
        done &&
        sp=" " &&
        echo "From " >expect &&
        test_cmp expect mboxrd/msg
  '
  
+ test_expect_success 'mailinfo handles rfc2822 quoted-string' '
+       mkdir quoted-string &&
+       git mailinfo /dev/null /dev/null <"$DATA/quoted-string.in" \
+               >quoted-string/info &&
+       test_cmp "$DATA/quoted-string.expect" quoted-string/info
+ '
+ test_expect_success 'mailinfo handles rfc2822 comment' '
+       mkdir comment &&
+       git mailinfo /dev/null /dev/null <"$DATA/comment.in" \
+               >comment/info &&
+       test_cmp "$DATA/comment.expect" comment/info
+ '
  test_done