From: Junio C Hamano Date: Mon, 3 Oct 2016 20:30:38 +0000 (-0700) Subject: Merge branch 'kd/mailinfo-quoted-string' X-Git-Tag: v2.11.0-rc0~87 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/fe252ef81a6ff96b3ed11e98feb96784d35f15f8?ds=inline;hp=-c Merge branch 'kd/mailinfo-quoted-string' An author name, that spelled a backslash-quoted double quote in the human readable part "My \"double quoted\" name", was not unquoted correctly while applying a patch from a piece of e-mail. * kd/mailinfo-quoted-string: mailinfo: unescape quoted-pair in header fields t5100-mailinfo: replace common path prefix with variable --- fe252ef81a6ff96b3ed11e98feb96784d35f15f8 diff --combined mailinfo.c index 2275b285f0,b4118a0275..2fb3877ee4 --- a/mailinfo.c +++ b/mailinfo.c @@@ -54,6 -54,86 +54,86 @@@ static void parse_bogus_from(struct mai get_sane_name(&mi->name, &mi->name, &mi->email); } + static const char *unquote_comment(struct strbuf *outbuf, const char *in) + { + int c; + int take_next_litterally = 0; + + strbuf_addch(outbuf, '('); + + while ((c = *in++) != 0) { + if (take_next_litterally == 1) { + take_next_litterally = 0; + } else { + switch (c) { + case '\\': + take_next_litterally = 1; + continue; + case '(': + in = unquote_comment(outbuf, in); + continue; + case ')': + strbuf_addch(outbuf, ')'); + return in; + } + } + + strbuf_addch(outbuf, c); + } + + return in; + } + + static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in) + { + int c; + int take_next_litterally = 0; + + while ((c = *in++) != 0) { + if (take_next_litterally == 1) { + take_next_litterally = 0; + } else { + switch (c) { + case '\\': + take_next_litterally = 1; + continue; + case '"': + return in; + } + } + + strbuf_addch(outbuf, c); + } + + return in; + } + + static void unquote_quoted_pair(struct strbuf *line) + { + struct strbuf outbuf; + const char *in = line->buf; + int c; + + strbuf_init(&outbuf, line->len); + + while ((c = *in++) != 0) { + switch (c) { + case '"': + in = unquote_quoted_string(&outbuf, in); + continue; + case '(': + in = unquote_comment(&outbuf, in); + continue; + } + + strbuf_addch(&outbuf, c); + } + + strbuf_swap(&outbuf, line); + strbuf_release(&outbuf); + + } + static void handle_from(struct mailinfo *mi, const struct strbuf *from) { char *at; @@@ -63,6 -143,8 +143,8 @@@ strbuf_init(&f, from->len); strbuf_addbuf(&f, from); + unquote_quoted_pair(&f); + at = strchr(f.buf, '@'); if (!at) { parse_bogus_from(mi, from); @@@ -495,26 -577,26 +577,26 @@@ static int check_header(struct mailinf goto check_header_out; } - /* for inbody stuff */ - if (starts_with(line->buf, ">From") && isspace(line->buf[5])) { - ret = is_format_patch_separator(line->buf + 1, line->len - 1); - goto check_header_out; - } - if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) { - for (i = 0; header[i]; i++) { - if (!strcmp("Subject", header[i])) { - handle_header(&hdr_data[i], line); - ret = 1; - goto check_header_out; - } - } - } - check_header_out: strbuf_release(&sb); return ret; } +/* + * Returns 1 if the given line or any line beginning with the given line is an + * in-body header (that is, check_header will succeed when passed + * mi->s_hdr_data). + */ +static int is_inbody_header(const struct mailinfo *mi, + const struct strbuf *line) +{ + int i; + for (i = 0; header[i]; i++) + if (!mi->s_hdr_data[i] && cmp_header(line, header[i])) + return 1; + return 0; +} + static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line) { struct strbuf *ret; @@@ -572,35 -654,37 +654,35 @@@ static inline int patchbreak(const stru return 0; } -static int is_scissors_line(const struct strbuf *line) +static int is_scissors_line(const char *line) { - size_t i, len = line->len; + const char *c; int scissors = 0, gap = 0; - int first_nonblank = -1; - int last_nonblank = 0, visible, perforation = 0, in_perforation = 0; - const char *buf = line->buf; + const char *first_nonblank = NULL, *last_nonblank = NULL; + int visible, perforation = 0, in_perforation = 0; - for (i = 0; i < len; i++) { - if (isspace(buf[i])) { + for (c = line; *c; c++) { + if (isspace(*c)) { if (in_perforation) { perforation++; gap++; } continue; } - last_nonblank = i; - if (first_nonblank < 0) - first_nonblank = i; - if (buf[i] == '-') { + last_nonblank = c; + if (first_nonblank == NULL) + first_nonblank = c; + if (*c == '-') { in_perforation = 1; perforation++; continue; } - if (i + 1 < len && - (!memcmp(buf + i, ">8", 2) || !memcmp(buf + i, "8<", 2) || - !memcmp(buf + i, ">%", 2) || !memcmp(buf + i, "%<", 2))) { + if ((!memcmp(c, ">8", 2) || !memcmp(c, "8<", 2) || + !memcmp(c, ">%", 2) || !memcmp(c, "%<", 2))) { in_perforation = 1; perforation += 2; scissors += 2; - i++; + c++; continue; } in_perforation = 0; @@@ -615,60 -699,12 +697,60 @@@ * than half of the perforation. */ - visible = last_nonblank - first_nonblank + 1; + if (first_nonblank && last_nonblank) + visible = last_nonblank - first_nonblank + 1; + else + visible = 0; return (scissors && 8 <= visible && visible < perforation * 3 && gap * 2 < perforation); } +static void flush_inbody_header_accum(struct mailinfo *mi) +{ + if (!mi->inbody_header_accum.len) + return; + assert(check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0)); + strbuf_reset(&mi->inbody_header_accum); +} + +static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line) +{ + if (mi->inbody_header_accum.len && + (line->buf[0] == ' ' || line->buf[0] == '\t')) { + if (mi->use_scissors && is_scissors_line(line->buf)) { + /* + * This is a scissors line; do not consider this line + * as a header continuation line. + */ + flush_inbody_header_accum(mi); + return 0; + } + strbuf_strip_suffix(&mi->inbody_header_accum, "\n"); + strbuf_addbuf(&mi->inbody_header_accum, line); + return 1; + } + + flush_inbody_header_accum(mi); + + if (starts_with(line->buf, ">From") && isspace(line->buf[5])) + return is_format_patch_separator(line->buf + 1, line->len - 1); + if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) { + int i; + for (i = 0; header[i]; i++) + if (!strcmp("Subject", header[i])) { + handle_header(&mi->s_hdr_data[i], line); + return 1; + } + return 0; + } + if (is_inbody_header(mi, line)) { + strbuf_addbuf(&mi->inbody_header_accum, line); + return 1; + } + return 0; +} + static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) { assert(!mi->filter_stage); @@@ -679,7 -715,7 +761,7 @@@ } if (mi->use_inbody_headers && mi->header_stage) { - mi->header_stage = check_header(mi, line, mi->s_hdr_data, 0); + mi->header_stage = check_inbody_header(mi, line); if (mi->header_stage) return 0; } else @@@ -692,7 -728,7 +774,7 @@@ if (convert_to_utf8(mi, line, mi->charset.buf)) return 0; /* mi->input_error already set */ - if (mi->use_scissors && is_scissors_line(line)) { + if (mi->use_scissors && is_scissors_line(line->buf)) { int i; strbuf_setlen(&mi->log_message, 0); @@@ -932,8 -968,6 +1014,8 @@@ static void handle_body(struct mailinf break; } while (!strbuf_getwholeline(line, mi->input, '\n')); + flush_inbody_header_accum(mi); + handle_body_out: strbuf_release(&prev); } @@@ -1049,7 -1083,6 +1131,7 @@@ void setup_mailinfo(struct mailinfo *mi strbuf_init(&mi->email, 0); strbuf_init(&mi->charset, 0); strbuf_init(&mi->log_message, 0); + strbuf_init(&mi->inbody_header_accum, 0); mi->header_stage = 1; mi->use_inbody_headers = 1; mi->content_top = mi->content; @@@ -1063,7 -1096,6 +1145,7 @@@ void clear_mailinfo(struct mailinfo *mi strbuf_release(&mi->name); strbuf_release(&mi->email); strbuf_release(&mi->charset); + strbuf_release(&mi->inbody_header_accum); free(mi->message_id); for (i = 0; mi->p_hdr_data[i]; i++) diff --combined t/t5100-mailinfo.sh index e173c33f4b,45d228ebc8..e6b995161e --- a/t/t5100-mailinfo.sh +++ b/t/t5100-mailinfo.sh @@@ -7,37 -7,39 +7,39 @@@ test_description='git mailinfo and git . ./test-lib.sh + DATA="$TEST_DIRECTORY/t5100" + test_expect_success 'split sample box' \ - 'git mailsplit -o. "$TEST_DIRECTORY"/t5100/sample.mbox >last && + 'git mailsplit -o. "$DATA/sample.mbox" >last && last=$(cat last) && echo total is $last && - test $(cat last) = 17' + test $(cat last) = 18' check_mailinfo () { mail=$1 opt=$2 mo="$mail$opt" - git mailinfo -u $opt msg$mo patch$mo <$mail >info$mo && - test_cmp "$TEST_DIRECTORY"/t5100/msg$mo msg$mo && - test_cmp "$TEST_DIRECTORY"/t5100/patch$mo patch$mo && - test_cmp "$TEST_DIRECTORY"/t5100/info$mo info$mo + git mailinfo -u $opt "msg$mo" "patch$mo" <"$mail" >"info$mo" && + test_cmp "$DATA/msg$mo" "msg$mo" && + test_cmp "$DATA/patch$mo" "patch$mo" && + test_cmp "$DATA/info$mo" "info$mo" } for mail in 00* do test_expect_success "mailinfo $mail" ' - check_mailinfo $mail "" && - if test -f "$TEST_DIRECTORY"/t5100/msg$mail--scissors + check_mailinfo "$mail" "" && + if test -f "$DATA/msg$mail--scissors" then - check_mailinfo $mail --scissors + check_mailinfo "$mail" --scissors fi && - if test -f "$TEST_DIRECTORY"/t5100/msg$mail--no-inbody-headers + if test -f "$DATA/msg$mail--no-inbody-headers" then - check_mailinfo $mail --no-inbody-headers + check_mailinfo "$mail" --no-inbody-headers fi && - if test -f "$TEST_DIRECTORY"/t5100/msg$mail--message-id + if test -f "$DATA/msg$mail--message-id" then - check_mailinfo $mail --message-id + check_mailinfo "$mail" --message-id fi ' done @@@ -45,7 -47,7 +47,7 @@@ test_expect_success 'split box with rfc2047 samples' \ 'mkdir rfc2047 && - git mailsplit -orfc2047 "$TEST_DIRECTORY"/t5100/rfc2047-samples.mbox \ + git mailsplit -orfc2047 "$DATA/rfc2047-samples.mbox" \ >rfc2047/last && last=$(cat rfc2047/last) && echo total is $last && @@@ -54,20 -56,20 +56,20 @@@ for mail in rfc2047/00* do test_expect_success "mailinfo $mail" ' - git mailinfo -u $mail-msg $mail-patch <$mail >$mail-info && + git mailinfo -u "$mail-msg" "$mail-patch" <"$mail" >"$mail-info" && echo msg && - test_cmp "$TEST_DIRECTORY"/t5100/empty $mail-msg && + test_cmp "$DATA/empty" "$mail-msg" && echo patch && - test_cmp "$TEST_DIRECTORY"/t5100/empty $mail-patch && + test_cmp "$DATA/empty" "$mail-patch" && echo info && - test_cmp "$TEST_DIRECTORY"/t5100/rfc2047-info-$(basename $mail) $mail-info + test_cmp "$DATA/rfc2047-info-$(basename $mail)" "$mail-info" ' done test_expect_success 'respect NULs' ' - git mailsplit -d3 -o. "$TEST_DIRECTORY"/t5100/nul-plain && - test_cmp "$TEST_DIRECTORY"/t5100/nul-plain 001 && + git mailsplit -d3 -o. "$DATA/nul-plain" && + test_cmp "$DATA/nul-plain" 001 && (cat 001 | git mailinfo msg patch) && test_line_count = 4 patch @@@ -75,52 -77,52 +77,52 @@@ test_expect_success 'Preserve NULs out of MIME encoded message' ' - git mailsplit -d5 -o. "$TEST_DIRECTORY"/t5100/nul-b64.in && - test_cmp "$TEST_DIRECTORY"/t5100/nul-b64.in 00001 && + git mailsplit -d5 -o. "$DATA/nul-b64.in" && + test_cmp "$DATA/nul-b64.in" 00001 && git mailinfo msg patch <00001 && - test_cmp "$TEST_DIRECTORY"/t5100/nul-b64.expect patch + test_cmp "$DATA/nul-b64.expect" patch ' test_expect_success 'mailinfo on from header without name works' ' mkdir info-from && - git mailsplit -oinfo-from "$TEST_DIRECTORY"/t5100/info-from.in && - test_cmp "$TEST_DIRECTORY"/t5100/info-from.in info-from/0001 && + git mailsplit -oinfo-from "$DATA/info-from.in" && + test_cmp "$DATA/info-from.in" info-from/0001 && git mailinfo info-from/msg info-from/patch \ info-from/out && - test_cmp "$TEST_DIRECTORY"/t5100/info-from.expect info-from/out + test_cmp "$DATA/info-from.expect" info-from/out ' test_expect_success 'mailinfo finds headers after embedded From line' ' mkdir embed-from && - git mailsplit -oembed-from "$TEST_DIRECTORY"/t5100/embed-from.in && - test_cmp "$TEST_DIRECTORY"/t5100/embed-from.in embed-from/0001 && + git mailsplit -oembed-from "$DATA/embed-from.in" && + test_cmp "$DATA/embed-from.in" embed-from/0001 && git mailinfo embed-from/msg embed-from/patch \ embed-from/out && - test_cmp "$TEST_DIRECTORY"/t5100/embed-from.expect embed-from/out + test_cmp "$DATA/embed-from.expect" embed-from/out ' test_expect_success 'mailinfo on message with quoted >From' ' mkdir quoted-from && - git mailsplit -oquoted-from "$TEST_DIRECTORY"/t5100/quoted-from.in && - test_cmp "$TEST_DIRECTORY"/t5100/quoted-from.in quoted-from/0001 && + git mailsplit -oquoted-from "$DATA/quoted-from.in" && + test_cmp "$DATA/quoted-from.in" quoted-from/0001 && git mailinfo quoted-from/msg quoted-from/patch \ quoted-from/out && - test_cmp "$TEST_DIRECTORY"/t5100/quoted-from.expect quoted-from/msg + test_cmp "$DATA/quoted-from.expect" quoted-from/msg ' test_expect_success 'mailinfo unescapes with --mboxrd' ' mkdir mboxrd && git mailsplit -omboxrd --mboxrd \ - "$TEST_DIRECTORY"/t5100/sample.mboxrd >last && + "$DATA/sample.mboxrd" >last && test x"$(cat last)" = x2 && for i in 0001 0002 do git mailinfo mboxrd/msg mboxrd/patch \ mboxrd/out && - test_cmp "$TEST_DIRECTORY"/t5100/${i}mboxrd mboxrd/msg + test_cmp "$DATA/${i}mboxrd" mboxrd/msg done && sp=" " && echo "From " >expect && @@@ -142,4 -144,18 +144,18 @@@ test_cmp expect mboxrd/msg ' + test_expect_success 'mailinfo handles rfc2822 quoted-string' ' + mkdir quoted-string && + git mailinfo /dev/null /dev/null <"$DATA/quoted-string.in" \ + >quoted-string/info && + test_cmp "$DATA/quoted-string.expect" quoted-string/info + ' + + test_expect_success 'mailinfo handles rfc2822 comment' ' + mkdir comment && + git mailinfo /dev/null /dev/null <"$DATA/comment.in" \ + >comment/info && + test_cmp "$DATA/comment.expect" comment/info + ' + test_done