static char *cleanup_subject(char *subject)
{
- if (keep_subject)
- return subject;
for (;;) {
char *p;
int len, remove;
}
}
-static void decode_header(char *it);
-static char *header[MAX_HDR_PARSED] = {
+static const char *header[MAX_HDR_PARSED] = { /* Field names that check_header() matches.
+ * Its scan stops at the first NULL entry, so at least one trailing
+ * slot of the array has to stay unset.
+ */
"From","Subject","Date",
};
-static int check_header(char *line, char **hdr_data)
+static int check_header(char *line, unsigned linesize, char **hdr_data, int overwrite)
{
int i;
/* search for the interesting parts */
for (i = 0; header[i]; i++) {
int len = strlen(header[i]);
- if (!hdr_data[i] &&
+ if ((!hdr_data[i] || overwrite) &&
!strncasecmp(line, header[i], len) &&
line[len] == ':' && isspace(line[len + 1])) {
/* Unwrap inline B and Q encoding, and optionally
* normalize the meta information to utf8.
*/
- decode_header(line + len + 2);
+ decode_header(line + len + 2, linesize - len - 2);
hdr_data[i] = xmalloc(1000 * sizeof(char));
if (! handle_header(line, hdr_data[i], len + 2)) {
return 1;
/* Content stuff */
if (!strncasecmp(line, "Content-Type", 12) &&
line[12] == ':' && isspace(line[12 + 1])) {
- decode_header(line + 12 + 2);
+ decode_header(line + 12 + 2, linesize - 12 - 2);
if (! handle_content_type(line)) {
return 1;
}
}
if (!strncasecmp(line, "Content-Transfer-Encoding", 25) &&
line[25] == ':' && isspace(line[25 + 1])) {
- decode_header(line + 25 + 2);
+ decode_header(line + 25 + 2, linesize - 25 - 2);
if (! handle_content_transfer_encoding(line)) {
return 1;
}
if (addlen >= sz - len)
addlen = sz - len - 1;
memcpy(line + len, continuation, addlen);
+ line[len] = '\n';
len += addlen;
}
}
return 1;
}
-static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047)
+static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047)
{
+ char *otend = ot + otsize;
int c;
while ((c = *in++) != 0 && (in <= ep)) {
+ if (ot == otend) {
+ *--ot = '\0';
+ return -1;
+ }
if (c == '=') {
int d = *in++;
if (d == '\n' || !d)
return 0;
}
-static int decode_b_segment(char *in, char *ot, char *ep)
+static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
{
/* Decode in..ep, possibly in-place to ot */
int c, pos = 0, acc = 0;
+ char *otend = ot + otsize;
while ((c = *in++) != 0 && (in <= ep)) {
+ if (ot == otend) {
+ *--ot = '\0';
+ return -1;
+ }
if (c == '+')
c = 62;
else if (c == '/')
return 0;
}
-static void convert_to_utf8(char *line, const char *charset)
+/*
+ * When there is no known charset, guess.
+ *
+ * Right now we assume that if the target is UTF-8 (the default),
+ * and it already looks like UTF-8 (which includes US-ASCII as its
+ * subset, of course) then that is what it is and there is nothing
+ * to do.
+ *
+ * Otherwise, we default to assuming it is Latin1 for historical
+ * reasons.
+ *
+ * Returns NULL when no conversion is needed, i.e. target_charset is
+ * UTF-8 according to is_encoding_utf8() and "line" passes is_utf8().
+ * In every other case the constant string "latin1" is returned as
+ * the charset to assume for "line".
+ */
+static const char *guess_charset(const char *line, const char *target_charset)
+{
+ if (is_encoding_utf8(target_charset)) {
+ if (is_utf8(line))
+ return NULL;
+ }
+ return "latin1";
+}
+
+static void convert_to_utf8(char *line, unsigned linesize, const char *charset) /*
+ * Re-encode the string in "line" from "charset" into metainfo_charset
+ * and write the result back into "line", copying at most what
+ * "linesize" allows.
+ * A NULL or empty "charset" means the source charset is unknown, in
+ * which case guess_charset() picks one.  "line" is left unchanged when
+ * the guess reports that no conversion is needed, or when "charset" is
+ * spelled exactly like metainfo_charset.
+ * Dies when reencode_string() returns NULL.
+ * The callers visible in this file only call this when
+ * metainfo_charset is set.
+ */
{
- static const char latin_one[] = "latin1";
- const char *input_charset = *charset ? charset : latin_one;
- char *out = reencode_string(line, metainfo_charset, input_charset);
+ char *out;
+
+ if (!charset || !*charset) { /* source charset unknown: guess one */
+ charset = guess_charset(line, metainfo_charset);
+ if (!charset) /* the guess says "line" is fine as it is */
+ return;
+ }
+ if (!strcmp(metainfo_charset, charset)) /* same name, nothing to convert */
+ return;
+ out = reencode_string(line, metainfo_charset, charset);
if (!out)
die("cannot convert from %s to %s\n",
- input_charset, metainfo_charset);
- strcpy(line, out);
+ charset, metainfo_charset);
+ strlcpy(line, out, linesize); /* bounded by linesize, unlike the strcpy() it replaces */
free(out);
}
-static int decode_header_bq(char *it)
+static int decode_header_bq(char *it, unsigned itsize)
{
char *in, *out, *ep, *cp, *sp;
char outbuf[1000];
default:
return rfc2047; /* no munging */
case 'b':
- sz = decode_b_segment(cp + 3, piecebuf, ep);
+ sz = decode_b_segment(cp + 3, piecebuf, sizeof(piecebuf), ep);
break;
case 'q':
- sz = decode_q_segment(cp + 3, piecebuf, ep, 1);
+ sz = decode_q_segment(cp + 3, piecebuf, sizeof(piecebuf), ep, 1);
break;
}
if (sz < 0)
return rfc2047;
if (metainfo_charset)
- convert_to_utf8(piecebuf, charset_q);
+ convert_to_utf8(piecebuf, sizeof(piecebuf), charset_q);
+
+ sz = strlen(piecebuf);
+ if (outbuf + sizeof(outbuf) <= out + sz)
+ return rfc2047; /* no munging */
strcpy(out, piecebuf);
- out += strlen(out);
+ out += sz;
in = ep + 2;
}
strcpy(out, in);
- strcpy(it, outbuf);
+ strlcpy(it, outbuf, itsize);
return rfc2047;
}
-static void decode_header(char *it)
+static void decode_header(char *it, unsigned itsize) /*
+ * Decode the header value "it" in place.  "itsize" is handed on to
+ * decode_header_bq() and convert_to_utf8() as the size limit for
+ * what they write back into "it".
+ */
{
- if (decode_header_bq(it))
+ if (decode_header_bq(it, itsize)) /* nonzero return: nothing more to do here */
return;
/* otherwise "it" is a straight copy of the input.
* This can be binary guck but there is no charset specified.
+ * The empty charset string passed below makes convert_to_utf8()
+ * guess the source charset.
*/
if (metainfo_charset)
- convert_to_utf8(it, "");
+ convert_to_utf8(it, itsize, "");
}
-static void decode_transfer_encoding(char *line)
+static void decode_transfer_encoding(char *line, unsigned linesize) /*
+ * Undo the transfer encoding named by the transfer_encoding variable
+ * (defined outside this function) on "line".  Decoding is done in
+ * place: "line" is passed to the segment decoders as both input and
+ * output, with "linesize" as the output size.  The decoders' return
+ * value is not checked here.
+ */
{
char *ep;
switch (transfer_encoding) {
case TE_QP:
ep = line + strlen(line); /* end of the encoded text */
- decode_q_segment(line, line, ep, 0);
+ decode_q_segment(line, line, linesize, ep, 0);
break;
case TE_BASE64:
ep = line + strlen(line); /* end of the encoded text */
- decode_b_segment(line, line, ep);
+ decode_b_segment(line, line, linesize, ep);
break;
case TE_DONTCARE:
break; /* "line" is left untouched */
}
}
-static int handle_filter(char *line);
+static int handle_filter(char *line, unsigned linesize);
static int find_boundary(void)
{
static int handle_boundary(void)
{
+ char newline[]="\n";
again:
if (!memcmp(line+content_top->boundary_len, "--", 2)) {
/* we hit an end boundary */
"can't recover\n");
exit(1);
}
- handle_filter("\n");
+ handle_filter(newline, sizeof(newline));
/* skip to the next boundary */
if (!find_boundary())
/* slurp in this section's info */
while (read_one_header_line(line, sizeof(line), fin))
- check_header(line, p_hdr_data);
+ check_header(line, sizeof(line), p_hdr_data, 0);
/* eat the blank line after section info */
return (fgets(line, sizeof(line), fin) != NULL);
}
-static int handle_commit_msg(char *line)
/*
 * Tell whether "line" ends the commit message and starts the patch.
 *
 * "line" must be a NUL-terminated string; it may be shorter than any
 * of the markers tested below (for instance a lone "\n"), which is why
 * the prefixes are compared with strncmp() rather than memcmp():
 * strncmp() stops at the terminator instead of reading a fixed number
 * of bytes past the end of a short string.
 *
 * Returns 1 for
 *   - a line starting with "diff -",
 *   - a line starting with "Index: ",
 *   - "--- " immediately followed by a non-space character,
 *   - "---" followed only by whitespace up to a newline,
 * and 0 for everything else.
 */
static inline int patchbreak(const char *line)
{
	const char *p;

	/* Beginning of a "diff -" header? */
	if (!strncmp(line, "diff -", 6))
		return 1;

	/* CVS "Index: " line? */
	if (!strncmp(line, "Index: ", 7))
		return 1;

	/*
	 * "--- <filename>" starts patches without headers
	 * "---<sp>*" is a manual separator
	 */
	if (strncmp(line, "---", 3))
		return 0;
	p = line + 3;

	/* space followed by a filename? */
	if (p[0] == ' ' && !isspace((unsigned char)p[1]))
		return 1;

	/* Just whitespace up to the newline? */
	for (;;) {
		unsigned char c = *p++;
		if (c == '\n')
			return 1;
		if (!isspace(c))
			return 0;
	}
}
+
+
+static int handle_commit_msg(char *line, unsigned linesize)
{
static int still_looking = 1;
+ char *endline = line + linesize;
if (!cmitmsg)
return 0;
if (!*cp)
return 0;
}
- if ((still_looking = check_header(cp, s_hdr_data)) != 0)
+ if ((still_looking = check_header(cp, endline - cp, s_hdr_data, 0)) != 0)
return 0;
}
- if (!memcmp("diff -", line, 6) ||
- !memcmp("---", line, 3) ||
- !memcmp("Index: ", line, 7)) {
+ /* normalize the log message to UTF-8. */
+ if (metainfo_charset)
+ convert_to_utf8(line, endline - line, charset);
+
+ if (patchbreak(line)) {
fclose(cmitmsg);
cmitmsg = NULL;
return 1;
return 0;
}
-static int handle_filter(char *line)
+static int handle_filter(char *line, unsigned linesize)
{
static int filter = 0;
*/
switch (filter) {
case 0:
- if (!handle_commit_msg(line))
+ if (!handle_commit_msg(line, linesize))
break;
filter++;
case 1:
/* flush any leftover */
if ((transfer_encoding == TE_BASE64) &&
(np != newline)) {
- handle_filter(newline);
+ handle_filter(newline, sizeof(newline));
}
if (!handle_boundary())
return;
}
- /* Unwrap transfer encoding and optionally
- * normalize the log message to UTF-8.
- */
- decode_transfer_encoding(line);
- if (metainfo_charset)
- convert_to_utf8(line, charset);
+ /* Unwrap transfer encoding */
+ decode_transfer_encoding(line, sizeof(line));
switch (transfer_encoding) {
case TE_BASE64:
/* binary data most likely doesn't have newlines */
if (message_type != TYPE_TEXT) {
- rc = handle_filter(line);
+ rc = handle_filter(line, sizeof(newline));
break;
}
/* should be sitting on a new line */
*(++np) = 0;
op++;
- rc = handle_filter(newline);
+ rc = handle_filter(newline, sizeof(newline));
np = newline;
}
} while (*op != 0);
break;
}
default:
- rc = handle_filter(line);
+ rc = handle_filter(line, sizeof(newline));
}
if (rc)
/* nothing left to filter */
return;
}
+static void output_header_lines(FILE *fout, const char *hdr, char *data) /*
+ * Print "data" to fout as "hdr: <piece>" lines, one output line for
+ * each newline-separated piece of "data".  "data" is only read.  When
+ * "data" ends with a newline, the empty piece after it is printed as
+ * well.
+ */
+{
+ while (1) {
+ char *ep = strchr(data, '\n'); /* end of the current piece; NULL on the last one */
+ int len;
+ if (!ep)
+ len = strlen(data);
+ else
+ len = ep - data;
+ fprintf(fout, "%s: %.*s\n", hdr, len, data); /* %.*s prints at most len bytes */
+ if (!ep)
+ break;
+ data = ep + 1; /* continue after the newline */
+ }
+}
+
static void handle_info(void)
{
char *sub;
continue;
if (!memcmp(header[i], "Subject", 7)) {
- sub = cleanup_subject(hdr);
- cleanup_space(sub);
- fprintf(fout, "Subject: %s\n", sub);
+ if (keep_subject)
+ sub = hdr;
+ else {
+ sub = cleanup_subject(hdr);
+ cleanup_space(sub);
+ }
+ output_header_lines(fout, "Subject", sub);
} else if (!memcmp(header[i], "From", 4)) {
handle_from(hdr);
fprintf(fout, "Author: %s\n", name);
fprintf(fout, "\n");
}
-int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
- const char *msg, const char *patch)
+static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
+ const char *msg, const char *patch)
{
+ int peek;
keep_subject = ks;
metainfo_charset = encoding;
fin = in;
p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
+ do {
+ peek = fgetc(in);
+ } while (isspace(peek));
+ ungetc(peek, in);
+
/* process the email header */
while (read_one_header_line(line, sizeof(line), fin))
- check_header(line, p_hdr_data);
+ check_header(line, sizeof(line), p_hdr_data, 1);
handle_body();
handle_info();