"From","Subject","Date",
};
-static int check_header(char *line, char **hdr_data)
+static int check_header(char *line, char **hdr_data, int overwrite)
{
int i;
/* search for the interesting parts */
for (i = 0; header[i]; i++) {
int len = strlen(header[i]);
- if (!hdr_data[i] &&
+ if ((!hdr_data[i] || overwrite) &&
!strncasecmp(line, header[i], len) &&
line[len] == ':' && isspace(line[len + 1])) {
/* Unwrap inline B and Q encoding, and optionally
return 0;
}
+/*
+ * When there is no known charset, guess.
+ *
+ * Right now we assume that if the target is UTF-8 (the default),
+ * and it already looks like UTF-8 (which includes US-ASCII as its
+ * subset, of course) then that is what it is and there is nothing
+ * to do.
+ *
+ * Otherwise, we default to assuming it is Latin1 for historical
+ * reasons.
+ *
+ * Returns NULL when the line needs no conversion, or the string
+ * "latin1" as the charset to assume for the input.  Note that the
+ * Latin1 answer is also given whenever the target charset is not
+ * UTF-8: in that case the line contents are not inspected at all.
+ */
+static const char *guess_charset(const char *line, const char *target_charset)
+{
+ if (is_encoding_utf8(target_charset)) {
+ if (is_utf8(line))
+ return NULL; /* no conversion needed */
+ }
+ return "latin1"; /* fallback for every other case */
+}
+
/*
 * Re-encode `line` in place from `charset` into metainfo_charset.
 * Despite the function name, the target is whatever metainfo_charset
 * holds, not necessarily UTF-8.
 *
 * This block is shown as a diff: "-" lines are the previous body and
 * "+" lines are the replacement.
 *
 *  - Previous body: dereferenced `charset` unconditionally and used
 *    "latin1" whenever it was the empty string.
 *  - New body: a NULL or empty `charset` is replaced by the answer
 *    from guess_charset(); when that answer is NULL the function
 *    returns early and `line` is left untouched.
 *
 * Calls die() when reencode_string() returns NULL.
 *
 * NOTE(review): the converted text is copied back with strcpy() and
 * no buffer size is passed in, so this relies on the caller's buffer
 * being large enough for the re-encoded text (presumably it can be
 * longer than the input) -- confirm against the callers.
 */
static void convert_to_utf8(char *line, const char *charset)
{
- static const char latin_one[] = "latin1";
- const char *input_charset = *charset ? charset : latin_one;
- char *out = reencode_string(line, metainfo_charset, input_charset);
+ char *out;
+ if (!charset || !*charset) {
+ charset = guess_charset(line, metainfo_charset);
+ if (!charset)
+ return; /* guess_charset() found nothing to convert */
+ }
+
+ out = reencode_string(line, metainfo_charset, charset);
if (!out)
die("cannot convert from %s to %s\n",
- input_charset, metainfo_charset);
+ charset, metainfo_charset);
strcpy(line, out); /* unbounded copy back into the caller's buffer */
free(out);
}
static int handle_boundary(void)
{
+ char newline[]="\n";
again:
if (!memcmp(line+content_top->boundary_len, "--", 2)) {
/* we hit an end boundary */
"can't recover\n");
exit(1);
}
- handle_filter("\n");
+ handle_filter(newline);
/* skip to the next boundary */
if (!find_boundary())
/* slurp in this section's info */
while (read_one_header_line(line, sizeof(line), fin))
- check_header(line, p_hdr_data);
+ check_header(line, p_hdr_data, 0);
/* eat the blank line after section info */
return (fgets(line, sizeof(line), fin) != NULL);
if (!*cp)
return 0;
}
- if ((still_looking = check_header(cp, s_hdr_data)) != 0)
+ if ((still_looking = check_header(cp, s_hdr_data, 0)) != 0)
return 0;
}
+ /* normalize the log message to UTF-8. */
+ if (metainfo_charset)
+ convert_to_utf8(line, charset);
+
if (patchbreak(line)) {
fclose(cmitmsg);
cmitmsg = NULL;
return;
}
- /* Unwrap transfer encoding and optionally
- * normalize the log message to UTF-8.
- */
+ /* Unwrap transfer encoding */
decode_transfer_encoding(line);
- if (metainfo_charset)
- convert_to_utf8(line, charset);
switch (transfer_encoding) {
case TE_BASE64:
fprintf(fout, "\n");
}
-int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
- const char *msg, const char *patch)
+static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
+ const char *msg, const char *patch)
{
keep_subject = ks;
metainfo_charset = encoding;
/* process the email header */
while (read_one_header_line(line, sizeof(line), fin))
- check_header(line, p_hdr_data);
+ check_header(line, p_hdr_data, 1);
handle_body();
handle_info();