static enum {
TE_DONTCARE, TE_QP, TE_BASE64
} transfer_encoding;
-static enum {
- TYPE_TEXT, TYPE_OTHER
-} message_type;
static struct strbuf charset = STRBUF_INIT;
static int patch_lines;
const char *ends, *ap = strcasestr(line, name);
size_t sz;
- if (!ap) {
- strbuf_setlen(attr, 0);
+ strbuf_setlen(attr, 0);
+ if (!ap)
return 0;
- }
ap += strlen(name);
if (*ap == '"') {
ap++;
struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
strbuf_init(boundary, line->len);
- if (!strcasestr(line->buf, "text/"))
- message_type = TYPE_OTHER;
if (slurp_attr(line->buf, "boundary=", boundary)) {
strbuf_insert(boundary, 0, "--", 2);
if (++content_top > &content[MAX_BOUNDARIES]) {
return out;
}
-/*
- * When there is no known charset, guess.
- *
- * Right now we assume that if the target is UTF-8 (the default),
- * and it already looks like UTF-8 (which includes US-ASCII as its
- * subset, of course) then that is what it is and there is nothing
- * to do.
- *
- * Otherwise, we default to assuming it is Latin1 for historical
- * reasons.
- */
-static const char *guess_charset(const struct strbuf *line, const char *target_charset)
-{
- if (is_encoding_utf8(target_charset)) {
- if (is_utf8(line->buf))
- return NULL;
- }
- return "ISO8859-1";
-}
-
static void convert_to_utf8(struct strbuf *line, const char *charset)
{
char *out;
- if (!charset || !*charset) {
- charset = guess_charset(line, metainfo_charset);
- if (!charset)
- return;
- }
+ if (!charset || !*charset)
+ return;
- if (!strcasecmp(metainfo_charset, charset))
+ if (same_encoding(metainfo_charset, charset))
return;
out = reencode_string(line->buf, metainfo_charset, charset);
if (!out)
/* set some defaults */
transfer_encoding = TE_DONTCARE;
strbuf_reset(&charset);
- message_type = TYPE_TEXT;
/* slurp in this section's info */
while (read_one_header_line(&line, fin))
strbuf_insert(&line, 0, prev.buf, prev.len);
strbuf_reset(&prev);
- /* binary data most likely doesn't have newlines */
- if (message_type != TYPE_TEXT) {
- handle_filter(&line);
- break;
- }
/*
* This is a decoded line that may contain
* multiple new lines. Pass only one chunk