* Another stupid program, this one parsing the headers of an
* email to figure out authorship and subject
*/
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#ifndef NO_ICONV
-#include <iconv.h>
-#endif
-#include "git-compat-util.h"
#include "cache.h"
#include "builtin.h"
+#include "utf8.h"
static FILE *cmitmsg, *patchfile, *fin, *fout;
-static int keep_subject = 0;
-static const char *metainfo_charset = NULL;
+static int keep_subject;
+static const char *metainfo_charset;
static char line[1000];
static char date[1000];
static char name[1000];
static char multipart_boundary[1000];
static int multipart_boundary_len;
-static int patch_lines = 0;
+static int patch_lines;
static char *sanity_check(char *name, char *email)
{
static int slurp_attr(const char *line, const char *name, char *attr)
{
- char *ends, *ap = strcasestr(line, name);
+ const char *ends, *ap = strcasestr(line, name);
size_t sz;
if (!ap) {
}
}
-static void decode_header_bq(char *it);
+static void decode_header(char *it);
typedef int (*header_fn_t)(char *);
struct header_def {
const char *name;
/* Unwrap inline B and Q encoding, and optionally
* normalize the meta information to the charset
* selected in metainfo_charset.
*/
- decode_header_bq(line + len + 2);
+ decode_header(line + len + 2);
header[i].func(line + len + 2);
break;
}
*/
int ch;
char *cp = line;
+
+ /* Count mbox From headers as headers */
+ if (!memcmp(line, "From ", 5) || !memcmp(line, ">From ", 6))
+ return 1;
+
while ((ch = *cp++)) {
if (ch == ':')
return cp != line;
return 0;
}
+/*
+ * Read one header line from 'in' into 'line', appending any folded
+ * continuation lines (physical lines that start with a space or a
+ * tab) to it.
+ *
+ * sz is size of 'line' buffer in bytes.  Must be reasonably
+ * long enough to hold one physical real-world e-mail line;
+ * continuation text that does not fit in the space that is left
+ * is truncated or dropped.
+ *
+ * Returns 1 when a header line was read.  Returns 0 when nothing
+ * could be read, or when the line is empty or is not an rfc2822
+ * header; in the latter two cases 'line' holds that line with a
+ * newline re-added at its end.
+ *
+ * NOTE(review): eatspace() is defined elsewhere; it is presumably
+ * trimming trailing whitespace and returning the remaining length.
+ */
static int read_one_header_line(char *line, int sz, FILE *in)
{
- int ofs = 0;
- while (ofs < sz) {
- int peek, len;
- if (fgets(line + ofs, sz - ofs, in) == NULL)
- break;
- len = eatspace(line + ofs);
- if ((len == 0) || !is_rfc2822_header(line)) {
- /* Re-add the newline */
- line[ofs + len] = '\n';
- line[ofs + len + 1] = '\0';
- break;
- }
- ofs += len;
- /* Yuck, 2822 header "folding" */
+ int len;
+
+ /*
+ * Reserve one byte of the buffer.  After this decrement the
+ * fgets() below stores at most (sz-1) bytes plus a NUL, so
+ * re-adding a newline and a NUL after the text ends at
+ * line[sz] at the latest, which is still inside the buffer
+ * the caller handed us.  The final NUL written at line[len]
+ * is in bounds for the same reason.
+ */
+ sz--;
+
+ /* Get the first part of the line. */
+ if (!fgets(line, sz, in))
+ return 0;
+
+ /*
+ * Is it an empty line or not a valid rfc2822 header?
+ * If so, stop here, and return false ("not a header")
+ */
+ len = eatspace(line);
+ if (!len || !is_rfc2822_header(line)) {
+ /* Re-add the newline */
+ line[len] = '\n';
+ line[len + 1] = '\0';
+ return 0;
+ }
+
+ /*
+ * Now we need to eat all the continuation lines..
+ * Yuck, 2822 header "folding"
+ *
+ * Peek at the next byte and push it back: only a line that
+ * starts with a space or a tab continues this header.  Each
+ * continuation is read into its own buffer and then copied
+ * after the text collected so far, clipped so that len stays
+ * below sz; once the buffer is full further continuations
+ * are still consumed but not stored.
+ */
+ for (;;) {
+ int peek, addlen;
+ static char continuation[1000];
+
peek = fgetc(in); ungetc(peek, in);
if (peek != ' ' && peek != '\t')
break;
+ if (!fgets(continuation, sizeof(continuation), in))
+ break;
+ addlen = eatspace(continuation);
+ if (len < sz - 1) {
+ if (addlen >= sz - len)
+ addlen = sz - len - 1;
+ memcpy(line + len, continuation, addlen);
+ len += addlen;
+ }
}
- /* Count mbox From headers as headers */
- if (!ofs && !memcmp(line, "From ", 5))
- ofs = 1;
- return ofs;
-}
+ line[len] = 0;
-static unsigned hexval(int c)
-{
- if (c >= '0' && c <= '9')
- return c - '0';
- if (c >= 'a' && c <= 'f')
- return c - 'a' + 10;
- if (c >= 'A' && c <= 'F')
- return c - 'A' + 10;
- return ~0;
+ return 1;
}
static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047)
static void convert_to_utf8(char *line, char *charset)
{
-#ifndef NO_ICONV
- char *in, *out;
- size_t insize, outsize, nrc;
- char outbuf[4096]; /* cheat */
static char latin_one[] = "latin1";
char *input_charset = *charset ? charset : latin_one;
- iconv_t conv = iconv_open(metainfo_charset, input_charset);
-
- if (conv == (iconv_t) -1) {
- static int warned_latin1_once = 0;
- if (input_charset != latin_one) {
- fprintf(stderr, "cannot convert from %s to %s\n",
- input_charset, metainfo_charset);
- *charset = 0;
- }
- else if (!warned_latin1_once) {
- warned_latin1_once = 1;
- fprintf(stderr, "tried to convert from %s to %s, "
- "but your iconv does not work with it.\n",
- input_charset, metainfo_charset);
- }
- return;
- }
- in = line;
- insize = strlen(in);
- out = outbuf;
- outsize = sizeof(outbuf);
- nrc = iconv(conv, &in, &insize, &out, &outsize);
- iconv_close(conv);
- if (nrc == (size_t) -1)
- return;
- *out = 0;
- strcpy(line, outbuf);
-#endif
+ char *out = reencode_string(line, metainfo_charset, input_charset);
+ /*
+ * out is the copy of line re-encoded from input_charset
+ * (latin1 when the caller passed an empty charset) to
+ * metainfo_charset; it is freed below.  A NULL result is
+ * treated as a conversion failure and reported via die().
+ *
+ * NOTE(review): the strcpy() below assumes the re-encoded
+ * text fits in the caller's buffer, whose size is not known
+ * in this function -- confirm that the conversion cannot
+ * outgrow it.
+ */
+ if (!out)
+ die("cannot convert from %s to %s\n",
+ input_charset, metainfo_charset);
+ strcpy(line, out);
+ free(out);
}
-static void decode_header_bq(char *it)
+static int decode_header_bq(char *it)
{
char *in, *out, *ep, *cp, *sp;
char outbuf[1000];
+ int rfc2047 = 0;
in = it;
out = outbuf;
while ((ep = strstr(in, "=?")) != NULL) {
int sz, encoding;
char charset_q[256], piecebuf[256];
+ rfc2047 = 1;
+
if (in != ep) {
sz = ep - in;
memcpy(out, in, sz);
ep += 2;
cp = strchr(ep, '?');
if (!cp)
- return; /* no munging */
+ return rfc2047; /* no munging */
for (sp = ep; sp < cp; sp++)
charset_q[sp - ep] = tolower(*sp);
charset_q[cp - ep] = 0;
encoding = cp[1];
if (!encoding || cp[2] != '?')
- return; /* no munging */
+ return rfc2047; /* no munging */
ep = strstr(cp + 3, "?=");
if (!ep)
- return; /* no munging */
+ return rfc2047; /* no munging */
switch (tolower(encoding)) {
default:
- return; /* no munging */
+ return rfc2047; /* no munging */
case 'b':
sz = decode_b_segment(cp + 3, piecebuf, ep);
break;
break;
}
if (sz < 0)
- return;
+ return rfc2047;
if (metainfo_charset)
convert_to_utf8(piecebuf, charset_q);
strcpy(out, piecebuf);
}
strcpy(out, in);
strcpy(it, outbuf);
+ return rfc2047;
+}
+
+static void decode_header(char *it)
+{
+ /*
+ * Decode the header value "it" in place.  decode_header_bq()
+ * returns non-zero once it has seen an "=?" marker in the
+ * value; in that case it is left as decode_header_bq() left
+ * it and nothing more is attempted here.
+ */
+ if (decode_header_bq(it))
+ return;
+ /* otherwise "it" is a straight copy of the input.
+ * This can be binary guck but there is no charset specified,
+ * so when a target charset is set the value is re-encoded
+ * with an empty source charset, which convert_to_utf8()
+ * treats as latin1.
+ */
+ if (metainfo_charset)
+ convert_to_utf8(it, "");
}
static void decode_transfer_encoding(char *line)
static const char mailinfo_usage[] =
"git-mailinfo [-k] [-u | --encoding=<encoding>] msg patch <mail >info";
-int cmd_mailinfo(int argc, const char **argv, char **envp)
+int cmd_mailinfo(int argc, const char **argv, const char *prefix)
{
+ const char *def_charset;
+
/* NEEDSWORK: might want to do the optional .git/ directory
* discovery
*/
git_config(git_default_config);
+ def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8");
+ metainfo_charset = def_charset;
+
while (1 < argc && argv[1][0] == '-') {
if (!strcmp(argv[1], "-k"))
keep_subject = 1;
else if (!strcmp(argv[1], "-u"))
- metainfo_charset = git_commit_encoding;
+ metainfo_charset = def_charset;
+ else if (!strcmp(argv[1], "-n"))
+ metainfo_charset = NULL;
else if (!strncmp(argv[1], "--encoding=", 11))
metainfo_charset = argv[1] + 11;
else