fast-import.c: fix regression due to strbuf conversion

diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c

index d94578cb4ac0649913db1542f876d5010ece7f0f..fb12248f825807b085f4e5ed761002c30925ead5 100644
--- a/builtin-mailinfo.c
+++ b/builtin-mailinfo.c
@@ -237,8 +237,6 @@ static int eatspace(char *line)
  
  static char *cleanup_subject(char *subject)
  {
-       if (keep_subject)
-               return subject;
         for (;;) {
                 char *p;
                 int len, remove;
@@ -289,25 +287,25 @@ static void cleanup_space(char *buf)
         }
  }
  
-static void decode_header(char *it);
-static char *header[MAX_HDR_PARSED] = {
+static void decode_header(char *it, unsigned itsize);
+static const char *header[MAX_HDR_PARSED] = {
         "From","Subject","Date",
  };
  
-static int check_header(char *line, char **hdr_data)
+static int check_header(char *line, unsigned linesize, char **hdr_data, int overwrite)
  {
         int i;
  
         /* search for the interesting parts */
         for (i = 0; header[i]; i++) {
                 int len = strlen(header[i]);
-               if (!hdr_data[i] &&
+               if ((!hdr_data[i] || overwrite) &&
                     !strncasecmp(line, header[i], len) &&
                     line[len] == ':' && isspace(line[len + 1])) {
                         /* Unwrap inline B and Q encoding, and optionally
                          * normalize the meta information to utf8.
                          */
-                       decode_header(line + len + 2);
+                       decode_header(line + len + 2, linesize - len - 2);
                         hdr_data[i] = xmalloc(1000 * sizeof(char));
                         if (! handle_header(line, hdr_data[i], len + 2)) {
                                 return 1;
@@ -318,14 +316,14 @@ static int check_header(char *line, char **hdr_data)
         /* Content stuff */
         if (!strncasecmp(line, "Content-Type", 12) &&
                 line[12] == ':' && isspace(line[12 + 1])) {
-               decode_header(line + 12 + 2);
+               decode_header(line + 12 + 2, linesize - 12 - 2);
                 if (! handle_content_type(line)) {
                         return 1;
                 }
         }
         if (!strncasecmp(line, "Content-Transfer-Encoding", 25) &&
                 line[25] == ':' && isspace(line[25 + 1])) {
-               decode_header(line + 25 + 2);
+               decode_header(line + 25 + 2, linesize - 25 - 2);
                 if (! handle_content_transfer_encoding(line)) {
                         return 1;
                 }
@@ -425,6 +423,7 @@ static int read_one_header_line(char *line, int sz, FILE *in)
                         if (addlen >= sz - len)
                                 addlen = sz - len - 1;
                         memcpy(line + len, continuation, addlen);
+                       line[len] = '\n';
                         len += addlen;
                 }
         }
@@ -433,10 +432,15 @@ static int read_one_header_line(char *line, int sz, FILE *in)
         return 1;
  }
  
-static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047)
+static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047)
  {
+       char *otend = ot + otsize;
         int c;
         while ((c = *in++) != 0 && (in <= ep)) {
+               if (ot == otend) {
+                       *--ot = '\0';
+                       return -1;
+               }
                 if (c == '=') {
                         int d = *in++;
                         if (d == '\n' || !d)
@@ -452,12 +456,17 @@ static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047)
         return 0;
  }
  
-static int decode_b_segment(char *in, char *ot, char *ep)
+static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
  {
         /* Decode in..ep, possibly in-place to ot */
         int c, pos = 0, acc = 0;
+       char *otend = ot + otsize;
  
         while ((c = *in++) != 0 && (in <= ep)) {
+               if (ot == otend) {
+                       *--ot = '\0';
+                       return -1;
+               }
                 if (c == '+')
                         c = 62;
                 else if (c == '/')
@@ -499,20 +508,47 @@ static int decode_b_segment(char *in, char *ot, char *ep)
         return 0;
  }
  
-static void convert_to_utf8(char *line, const char *charset)
+/*
+ * When there is no known charset, guess.
+ *
+ * Right now we assume that if the target is UTF-8 (the default),
+ * and it already looks like UTF-8 (which includes US-ASCII as its
+ * subset, of course) then that is what it is and there is nothing
+ * to do.
+ *
+ * Otherwise, we default to assuming it is Latin1 for historical
+ * reasons.
+ */
+static const char *guess_charset(const char *line, const char *target_charset)
+{
+       if (is_encoding_utf8(target_charset)) {
+               if (is_utf8(line))
+                       return NULL;
+       }
+       return "latin1";
+}
+
+static void convert_to_utf8(char *line, unsigned linesize, const char *charset)
  {
-       static const char latin_one[] = "latin1";
-       const char *input_charset = *charset ? charset : latin_one;
-       char *out = reencode_string(line, metainfo_charset, input_charset);
+       char *out;
  
+       if (!charset || !*charset) {
+               charset = guess_charset(line, metainfo_charset);
+               if (!charset)
+                       return;
+       }
+
+       if (!strcmp(metainfo_charset, charset))
+               return;
+       out = reencode_string(line, metainfo_charset, charset);
         if (!out)
                 die("cannot convert from %s to %s\n",
-                   input_charset, metainfo_charset);
-       strcpy(line, out);
+                   charset, metainfo_charset);
+       strlcpy(line, out, linesize);
         free(out);
  }
  
-static int decode_header_bq(char *it)
+static int decode_header_bq(char *it, unsigned itsize)
  {
         char *in, *out, *ep, *cp, *sp;
         char outbuf[1000];
@@ -552,56 +588,60 @@ static int decode_header_bq(char *it)
                 default:
                         return rfc2047; /* no munging */
                 case 'b':
-                       sz = decode_b_segment(cp + 3, piecebuf, ep);
+                       sz = decode_b_segment(cp + 3, piecebuf, sizeof(piecebuf), ep);
                         break;
                 case 'q':
-                       sz = decode_q_segment(cp + 3, piecebuf, ep, 1);
+                       sz = decode_q_segment(cp + 3, piecebuf, sizeof(piecebuf), ep, 1);
                         break;
                 }
                 if (sz < 0)
                         return rfc2047;
                 if (metainfo_charset)
-                       convert_to_utf8(piecebuf, charset_q);
+                       convert_to_utf8(piecebuf, sizeof(piecebuf), charset_q);
+
+               sz = strlen(piecebuf);
+               if (outbuf + sizeof(outbuf) <= out + sz)
+                       return rfc2047; /* no munging */
                 strcpy(out, piecebuf);
-               out += strlen(out);
+               out += sz;
                 in = ep + 2;
         }
         strcpy(out, in);
-       strcpy(it, outbuf);
+       strlcpy(it, outbuf, itsize);
         return rfc2047;
  }
  
-static void decode_header(char *it)
+static void decode_header(char *it, unsigned itsize)
  {
  
-       if (decode_header_bq(it))
+       if (decode_header_bq(it, itsize))
                 return;
         /* otherwise "it" is a straight copy of the input.
          * This can be binary guck but there is no charset specified.
          */
         if (metainfo_charset)
-               convert_to_utf8(it, "");
+               convert_to_utf8(it, itsize, "");
  }
  
-static void decode_transfer_encoding(char *line)
+static void decode_transfer_encoding(char *line, unsigned linesize)
  {
         char *ep;
  
         switch (transfer_encoding) {
         case TE_QP:
                 ep = line + strlen(line);
-               decode_q_segment(line, line, ep, 0);
+               decode_q_segment(line, line, linesize, ep, 0);
                 break;
         case TE_BASE64:
                 ep = line + strlen(line);
-               decode_b_segment(line, line, ep);
+               decode_b_segment(line, line, linesize, ep);
                 break;
         case TE_DONTCARE:
                 break;
         }
  }
  
-static int handle_filter(char *line);
+static int handle_filter(char *line, unsigned linesize);
  
  static int find_boundary(void)
  {
@@ -614,6 +654,7 @@ static int find_boundary(void)
  
  static int handle_boundary(void)
  {
+       char newline[]="\n";
  again:
         if (!memcmp(line+content_top->boundary_len, "--", 2)) {
                 /* we hit an end boundary */
@@ -628,7 +669,7 @@ static int handle_boundary(void)
                                         "can't recover\n");
                         exit(1);
                 }
-               handle_filter("\n");
+               handle_filter(newline, sizeof(newline));
  
                 /* skip to the next boundary */
                 if (!find_boundary())
@@ -643,7 +684,7 @@ static int handle_boundary(void)
  
         /* slurp in this section's info */
         while (read_one_header_line(line, sizeof(line), fin))
-               check_header(line, p_hdr_data);
+               check_header(line, sizeof(line), p_hdr_data, 0);
  
         /* eat the blank line after section info */
         return (fgets(line, sizeof(line), fin) != NULL);
@@ -682,9 +723,10 @@ static inline int patchbreak(const char *line)
  }
  
  
-static int handle_commit_msg(char *line)
+static int handle_commit_msg(char *line, unsigned linesize)
  {
         static int still_looking = 1;
+       char *endline = line + linesize;
  
         if (!cmitmsg)
                 return 0;
@@ -699,10 +741,14 @@ static int handle_commit_msg(char *line)
                         if (!*cp)
                                 return 0;
                 }
-               if ((still_looking = check_header(cp, s_hdr_data)) != 0)
+               if ((still_looking = check_header(cp, endline - cp, s_hdr_data, 0)) != 0)
                         return 0;
         }
  
+       /* normalize the log message to UTF-8. */
+       if (metainfo_charset)
+               convert_to_utf8(line, endline - line, charset);
+
         if (patchbreak(line)) {
                 fclose(cmitmsg);
                 cmitmsg = NULL;
@@ -720,7 +766,7 @@ static int handle_patch(char *line)
         return 0;
  }
  
-static int handle_filter(char *line)
+static int handle_filter(char *line, unsigned linesize)
  {
         static int filter = 0;
  
@@ -729,7 +775,7 @@ static int handle_filter(char *line)
          */
         switch (filter) {
         case 0:
-               if (!handle_commit_msg(line))
+               if (!handle_commit_msg(line, linesize))
                         break;
                 filter++;
         case 1:
@@ -761,18 +807,14 @@ static void handle_body(void)
                         /* flush any leftover */
                         if ((transfer_encoding == TE_BASE64)  &&
                             (np != newline)) {
-                               handle_filter(newline);
+                               handle_filter(newline, sizeof(newline));
                         }
                         if (!handle_boundary())
                                 return;
                 }
  
-               /* Unwrap transfer encoding and optionally
-                * normalize the log message to UTF-8.
-                */
-               decode_transfer_encoding(line);
-               if (metainfo_charset)
-                       convert_to_utf8(line, charset);
+               /* Unwrap transfer encoding */
+               decode_transfer_encoding(line, sizeof(line));
  
                 switch (transfer_encoding) {
                 case TE_BASE64:
@@ -781,7 +823,7 @@ static void handle_body(void)
  
                         /* binary data most likely doesn't have newlines */
                         if (message_type != TYPE_TEXT) {
-                               rc = handle_filter(line);
+                               rc = handle_filter(line, sizeof(newline));
                                 break;
                         }
  
@@ -798,7 +840,7 @@ static void handle_body(void)
                                         /* should be sitting on a new line */
                                         *(++np) = 0;
                                         op++;
-                                       rc = handle_filter(newline);
+                                       rc = handle_filter(newline, sizeof(newline));
                                         np = newline;
                                 }
                         } while (*op != 0);
@@ -808,7 +850,7 @@ static void handle_body(void)
                         break;
                 }
                 default:
-                       rc = handle_filter(line);
+                       rc = handle_filter(line, sizeof(newline));
                 }
                 if (rc)
                         /* nothing left to filter */
@@ -818,6 +860,22 @@ static void handle_body(void)
         return;
  }
  
+static void output_header_lines(FILE *fout, const char *hdr, char *data)
+{
+       while (1) {
+               char *ep = strchr(data, '\n');
+               int len;
+               if (!ep)
+                       len = strlen(data);
+               else
+                       len = ep - data;
+               fprintf(fout, "%s: %.*s\n", hdr, len, data);
+               if (!ep)
+                       break;
+               data = ep + 1;
+       }
+}
+
  static void handle_info(void)
  {
         char *sub;
@@ -835,9 +893,13 @@ static void handle_info(void)
                         continue;
  
                 if (!memcmp(header[i], "Subject", 7)) {
-                       sub = cleanup_subject(hdr);
-                       cleanup_space(sub);
-                       fprintf(fout, "Subject: %s\n", sub);
+                       if (keep_subject)
+                               sub = hdr;
+                       else {
+                               sub = cleanup_subject(hdr);
+                               cleanup_space(sub);
+                       }
+                       output_header_lines(fout, "Subject", sub);
                 } else if (!memcmp(header[i], "From", 4)) {
                         handle_from(hdr);
                         fprintf(fout, "Author: %s\n", name);
@@ -850,8 +912,8 @@ static void handle_info(void)
         fprintf(fout, "\n");
  }
  
-int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
-            const char *msg, const char *patch)
+static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
+                   const char *msg, const char *patch)
  {
         keep_subject = ks;
         metainfo_charset = encoding;
@@ -875,7 +937,7 @@ int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
  
         /* process the email header */
         while (read_one_header_line(line, sizeof(line), fin))
-               check_header(line, p_hdr_data);
+               check_header(line, sizeof(line), p_hdr_data, 1);
  
         handle_body();
         handle_info();