am/mailinfo: Disable scissors processing by default
[gitweb.git] / builtin-mailinfo.c
index 1eeeb4de6d0d54e3fd753b7f057351094e10a24e..1c14d381d23074d1c6b2a2180c25629974e3b5c4 100644 (file)
@@ -25,6 +25,7 @@ static enum  {
 static struct strbuf charset = STRBUF_INIT;
 static int patch_lines;
 static struct strbuf **p_hdr_data, **s_hdr_data;
+static int use_scissors;
 
 #define MAX_HDR_PARSED 10
 #define MAX_BOUNDARIES 5
@@ -193,8 +194,7 @@ static void handle_content_type(struct strbuf *line)
                *content_top = boundary;
                boundary = NULL;
        }
-       if (slurp_attr(line->buf, "charset=", &charset))
-               strbuf_tolower(&charset);
+       slurp_attr(line->buf, "charset=", &charset);
 
        if (boundary) {
                strbuf_release(boundary);
@@ -481,7 +481,7 @@ static const char *guess_charset(const struct strbuf *line, const char *target_c
                if (is_utf8(line->buf))
                        return NULL;
        }
-       return "latin1";
+       return "ISO8859-1";
 }
 
 static void convert_to_utf8(struct strbuf *line, const char *charset)
@@ -494,7 +494,7 @@ static void convert_to_utf8(struct strbuf *line, const char *charset)
                        return;
        }
 
-       if (!strcmp(metainfo_charset, charset))
+       if (!strcasecmp(metainfo_charset, charset))
                return;
        out = reencode_string(line->buf, metainfo_charset, charset);
        if (!out)
@@ -550,7 +550,6 @@ static int decode_header_bq(struct strbuf *it)
                if (cp + 3 - it->buf > it->len)
                        goto decode_header_bq_out;
                strbuf_add(&charset_q, ep, cp - ep);
-               strbuf_tolower(&charset_q);
 
                encoding = cp[1];
                if (!encoding || cp[2] != '?')
@@ -714,6 +713,56 @@ static inline int patchbreak(const struct strbuf *line)
        return 0;
 }
 
+static int is_scissors_line(const struct strbuf *line)
+{
+       size_t i, len = line->len;
+       int scissors = 0, gap = 0;
+       int first_nonblank = -1;
+       int last_nonblank = 0, visible, perforation = 0, in_perforation = 0;
+       const char *buf = line->buf;
+
+       for (i = 0; i < len; i++) {
+               if (isspace(buf[i])) {
+                       if (in_perforation) {
+                               perforation++;
+                               gap++;
+                       }
+                       continue;
+               }
+               last_nonblank = i;
+               if (first_nonblank < 0)
+                       first_nonblank = i;
+               if (buf[i] == '-') {
+                       in_perforation = 1;
+                       perforation++;
+                       continue;
+               }
+               if (i + 1 < len &&
+                   (!memcmp(buf + i, ">8", 2) || !memcmp(buf + i, "8<", 2))) {
+                       in_perforation = 1;
+                       perforation += 2;
+                       scissors += 2;
+                       i++;
+                       continue;
+               }
+               in_perforation = 0;
+       }
+
+       /*
+        * The mark must be at least 8 bytes long (e.g. "-- >8 --").
+        * Even though there can be arbitrary cruft on the same line
+        * (e.g. "cut here"), in order to avoid misidentification, the
+        * perforation must occupy more than a third of the visible
+        * width of the line, and dashes and scissors must occupy more
+        * than half of the perforation.
+        */
+
+       visible = last_nonblank - first_nonblank + 1;
+       return (scissors && 8 <= visible &&
+               visible < perforation * 3 &&
+               gap * 2 < perforation);
+}
+
 static int handle_commit_msg(struct strbuf *line)
 {
        static int still_looking = 1;
@@ -725,7 +774,8 @@ static int handle_commit_msg(struct strbuf *line)
                strbuf_ltrim(line);
                if (!line->len)
                        return 0;
-               if ((still_looking = check_header(line, s_hdr_data, 0)) != 0)
+               still_looking = check_header(line, s_hdr_data, 0);
+               if (still_looking)
                        return 0;
        }
 
@@ -733,6 +783,24 @@ static int handle_commit_msg(struct strbuf *line)
        if (metainfo_charset)
                convert_to_utf8(line, charset.buf);
 
+       if (use_scissors && is_scissors_line(line)) {
+               int i;
+               rewind(cmitmsg);
+               ftruncate(fileno(cmitmsg), 0);
+               still_looking = 1;
+
+               /*
+                * We may have already read "secondary headers"; purge
+                * them to give ourselves a clean restart.
+                */
+               for (i = 0; header[i]; i++) {
+                       if (s_hdr_data[i])
+                               strbuf_release(s_hdr_data[i]);
+                       s_hdr_data[i] = NULL;
+               }
+               return 0;
+       }
+
        if (patchbreak(line)) {
                fclose(cmitmsg);
                cmitmsg = NULL;
@@ -767,7 +835,6 @@ static void handle_filter(struct strbuf *line)
 
 static void handle_body(void)
 {
-       int len = 0;
        struct strbuf prev = STRBUF_INIT;
 
        /* Skip up to the first boundary */
@@ -777,8 +844,6 @@ static void handle_body(void)
        }
 
        do {
-               strbuf_setlen(&line, line.len + len);
-
                /* process any boundary lines */
                if (*content_top && is_multipart_boundary(&line)) {
                        /* flush any leftover */
@@ -834,10 +899,7 @@ static void handle_body(void)
                        handle_filter(&line);
                }
 
-               strbuf_reset(&line);
-               if (strbuf_avail(&line) < 100)
-                       strbuf_grow(&line, 100);
-       } while ((len = read_line_with_nul(line.buf, strbuf_avail(&line), fin)));
+       } while (!strbuf_getwholeline(&line, fin, '\n'));
 
 handle_body_out:
        strbuf_release(&prev);
@@ -893,12 +955,9 @@ static void handle_info(void)
        fprintf(fout, "\n");
 }
 
-static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
-                   const char *msg, const char *patch)
+static int mailinfo(FILE *in, FILE *out, const char *msg, const char *patch)
 {
        int peek;
-       keep_subject = ks;
-       metainfo_charset = encoding;
        fin = in;
        fout = out;
 
@@ -944,7 +1003,7 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
         */
        git_config(git_default_config, NULL);
 
-       def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8");
+       def_charset = (git_commit_encoding ? git_commit_encoding : "UTF-8");
        metainfo_charset = def_charset;
 
        while (1 < argc && argv[1][0] == '-') {
@@ -956,6 +1015,10 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
                        metainfo_charset = NULL;
                else if (!prefixcmp(argv[1], "--encoding="))
                        metainfo_charset = argv[1] + 11;
+               else if (!strcmp(argv[1], "--scissors"))
+                       use_scissors = 1;
+               else if (!strcmp(argv[1], "--no-scissors"))
+                       use_scissors = 0;
                else
                        usage(mailinfo_usage);
                argc--; argv++;
@@ -964,5 +1027,5 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
        if (argc != 3)
                usage(mailinfo_usage);
 
-       return !!mailinfo(stdin, stdout, keep_subject, metainfo_charset, argv[1], argv[2]);
+       return !!mailinfo(stdin, stdout, argv[1], argv[2]);
 }