Consistent message encoding while reusing log from an existing commit.
[gitweb.git] / commit.c
index d5103cd3c6358bea2acc8040dfb6eb3afd273420..9b2b842e7dcc153a12b35394a2e3f88f146b6225 100644 (file)
--- a/commit.c
+++ b/commit.c
@@ -2,6 +2,7 @@
 #include "tag.h"
 #include "commit.h"
 #include "pkt-line.h"
+#include "utf8.h"
 
 int save_commit_buffer = 1;
 
@@ -248,8 +249,10 @@ int write_shallow_commits(int fd, int use_pack_protocol)
                        if (use_pack_protocol)
                                packet_write(fd, "shallow %s", hex);
                        else {
-                               write(fd, hex,  40);
-                               write(fd, "\n", 1);
+                               if (write_in_full(fd, hex,  40) != 40)
+                                       break;
+                               if (write_in_full(fd, "\n", 1) != 1)
+                                       break;
                        }
                }
        return count;
@@ -461,20 +464,29 @@ static int get_one_line(const char *msg, unsigned long len)
        return ret;
 }
 
+/*
+ * Tell whether a byte is something other than plain 7-bit ASCII text:
+ * either its high bit is set, or it is ESC (0x1b), the ISO-2022-INT case.
+ * Only the low eight bits of ch are looked at, so a sign-extended char
+ * value gives the same answer as the corresponding unsigned byte.
+ */
+static int non_ascii(int ch)
+{
+       const int byte = ch & 0xff;
+
+       if (byte == 0x1b)
+               return 1;
+       return (byte & 0x80) != 0;
+}
+
 static int is_rfc2047_special(char ch)
 {
-       return ((ch & 0x80) || (ch == '=') || (ch == '?') || (ch == '_'));
+       return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
 }
 
-static int add_rfc2047(char *buf, const char *line, int len)
+static int add_rfc2047(char *buf, const char *line, int len,
+                      const char *encoding)
 {
        char *bp = buf;
        int i, needquote;
-       static const char q_utf8[] = "=?utf-8?q?";
+       char q_encoding[128];
+       const char *q_encoding_fmt = "=?%s?q?";
 
        for (i = needquote = 0; !needquote && i < len; i++) {
-               unsigned ch = line[i];
-               if (ch & 0x80)
+               int ch = line[i];
+               if (non_ascii(ch))
                        needquote++;
                if ((i + 1 < len) &&
                    (ch == '=' && line[i+1] == '?'))
@@ -483,8 +495,11 @@ static int add_rfc2047(char *buf, const char *line, int len)
        if (!needquote)
                return sprintf(buf, "%.*s", len, line);
 
-       memcpy(bp, q_utf8, sizeof(q_utf8)-1);
-       bp += sizeof(q_utf8)-1;
+       /*
+        * snprintf() returns the length it wanted to write, not counting
+        * the terminating NUL, so a result equal to the buffer size already
+        * means the "=?<encoding>?q?" prefix was truncated.
+        */
+       i = snprintf(q_encoding, sizeof(q_encoding), q_encoding_fmt, encoding);
+       if (i < 0 || sizeof(q_encoding) <= (size_t)i)
+               die("Insanely long encoding name %s", encoding);
+       memcpy(bp, q_encoding, i);
+       bp += i;
        for (i = 0; i < len; i++) {
                unsigned ch = line[i] & 0xFF;
                if (is_rfc2047_special(ch)) {
@@ -502,7 +517,8 @@ static int add_rfc2047(char *buf, const char *line, int len)
 }
 
 static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf,
-                        const char *line, int relative_date)
+                        const char *line, int relative_date,
+                        const char *encoding)
 {
        char *date;
        int namelen;
@@ -530,7 +546,8 @@ static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf,
                filler = "";
                strcpy(buf, "From: ");
                ret = strlen(buf);
-               ret += add_rfc2047(buf + ret, line, display_name_length);
+               ret += add_rfc2047(buf + ret, line, display_name_length,
+                                  encoding);
                memcpy(buf + ret, name_tail, namelen - display_name_length);
                ret += namelen - display_name_length;
                buf[ret++] = '\n';
@@ -597,17 +614,126 @@ static int add_merge_info(enum cmit_fmt fmt, char *buf, const struct commit *com
        return offset;
 }
 
-unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
-                                 unsigned long len, char *buf, unsigned long space,
+/*
+ * Look up the header named `key` in the raw commit text held in
+ * commit->buffer.
+ *
+ * Lines are examined one at a time and the scan stops at the first empty
+ * line.  A line matches when it begins with `key` followed by one space.
+ * Returns an xmalloc()ed, NUL-terminated copy of the text after that
+ * space (the caller frees it), or NULL when no such line is found.
+ */
+static char *get_header(const struct commit *commit, const char *key)
+{
+       int key_len = strlen(key);
+       const char *line = commit->buffer;
+
+       /*
+        * `line` becomes NULL once a final line without a trailing newline
+        * has been examined; stop there instead of passing NULL to strchr().
+        */
+       while (line) {
+               const char *eol = strchr(line, '\n'), *next;
+
+               if (line == eol)
+                       return NULL; /* empty line: nothing more to scan */
+               if (!eol) {
+                       eol = line + strlen(line);
+                       next = NULL;
+               } else
+                       next = eol + 1;
+               if (!strncmp(line, key, key_len) && line[key_len] == ' ') {
+                       /* len covers the value plus its terminating NUL */
+                       int len = eol - line - key_len;
+                       char *ret = xmalloc(len);
+                       memcpy(ret, line + key_len + 1, len - 1);
+                       ret[len - 1] = '\0';
+                       return ret;
+               }
+               line = next;
+       }
+       return NULL;
+}
+
+/*
+ * Bring the "encoding" header of a raw commit text in line with the
+ * encoding the text is now in.
+ *
+ * buf is a NUL-terminated commit text that may be grown with xrealloc(),
+ * so callers must keep using the returned pointer.  If
+ * is_encoding_utf8(encoding) holds, the header line is removed; otherwise
+ * its value is replaced by `encoding`.  When the header section (the part
+ * before the first blank line) has no encoding line, buf is returned
+ * untouched.
+ */
+static char *replace_encoding_header(char *buf, char *encoding)
+{
+       char *end_of_headers = strstr(buf, "\n\n");
+       char *encoding_header = strstr(buf, "\nencoding ");
+       char *end_of_encoding_header;
+       int encoding_header_pos;
+       int encoding_header_len;
+       int new_len;
+       int need_len;
+       int buflen = strlen(buf) + 1;
+
+       if (!encoding_header)
+               return buf; /* no "encoding " line anywhere in the text */
+       /*
+        * strstr() found the first "\nencoding " anywhere in the text.  If
+        * that lies past the blank line that ends the headers, it is a line
+        * of the log message itself and must not be rewritten.
+        */
+       if (end_of_headers && end_of_headers < encoding_header)
+               return buf;
+       encoding_header++;
+       end_of_encoding_header = strchr(encoding_header, '\n');
+       if (!end_of_encoding_header)
+               return buf; /* should not happen but be defensive */
+       end_of_encoding_header++;
+
+       encoding_header_len = end_of_encoding_header - encoding_header;
+       encoding_header_pos = encoding_header - buf;
+
+       if (is_encoding_utf8(encoding)) {
+               /* we have re-coded to UTF-8; drop the header */
+               memmove(encoding_header, end_of_encoding_header,
+                       buflen - (encoding_header_pos + encoding_header_len));
+               return buf;
+       }
+       new_len = strlen(encoding);
+       need_len = new_len + strlen("encoding \n");
+       if (encoding_header_len < need_len) {
+               /* the new header line is longer: make room first */
+               buf = xrealloc(buf, buflen + (need_len - encoding_header_len));
+               encoding_header = buf + encoding_header_pos;
+               end_of_encoding_header = encoding_header + encoding_header_len;
+       }
+       /* shift the rest of the text, including its NUL, into place */
+       memmove(end_of_encoding_header + (need_len - encoding_header_len),
+               end_of_encoding_header,
+               buflen - (encoding_header_pos + encoding_header_len));
+       /* 9 == strlen("encoding "): overwrite only the header's value */
+       memcpy(encoding_header + 9, encoding, new_len);
+       encoding_header[9 + new_len] = '\n';
+       return buf;
+}
+
+/*
+ * Produce a copy of commit->buffer expressed in output_encoding.
+ *
+ * The commit's own encoding is taken from its "encoding" header and
+ * defaults to "utf-8" when that header is absent.  If it equals
+ * output_encoding the text is simply duplicated, otherwise it goes
+ * through reencode_string(); either way the encoding header of the copy
+ * is then adjusted by replace_encoding_header().
+ *
+ * Returns a heap-allocated string the caller frees, or NULL when
+ * output_encoding is the empty string or no copy could be produced.
+ */
+static char *logmsg_reencode(const struct commit *commit,
+                            char *output_encoding)
+{
+       char *commit_encoding;
+       char *result;
+       int encoding_is_owned = 1;
+
+       if (*output_encoding == '\0')
+               return NULL;
+
+       commit_encoding = get_header(commit, "encoding");
+       if (!commit_encoding) {
+               /* fall back to a literal that must not be free()d */
+               commit_encoding = "utf-8";
+               encoding_is_owned = 0;
+       }
+
+       if (strcmp(commit_encoding, output_encoding) == 0)
+               result = strdup(commit->buffer);
+       else
+               result = reencode_string(commit->buffer,
+                                        output_encoding, commit_encoding);
+       if (result)
+               result = replace_encoding_header(result, output_encoding);
+
+       if (encoding_is_owned)
+               free(commit_encoding);
+       return result;
+}
+
+unsigned long pretty_print_commit(enum cmit_fmt fmt,
+                                 const struct commit *commit,
+                                 unsigned long len,
+                                 char *buf, unsigned long space,
                                  int abbrev, const char *subject,
-                                 const char *after_subject, int relative_date)
+                                 const char *after_subject,
+                                 int relative_date)
 {
-       int hdr = 1, body = 0;
+       int hdr = 1, body = 0, seen_title = 0;
        unsigned long offset = 0;
        int indent = 4;
        int parents_shown = 0;
        const char *msg = commit->buffer;
        int plain_non_ascii = 0;
+       char *reencoded;
+       char *encoding;
+
+       encoding = (git_log_output_encoding
+                   ? git_log_output_encoding
+                   : git_commit_encoding);
+       if (!encoding)
+               encoding = "utf-8";
+       reencoded = logmsg_reencode(commit, encoding);
+       if (reencoded)
+               msg = reencoded;
 
        if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
                indent = 0;
@@ -624,14 +750,14 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
                for (in_body = i = 0; (ch = msg[i]) && i < len; i++) {
                        if (!in_body) {
                                /* author could be non 7-bit ASCII but
-                                * the log may so; skip over the
+                                * the log may be so; skip over the
                                 * header part first.
                                 */
                                if (ch == '\n' &&
                                    i + 1 < len && msg[i+1] == '\n')
                                        in_body = 1;
                        }
-                       else if (ch & 0x80) {
+                       else if (non_ascii(ch)) {
                                plain_non_ascii = 1;
                                break;
                        }
@@ -690,13 +816,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
                                offset += add_user_info("Author", fmt,
                                                        buf + offset,
                                                        line + 7,
-                                                       relative_date);
+                                                       relative_date,
+                                                       encoding);
                        if (!memcmp(line, "committer ", 10) &&
                            (fmt == CMIT_FMT_FULL || fmt == CMIT_FMT_FULLER))
                                offset += add_user_info("Commit", fmt,
                                                        buf + offset,
                                                        line + 10,
-                                                       relative_date);
+                                                       relative_date,
+                                                       encoding);
                        continue;
                }
 
@@ -704,6 +832,8 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
                        body = 1;
 
                if (is_empty_line(line, &linelen)) {
+                       if (!seen_title)
+                               continue;
                        if (!body)
                                continue;
                        if (subject)
@@ -712,11 +842,13 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
                                break;
                }
 
+               seen_title = 1;
                if (subject) {
                        int slen = strlen(subject);
                        memcpy(buf + offset, subject, slen);
                        offset += slen;
-                       offset += add_rfc2047(buf + offset, line, linelen);
+                       offset += add_rfc2047(buf + offset, line, linelen,
+                                             encoding);
                }
                else {
                        memset(buf + offset, ' ', indent);
@@ -727,11 +859,17 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
                if (fmt == CMIT_FMT_ONELINE)
                        break;
                if (subject && plain_non_ascii) {
-                       static const char header[] =
-                               "Content-Type: text/plain; charset=UTF-8\n"
+                       int sz;
+                       char header[512];
+                       const char *header_fmt =
+                               "Content-Type: text/plain; charset=%s\n"
                                "Content-Transfer-Encoding: 8bit\n";
-                       memcpy(buf + offset, header, sizeof(header)-1);
-                       offset += sizeof(header)-1;
+                       /*
+                        * snprintf() reports the untruncated length without
+                        * the NUL; a result equal to the buffer size means
+                        * the header text was cut short.
+                        */
+                       sz = snprintf(header, sizeof(header), header_fmt,
+                                     encoding);
+                       if (sz < 0 || sizeof(header) <= (size_t)sz)
+                               die("Encoding name %s too long", encoding);
+                       memcpy(buf + offset, header, sz);
+                       offset += sz;
                }
                if (after_subject) {
                        int slen = strlen(after_subject);
@@ -755,6 +893,8 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
        if (fmt == CMIT_FMT_EMAIL && !body)
                buf[offset++] = '\n';
        buf[offset] = '\0';
+
+       free(reencoded);
        return offset;
 }
 
@@ -900,13 +1040,15 @@ void sort_in_topological_order_fn(struct commit_list ** list, int lifo,
        free(nodes);
 }
 
-/* merge-rebase stuff */
+/* merge-base stuff */
+
+/* bits #0..15 in revision.h */
+#define PARENT1                (1u<<16)
+#define PARENT2                (1u<<17)
+#define STALE          (1u<<18)
+#define RESULT         (1u<<19)
 
-/* bits #0..7 in revision.h */
-#define PARENT1                (1u<< 8)
-#define PARENT2                (1u<< 9)
-#define STALE          (1u<<10)
-#define RESULT         (1u<<11)
+static const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT);
 
 static struct commit *interesting(struct commit_list *list)
 {
@@ -972,6 +1114,7 @@ static struct commit_list *merge_bases(struct commit *one, struct commit *two)
        }
 
        /* Clean up the result to remove stale ones */
+       free_commit_list(list);
        list = result; result = NULL;
        while (list) {
                struct commit_list *n = list->next;
@@ -987,7 +1130,6 @@ struct commit_list *get_merge_bases(struct commit *one,
                                    struct commit *two,
                                     int cleanup)
 {
-       const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT);
        struct commit_list *list;
        struct commit **rslt;
        struct commit_list *result;
@@ -1043,3 +1185,20 @@ struct commit_list *get_merge_bases(struct commit *one,
        free(rslt);
        return result;
 }
+
+/*
+ * Report whether rev1 appears in the list returned by
+ * get_merge_bases(rev1, rev2, 1): 1 if some entry has the same object
+ * name as rev1, 0 otherwise.  The list is released before returning.
+ */
+int in_merge_bases(struct commit *rev1, struct commit *rev2)
+{
+       struct commit_list *bases = get_merge_bases(rev1, rev2, 1);
+       struct commit_list *cur = bases;
+       int found = 0;
+
+       while (cur && !found) {
+               if (hashcmp(rev1->object.sha1, cur->item->object.sha1) == 0)
+                       found = 1;
+               cur = cur->next;
+       }
+
+       free_commit_list(bases);
+       return found;
+}