treat any file with NUL as binary
[gitweb.git] / pretty.c
index 9fbd73f748c6cd250b5e9534168072a1cea88a85..b987ff245b310a6693dc69ba8c71ef2915da7864 100644 (file)
--- a/pretty.c
+++ b/pretty.c
@@ -292,7 +292,18 @@ static void format_person_part(struct strbuf *sb, char part,
        /* parse name */
        for (end = 0; end < len && msg[end] != '<'; end++)
                ; /* do nothing */
+       /*
+        * If it does not even have a '<' and '>', that is
+        * quite a bogus commit author and we discard it;
+        * this is in line with add_user_info() that is used
+        * in the normal codepath.  When end points at the '<'
+        * that we found, a non-empty address and its matching
+        * '>' must follow, which means start (beginning of
+        * email address) must be strictly below len - 1.
+        */
        start = end + 1;
+       if (start >= len - 1)
+               return;
        while (end > 0 && isspace(msg[end - 1]))
                end--;
        if (part == 'n') {      /* name */
@@ -300,11 +311,8 @@ static void format_person_part(struct strbuf *sb, char part,
                return;
        }
 
-       if (start >= len)
-               return;
-
        /* parse email */
-       for (end = start + 1; end < len && msg[end] != '>'; end++)
+       for (end = start; end < len && msg[end] != '>'; end++)
                ; /* do nothing */
 
        if (end >= len)
@@ -354,14 +362,91 @@ static void format_person_part(struct strbuf *sb, char part,
        }
 }
 
-static void format_commit_item(struct strbuf *sb, const char *placeholder,
-                               void *context)
+struct chunk {
+       size_t off;
+       size_t len;
+};
+
+struct format_commit_context {
+       const struct commit *commit;
+
+       /* Set by parse_commit_header(); offsets are relative to commit->buffer. */
+       int commit_header_parsed;
+       struct chunk subject;
+       struct chunk author;
+       struct chunk committer;
+       struct chunk encoding;
+       size_t body_off;
+
+       /* The following ones are relative to the result struct strbuf. */
+       struct chunk abbrev_commit_hash;
+       struct chunk abbrev_tree_hash;
+       struct chunk abbrev_parent_hashes;
+};
+
+static int add_again(struct strbuf *sb, struct chunk *chunk)
 {
-       const struct commit *commit = context;
-       struct commit_list *p;
+       if (chunk->len) {
+               strbuf_adddup(sb, chunk->off, chunk->len);
+               return 1;
+       }
+
+       /*
+        * We haven't seen this chunk before.  Our caller is surely
+        * going to add it the hard way now.  Remember the most likely
+        * start of the to-be-added chunk: the current end of the
+        * struct strbuf.
+        */
+       chunk->off = sb->len;
+       return 0;
+}
+
+static void parse_commit_header(struct format_commit_context *context)
+{
+       const char *msg = context->commit->buffer;
        int i;
        enum { HEADER, SUBJECT, BODY } state;
+
+       for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
+               int eol;
+               for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
+                       ; /* do nothing */
+
+               if (state == SUBJECT) {
+                       context->subject.off = i;
+                       context->subject.len = eol - i;
+                       i = eol;
+               }
+               if (i == eol) {
+                       state++;
+                       /* strip empty lines */
+                       while (msg[eol] == '\n' && msg[eol + 1] == '\n')
+                               eol++;
+               } else if (!prefixcmp(msg + i, "author ")) {
+                       context->author.off = i + 7;
+                       context->author.len = eol - i - 7;
+               } else if (!prefixcmp(msg + i, "committer ")) {
+                       context->committer.off = i + 10;
+                       context->committer.len = eol - i - 10;
+               } else if (!prefixcmp(msg + i, "encoding ")) {
+                       context->encoding.off = i + 9;
+                       context->encoding.len = eol - i - 9;
+               }
+               i = eol;
+               if (!msg[i])
+                       break;
+       }
+       context->body_off = i;
+       context->commit_header_parsed = 1;
+}
+
+static void format_commit_item(struct strbuf *sb, const char *placeholder,
+                               void *context)
+{
+       struct format_commit_context *c = context;
+       const struct commit *commit = c->commit;
        const char *msg = commit->buffer;
+       struct commit_list *p;
 
        /* these are independent of the commit */
        switch (placeholder[0]) {
@@ -394,15 +479,21 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
                strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
                return;
        case 'h':               /* abbreviated commit hash */
+               if (add_again(sb, &c->abbrev_commit_hash))
+                       return;
                strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
                                                     DEFAULT_ABBREV));
+               c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
                return;
        case 'T':               /* tree hash */
                strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
                return;
        case 't':               /* abbreviated tree hash */
+               if (add_again(sb, &c->abbrev_tree_hash))
+                       return;
                strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
                                                     DEFAULT_ABBREV));
+               c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off;
                return;
        case 'P':               /* parent hashes */
                for (p = commit->parents; p; p = p->next) {
@@ -412,12 +503,16 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
                }
                return;
        case 'p':               /* abbreviated parent hashes */
+               if (add_again(sb, &c->abbrev_parent_hashes))
+                       return;
                for (p = commit->parents; p; p = p->next) {
                        if (p != commit->parents)
                                strbuf_addch(sb, ' ');
                        strbuf_addstr(sb, find_unique_abbrev(
                                        p->item->object.sha1, DEFAULT_ABBREV));
                }
+               c->abbrev_parent_hashes.len = sb->len -
+                                             c->abbrev_parent_hashes.off;
                return;
        case 'm':               /* left/right/bottom */
                strbuf_addch(sb, (commit->object.flags & BOUNDARY)
@@ -429,45 +524,28 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
        }
 
        /* For the rest we have to parse the commit header. */
-       for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
-               int eol;
-               for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
-                       ; /* do nothing */
+       if (!c->commit_header_parsed)
+               parse_commit_header(c);
 
-               if (state == SUBJECT) {
-                       if (placeholder[0] == 's') {
-                               strbuf_add(sb, msg + i, eol - i);
-                               return;
-                       }
-                       i = eol;
-               }
-               if (i == eol) {
-                       state++;
-                       /* strip empty lines */
-                       while (msg[eol + 1] == '\n')
-                               eol++;
-               } else if (!prefixcmp(msg + i, "author ")) {
-                       if (placeholder[0] == 'a') {
-                               format_person_part(sb, placeholder[1],
-                                                  msg + i + 7, eol - i - 7);
-                               return;
-                       }
-               } else if (!prefixcmp(msg + i, "committer ")) {
-                       if (placeholder[0] == 'c') {
-                               format_person_part(sb, placeholder[1],
-                                                  msg + i + 10, eol - i - 10);
-                               return;
-                       }
-               } else if (!prefixcmp(msg + i, "encoding ")) {
-                       if (placeholder[0] == 'e') {
-                               strbuf_add(sb, msg + i + 9, eol - i - 9);
-                               return;
-                       }
-               }
-               i = eol;
+       switch (placeholder[0]) {
+       case 's':
+               strbuf_add(sb, msg + c->subject.off, c->subject.len);
+               return;
+       case 'a':
+               format_person_part(sb, placeholder[1],
+                                  msg + c->author.off, c->author.len);
+               return;
+       case 'c':
+               format_person_part(sb, placeholder[1],
+                                  msg + c->committer.off, c->committer.len);
+               return;
+       case 'e':
+               strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
+               return;
+       case 'b':
+               strbuf_addstr(sb, msg + c->body_off);
+               return;
        }
-       if (msg[i] && placeholder[0] == 'b')    /* body */
-               strbuf_addstr(sb, msg + i);
 }
 
 void format_commit_message(const struct commit *commit,
@@ -505,7 +583,11 @@ void format_commit_message(const struct commit *commit,
                "m",            /* left/right/bottom */
                NULL
        };
-       strbuf_expand(sb, format, placeholders, format_commit_item, (void *)commit);
+       struct format_commit_context context;
+
+       memset(&context, 0, sizeof(context));
+       context.commit = commit;
+       strbuf_expand(sb, format, placeholders, format_commit_item, &context);
 }
 
 static void pp_header(enum cmit_fmt fmt,