commit: record buffer length in cache
authorJeff King <peff@peff.net>
Tue, 10 Jun 2014 21:44:13 +0000 (17:44 -0400)
committerJunio C Hamano <gitster@pobox.com>
Fri, 13 Jun 2014 19:09:38 +0000 (12:09 -0700)
Most callsites which use the commit buffer try to use the
cached version attached to the commit, rather than
re-reading from disk. Unfortunately, that interface provides
only a pointer to the NUL-terminated buffer, with no
indication of the original length.

For the most part, this doesn't matter. People do not put
NULs in their commit messages, and the log code is happy to
treat it all as a NUL-terminated string. However, some code
paths do care. For example, when checking signatures, we
want to be very careful that we verify all the bytes to
avoid malicious trickery.

This patch just adds an optional "size" out-pointer to
get_commit_buffer and friends. The existing callers all pass
NULL (there did not seem to be any obvious sites where we
could avoid an immediate strlen() call, though perhaps with
some further refactoring we could).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
16 files changed:
builtin/blame.c
builtin/fast-export.c
builtin/fmt-merge-msg.c
builtin/index-pack.c
builtin/log.c
builtin/rev-list.c
commit.c
commit.h
fsck.c
log-tree.c
merge-recursive.c
notes-merge.c
object.c
pretty.c
sequencer.c
sha1_name.c
index 857d98a3247c9ba63cf14fa33c9d65fa5b77761b..b84e375b5c5cdd2c6f0a18f0c57462131cb14ec3 100644 (file)
@@ -1998,6 +1998,18 @@ static void append_merge_parents(struct commit_list **tail)
        strbuf_release(&line);
 }
 
+/*
+ * This isn't as simple as passing sb->buf and sb->len, because we
+ * want to transfer ownership of the buffer to the commit (so we
+ * must use detach).
+ */
+static void set_commit_buffer_from_strbuf(struct commit *c, struct strbuf *sb)
+{
+       size_t len;
+       void *buf = strbuf_detach(sb, &len);
+       set_commit_buffer(c, buf, len);
+}
+
 /*
  * Prepare a dummy commit that represents the work tree (or staged) item.
  * Note that annotating work tree item never works in the reverse.
@@ -2046,7 +2058,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt,
                    ident, ident, path,
                    (!contents_from ? path :
                     (!strcmp(contents_from, "-") ? "standard input" : contents_from)));
-       set_commit_buffer(commit, strbuf_detach(&msg, NULL));
+       set_commit_buffer_from_strbuf(commit, &msg);
 
        if (!contents_from || strcmp("-", contents_from)) {
                struct stat st;
index 7ee5e08442dd65e9f28541e21a2ab8a46af00f23..05d161f19f138275a6ea995bf4b6b3b11c987693 100644 (file)
@@ -289,7 +289,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev)
        rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
 
        parse_commit_or_die(commit);
-       commit_buffer = get_commit_buffer(commit);
+       commit_buffer = get_commit_buffer(commit, NULL);
        author = strstr(commit_buffer, "\nauthor ");
        if (!author)
                die ("Could not find author in commit %s",
index 01f6d59eefdc2d6ac413fcee406d88639d30805b..ef8b254ef218249f7f1b0f28db7ea21110851917 100644 (file)
@@ -236,7 +236,7 @@ static void record_person(int which, struct string_list *people,
        const char *field;
 
        field = (which == 'a') ? "\nauthor " : "\ncommitter ";
-       buffer = get_commit_buffer(commit);
+       buffer = get_commit_buffer(commit, NULL);
        name = strstr(buffer, field);
        if (!name)
                return;
index 42551ce4ff65c170fffb88c5e9bb639bfc83fe91..459b9f07bb674e92a11df79e0c0445acb20c595a 100644 (file)
@@ -774,7 +774,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
                        }
                        if (obj->type == OBJ_COMMIT) {
                                struct commit *commit = (struct commit *) obj;
-                               if (detach_commit_buffer(commit) != data)
+                               if (detach_commit_buffer(commit, NULL) != data)
                                        die("BUG: parse_object_buffer transmogrified our buffer");
                        }
                        obj->flags |= FLAG_CHECKED;
index 2c742606bc44a5f5eb229e0ce0cf5c942a6fa924..c599eacf721124831c07a8e14ea14fe1b951f403 100644 (file)
@@ -919,7 +919,7 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout,
                                &need_8bit_cte);
 
        for (i = 0; !need_8bit_cte && i < nr; i++) {
-               const char *buf = get_commit_buffer(list[i]);
+               const char *buf = get_commit_buffer(list[i], NULL);
                if (has_non_ascii(buf))
                        need_8bit_cte = 1;
                unuse_commit_buffer(list[i], buf);
index 3fcbf21c03a37b78edacca4810defa78c7cea915..ff84a825ff776abc01087f37782223ee26a1af7a 100644 (file)
@@ -106,7 +106,7 @@ static void show_commit(struct commit *commit, void *data)
        else
                putchar('\n');
 
-       if (revs->verbose_header && get_cached_commit_buffer(commit)) {
+       if (revs->verbose_header && get_cached_commit_buffer(commit, NULL)) {
                struct strbuf buf = STRBUF_INIT;
                struct pretty_print_context ctx = {0};
                ctx.abbrev = revs->abbrev;
index e289c783270f57bb213ef60cd24d7b950c6d52da..a036e181c7f9ff391052b0ca5d82d06310abdf88 100644 (file)
--- a/commit.c
+++ b/commit.c
@@ -245,22 +245,31 @@ int unregister_shallow(const unsigned char *sha1)
        return 0;
 }
 
-define_commit_slab(buffer_slab, void *);
+struct commit_buffer {
+       void *buffer;
+       unsigned long size;
+};
+define_commit_slab(buffer_slab, struct commit_buffer);
 static struct buffer_slab buffer_slab = COMMIT_SLAB_INIT(1, buffer_slab);
 
-void set_commit_buffer(struct commit *commit, void *buffer)
+void set_commit_buffer(struct commit *commit, void *buffer, unsigned long size)
 {
-       *buffer_slab_at(&buffer_slab, commit) = buffer;
+       struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit);
+       v->buffer = buffer;
+       v->size = size;
 }
 
-const void *get_cached_commit_buffer(const struct commit *commit)
+const void *get_cached_commit_buffer(const struct commit *commit, unsigned long *sizep)
 {
-       return *buffer_slab_at(&buffer_slab, commit);
+       struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit);
+       if (sizep)
+               *sizep = v->size;
+       return v->buffer;
 }
 
-const void *get_commit_buffer(const struct commit *commit)
+const void *get_commit_buffer(const struct commit *commit, unsigned long *sizep)
 {
-       const void *ret = get_cached_commit_buffer(commit);
+       const void *ret = get_cached_commit_buffer(commit, sizep);
        if (!ret) {
                enum object_type type;
                unsigned long size;
@@ -271,29 +280,38 @@ const void *get_commit_buffer(const struct commit *commit)
                if (type != OBJ_COMMIT)
                        die("expected commit for %s, got %s",
                            sha1_to_hex(commit->object.sha1), typename(type));
+               if (sizep)
+                       *sizep = size;
        }
        return ret;
 }
 
 void unuse_commit_buffer(const struct commit *commit, const void *buffer)
 {
-       void *cached = *buffer_slab_at(&buffer_slab, commit);
-       if (cached != buffer)
+       struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit);
+       if (v->buffer != buffer)
                free((void *)buffer);
 }
 
 void free_commit_buffer(struct commit *commit)
 {
-       void **b = buffer_slab_at(&buffer_slab, commit);
-       free(*b);
-       *b = NULL;
+       struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit);
+       free(v->buffer);
+       v->buffer = NULL;
+       v->size = 0;
 }
 
-const void *detach_commit_buffer(struct commit *commit)
+const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep)
 {
-       void **b = buffer_slab_at(&buffer_slab, commit);
-       void *ret = *b;
-       *b = NULL;
+       struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit);
+       void *ret;
+
+       ret = v->buffer;
+       if (sizep)
+               *sizep = v->size;
+
+       v->buffer = NULL;
+       v->size = 0;
        return ret;
 }
 
@@ -374,7 +392,7 @@ int parse_commit(struct commit *item)
        }
        ret = parse_commit_buffer(item, buffer, size);
        if (save_commit_buffer && !ret) {
-               set_commit_buffer(item, buffer);
+               set_commit_buffer(item, buffer, size);
                return 0;
        }
        free(buffer);
@@ -589,7 +607,7 @@ static void record_author_date(struct author_date_slab *author_date,
                               struct commit *commit)
 {
        const char *buf, *line_end, *ident_line;
-       const char *buffer = get_commit_buffer(commit);
+       const char *buffer = get_commit_buffer(commit, NULL);
        struct ident_split ident;
        char *date_end;
        unsigned long date;
index e1c25692f10ecc520c40f73f4b4eb5180e366d1e..61559a9d45d67f3fad012c74b2a7643d185e87b3 100644 (file)
--- a/commit.h
+++ b/commit.h
@@ -54,20 +54,20 @@ void parse_commit_or_die(struct commit *item);
  * Associate an object buffer with the commit. The ownership of the
  * memory is handed over to the commit, and must be free()-able.
  */
-void set_commit_buffer(struct commit *, void *buffer);
+void set_commit_buffer(struct commit *, void *buffer, unsigned long size);
 
 /*
  * Get any cached object buffer associated with the commit. Returns NULL
  * if none. The resulting memory should not be freed.
  */
-const void *get_cached_commit_buffer(const struct commit *);
+const void *get_cached_commit_buffer(const struct commit *, unsigned long *size);
 
 /*
  * Get the commit's object contents, either from cache or by reading the object
  * from disk. The resulting memory should not be modified, and must be given
  * to unuse_commit_buffer when the caller is done.
  */
-const void *get_commit_buffer(const struct commit *);
+const void *get_commit_buffer(const struct commit *, unsigned long *size);
 
 /*
  * Tell the commit subsytem that we are done with a particular commit buffer.
@@ -86,7 +86,7 @@ void free_commit_buffer(struct commit *);
  * Disassociate any cached object buffer from the commit, but do not free it.
  * The buffer (or NULL, if none) is returned.
  */
-const void *detach_commit_buffer(struct commit *);
+const void *detach_commit_buffer(struct commit *, unsigned long *sizep);
 
 /* Find beginning and length of commit subject. */
 int find_commit_subject(const char *commit_buffer, const char **subject);
diff --git a/fsck.c b/fsck.c
index 8223780592598d923c0c6807a23d67461b5c1f4e..a7233c8d0b20c06f0aebffbec9c11dacb22b6769 100644 (file)
--- a/fsck.c
+++ b/fsck.c
@@ -339,7 +339,7 @@ static int fsck_commit_buffer(struct commit *commit, const char *buffer,
 
 static int fsck_commit(struct commit *commit, fsck_error error_func)
 {
-       const char *buffer = get_commit_buffer(commit);
+       const char *buffer = get_commit_buffer(commit, NULL);
        int ret = fsck_commit_buffer(commit, buffer, error_func);
        unuse_commit_buffer(commit, buffer);
        return ret;
index e9ef8abd3712d9f4e7a323730d2d84704fc453d0..444702163a0e80a5583c1bf3a82fa96e0da698fd 100644 (file)
@@ -588,7 +588,7 @@ void show_log(struct rev_info *opt)
                show_mergetag(opt, commit);
        }
 
-       if (!get_cached_commit_buffer(commit))
+       if (!get_cached_commit_buffer(commit, NULL))
                return;
 
        if (opt->show_notes) {
index 78908aaacc6676913b369e80c54d9e11444395bd..a9ab328923db3f2b2c945c479386316333dfcd0a 100644 (file)
@@ -190,7 +190,7 @@ static void output_commit_title(struct merge_options *o, struct commit *commit)
                        printf(_("(bad commit)\n"));
                else {
                        const char *title;
-                       const char *msg = get_commit_buffer(commit);
+                       const char *msg = get_commit_buffer(commit, NULL);
                        int len = find_commit_subject(msg, &title);
                        if (len)
                                printf("%.*s\n", len, title);
index e804db2d02820c3ba762eebbbe7b465a487d83b8..fd5fae255d2760d6b7895e95cee627b5e28f776e 100644 (file)
@@ -672,7 +672,7 @@ int notes_merge_commit(struct notes_merge_options *o,
        DIR *dir;
        struct dirent *e;
        struct strbuf path = STRBUF_INIT;
-       const char *buffer = get_commit_buffer(partial_commit);
+       const char *buffer = get_commit_buffer(partial_commit, NULL);
        const char *msg = strstr(buffer, "\n\n");
        int baselen;
 
index 67b6e3533dafc39c05d3fa8559d2fbe6d2779a65..9c31e9a5e0e0d913044501089e09037e47894f03 100644 (file)
--- a/object.c
+++ b/object.c
@@ -197,8 +197,8 @@ struct object *parse_object_buffer(const unsigned char *sha1, enum object_type t
                if (commit) {
                        if (parse_commit_buffer(commit, buffer, size))
                                return NULL;
-                       if (!get_cached_commit_buffer(commit)) {
-                               set_commit_buffer(commit, buffer);
+                       if (!get_cached_commit_buffer(commit, NULL)) {
+                               set_commit_buffer(commit, buffer, size);
                                *eaten_p = 1;
                        }
                        obj = &commit->object;
index 915bd1e2e96a165c05e8a67523c759be8f376c2b..b9fceedbb9ffe090af689178ead7218836208c31 100644 (file)
--- a/pretty.c
+++ b/pretty.c
@@ -613,7 +613,7 @@ const char *logmsg_reencode(const struct commit *commit,
        static const char *utf8 = "UTF-8";
        const char *use_encoding;
        char *encoding;
-       const char *msg = get_commit_buffer(commit);
+       const char *msg = get_commit_buffer(commit, NULL);
        char *out;
 
        if (!output_encoding || !*output_encoding) {
@@ -642,7 +642,7 @@ const char *logmsg_reencode(const struct commit *commit,
                 * the cached copy from get_commit_buffer, we need to duplicate it
                 * to avoid munging the cached copy.
                 */
-               if (msg == get_cached_commit_buffer(commit))
+               if (msg == get_cached_commit_buffer(commit, NULL))
                        out = xstrdup(msg);
                else
                        out = (char *)msg;
index 69bcf3d80190e1049ca0e1d25fec2615fac3d705..bbaddcb05af3d720d8efd1d1d7c96ce1f9a35039 100644 (file)
@@ -662,7 +662,7 @@ static int format_todo(struct strbuf *buf, struct commit_list *todo_list,
        int subject_len;
 
        for (cur = todo_list; cur; cur = cur->next) {
-               const char *commit_buffer = get_commit_buffer(cur->item);
+               const char *commit_buffer = get_commit_buffer(cur->item, NULL);
                sha1_abbrev = find_unique_abbrev(cur->item->object.sha1, DEFAULT_ABBREV);
                subject_len = find_commit_subject(commit_buffer, &subject);
                strbuf_addf(buf, "%s %s %.*s\n", action_str, sha1_abbrev,
index 0a65d234de377f98207186b277901bd44436af6d..c2c938c4e161fc6849a87a7a19336712f36f74ff 100644 (file)
@@ -869,7 +869,7 @@ static int get_sha1_oneline(const char *prefix, unsigned char *sha1,
                commit = pop_most_recent_commit(&list, ONELINE_SEEN);
                if (!parse_object(commit->object.sha1))
                        continue;
-               buf = get_commit_buffer(commit);
+               buf = get_commit_buffer(commit, NULL);
                p = strstr(buf, "\n\n");
                matches = p && !regexec(&regex, p + 2, 0, NULL, 0);
                unuse_commit_buffer(commit, buf);