commit-graph: lazy-load trees for commits
author Derrick Stolee <dstolee@microsoft.com>
Fri, 6 Apr 2018 19:09:46 +0000 (19:09 +0000)
committer Junio C Hamano <gitster@pobox.com>
Wed, 11 Apr 2018 01:47:16 +0000 (10:47 +0900)
The commit-graph file provides quick access to commit data, including
the OID of the root tree for each commit in the graph. When performing
a deep commit-graph walk, we may not need to load most of the trees
for these commits.

Delay loading the tree object for a commit loaded from the graph
until requested via get_commit_tree(). Do not lazy-load trees for
commits not in the graph, since that requires duplicate parsing
and the relative performance improvement when trees are not needed
is small.

On the Linux repository, performance tests were run for the following
command:

git log --graph --oneline -1000

Before: 0.92s
After: 0.66s
Rel %: -28.3%

Adding '-- kernel/' to the command requires loading the root tree
for every commit that is walked. There was no measurable performance
change as a result of this patch.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
commit-graph.c
commit-graph.h
commit.c
commit.h
index 264aa6d919bc3df71d189ff5fbddfc46231499f9..70fa1b25fd3d07a935cc0e7ba4cf70bc24ca3d21 100644 (file)
@@ -247,7 +247,6 @@ static struct commit_list **insert_parent_or_die(struct commit_graph *g,
 
 static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos)
 {
-       struct object_id oid;
        uint32_t edge_value;
        uint32_t *parent_data_ptr;
        uint64_t date_low, date_high;
@@ -257,8 +256,7 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin
        item->object.parsed = 1;
        item->graph_pos = pos;
 
-       hashcpy(oid.hash, commit_data);
-       item->maybe_tree = lookup_tree(&oid);
+       item->maybe_tree = NULL;
 
        date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
        date_low = get_be32(commit_data + g->hash_len + 12);
@@ -317,6 +315,28 @@ int parse_commit_in_graph(struct commit *item)
        return 0;
 }
 
+static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit *c)
+{
+       struct object_id oid;
+       const unsigned char *commit_data = g->chunk_commit_data +
+                                          GRAPH_DATA_WIDTH * (c->graph_pos);
+
+       hashcpy(oid.hash, commit_data);
+       c->maybe_tree = lookup_tree(&oid);
+
+       return c->maybe_tree;
+}
+
+struct tree *get_commit_tree_in_graph(const struct commit *c)
+{
+       if (c->maybe_tree)
+               return c->maybe_tree;
+       if (c->graph_pos == COMMIT_NOT_FROM_GRAPH)
+               BUG("get_commit_tree_in_graph called from non-commit-graph commit");
+
+       return load_tree_for_commit(commit_graph, (struct commit *)c);
+}
+
 static void write_graph_chunk_fanout(struct hashfile *f,
                                     struct commit **commits,
                                     int nr_commits)
index e1d8580c98a24a51e64c4d0f1d9abda10c5e92f2..260a468e73acd89b2a7706c859e8c25b264f3365 100644 (file)
@@ -17,6 +17,8 @@ char *get_commit_graph_filename(const char *obj_dir);
  */
 int parse_commit_in_graph(struct commit *item);
 
+struct tree *get_commit_tree_in_graph(const struct commit *c);
+
 struct commit_graph {
        int graph_fd;
 
index aea2ca1f8ba307f540f17d4f4b93071f9eb45b5f..711f674c18d00c8d62b117764e67c5d9c6731616 100644 (file)
--- a/commit.c
+++ b/commit.c
@@ -298,7 +298,13 @@ void free_commit_buffer(struct commit *commit)
 
 struct tree *get_commit_tree(const struct commit *commit)
 {
-       return commit->maybe_tree;
+       if (commit->maybe_tree || !commit->object.parsed)
+               return commit->maybe_tree;
+
+       if (commit->graph_pos == COMMIT_NOT_FROM_GRAPH)
+               BUG("commit has NULL tree, but was not loaded from commit-graph");
+
+       return get_commit_tree_in_graph(commit);
 }
 
 struct object_id *get_commit_tree_oid(const struct commit *commit)
index dc4bf97d9f3b74da1176d552f067e00b72a2d7fb..23a3f364edec6b609a39f15518cfda06af69e487 100644 (file)
--- a/commit.h
+++ b/commit.h
@@ -22,6 +22,12 @@ struct commit {
        unsigned int index;
        timestamp_t date;
        struct commit_list *parents;
+
+       /*
+        * If the commit is loaded from the commit-graph file, then this
+        * member may be NULL. Only access it through get_commit_tree()
+        * or get_commit_tree_oid().
+        */
        struct tree *maybe_tree;
        uint32_t graph_pos;
 };