commit: integrate commit graph with commit parsing
authorDerrick Stolee <dstolee@microsoft.com>
Tue, 10 Apr 2018 12:56:05 +0000 (08:56 -0400)
committerJunio C Hamano <gitster@pobox.com>
Wed, 11 Apr 2018 01:43:02 +0000 (10:43 +0900)
Teach Git to inspect a commit graph file to supply the contents of a
struct commit when calling parse_commit_gently(). This implementation
satisfies all post-conditions on the struct commit, including loading
parents, the root tree, and the commit date.

If core.commitGraph is false, then do not check graph files.

In test script t5318-commit-graph.sh, add output-matching conditions on
read-only graph operations.

By loading commits from the graph instead of parsing commit buffers, we
save a lot of time on long commit walks. Here are some performance
results for a copy of the Linux repository where 'master' has 678,653
reachable commits and is behind 'origin/master' by 59,929 commits.

| Command | Before | After | Rel % |
|----------------------------------|--------|--------|-------|
| log --oneline --topo-order -1000 | 8.31s | 0.94s | -88% |
| branch -vv | 1.02s | 0.14s | -86% |
| rev-list --all | 5.89s | 1.07s | -81% |
| rev-list --all --objects | 66.15s | 58.45s | -11% |

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
alloc.c
commit-graph.c
commit-graph.h
commit.c
commit.h
t/t5318-commit-graph.sh
diff --git a/alloc.c b/alloc.c
index 12afadfacdd6094912a6e18a217a9aa6318b47b2..cf4f8b61e126c0e9992dc34c6cb52b6f896a565d 100644 (file)
--- a/alloc.c
+++ b/alloc.c
@@ -93,6 +93,7 @@ void *alloc_commit_node(void)
        struct commit *c = alloc_node(&commit_state, sizeof(struct commit));
        c->object.type = OBJ_COMMIT;
        c->index = alloc_commit_index();
+       c->graph_pos = COMMIT_NOT_FROM_GRAPH;
        return c;
 }
 
index ea29c5c2d86ae69f7ea0a5ac1a5fb4dfafec9660..f745186e7f5ebaaea7852e7c6bfa3a275762c343 100644 (file)
@@ -38,7 +38,6 @@
 #define GRAPH_MIN_SIZE (5 * GRAPH_CHUNKLOOKUP_WIDTH + GRAPH_FANOUT_SIZE + \
                        GRAPH_OID_LEN + 8)
 
-
 char *get_commit_graph_filename(const char *obj_dir)
 {
        return xstrfmt("%s/info/commit-graph", obj_dir);
@@ -179,6 +178,145 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
        exit(1);
 }
 
+/* global storage */
+static struct commit_graph *commit_graph = NULL;
+
+static void prepare_commit_graph_one(const char *obj_dir)
+{
+       char *graph_name;
+
+       if (commit_graph)
+               return;
+
+       graph_name = get_commit_graph_filename(obj_dir);
+       commit_graph = load_commit_graph_one(graph_name);
+
+       FREE_AND_NULL(graph_name);
+}
+
+static int prepare_commit_graph_run_once = 0;
+static void prepare_commit_graph(void)
+{
+       struct alternate_object_database *alt;
+       char *obj_dir;
+
+       if (prepare_commit_graph_run_once)
+               return;
+       prepare_commit_graph_run_once = 1;
+
+       obj_dir = get_object_directory();
+       prepare_commit_graph_one(obj_dir);
+       prepare_alt_odb();
+       for (alt = alt_odb_list; !commit_graph && alt; alt = alt->next)
+               prepare_commit_graph_one(alt->path);
+}
+
+static void close_commit_graph(void)
+{
+       if (!commit_graph)
+               return;
+
+       if (commit_graph->graph_fd >= 0) {
+               munmap((void *)commit_graph->data, commit_graph->data_len);
+               commit_graph->data = NULL;
+               close(commit_graph->graph_fd);
+       }
+
+       FREE_AND_NULL(commit_graph);
+}
+
+static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos)
+{
+       return bsearch_hash(oid->hash, g->chunk_oid_fanout,
+                           g->chunk_oid_lookup, g->hash_len, pos);
+}
+
+static struct commit_list **insert_parent_or_die(struct commit_graph *g,
+                                                uint64_t pos,
+                                                struct commit_list **pptr)
+{
+       struct commit *c;
+       struct object_id oid;
+       hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * pos);
+       c = lookup_commit(&oid);
+       if (!c)
+               die("could not find commit %s", oid_to_hex(&oid));
+       c->graph_pos = pos;
+       return &commit_list_insert(c, pptr)->next;
+}
+
+static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos)
+{
+       struct object_id oid;
+       uint32_t edge_value;
+       uint32_t *parent_data_ptr;
+       uint64_t date_low, date_high;
+       struct commit_list **pptr;
+       const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len + 16) * pos;
+
+       item->object.parsed = 1;
+       item->graph_pos = pos;
+
+       hashcpy(oid.hash, commit_data);
+       item->tree = lookup_tree(&oid);
+
+       date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
+       date_low = get_be32(commit_data + g->hash_len + 12);
+       item->date = (timestamp_t)((date_high << 32) | date_low);
+
+       pptr = &item->parents;
+
+       edge_value = get_be32(commit_data + g->hash_len);
+       if (edge_value == GRAPH_PARENT_NONE)
+               return 1;
+       pptr = insert_parent_or_die(g, edge_value, pptr);
+
+       edge_value = get_be32(commit_data + g->hash_len + 4);
+       if (edge_value == GRAPH_PARENT_NONE)
+               return 1;
+       if (!(edge_value & GRAPH_OCTOPUS_EDGES_NEEDED)) {
+               pptr = insert_parent_or_die(g, edge_value, pptr);
+               return 1;
+       }
+
+       parent_data_ptr = (uint32_t*)(g->chunk_large_edges +
+                         4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK));
+       do {
+               edge_value = get_be32(parent_data_ptr);
+               pptr = insert_parent_or_die(g,
+                                           edge_value & GRAPH_EDGE_LAST_MASK,
+                                           pptr);
+               parent_data_ptr++;
+       } while (!(edge_value & GRAPH_LAST_EDGE));
+
+       return 1;
+}
+
+int parse_commit_in_graph(struct commit *item)
+{
+       if (!core_commit_graph)
+               return 0;
+       if (item->object.parsed)
+               return 1;
+
+       prepare_commit_graph();
+       if (commit_graph) {
+               uint32_t pos;
+               int found;
+               if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
+                       pos = item->graph_pos;
+                       found = 1;
+               } else {
+                       found = bsearch_graph(commit_graph, &(item->object.oid), &pos);
+               }
+
+               if (found)
+                       return fill_commit_in_graph(item, commit_graph, pos);
+       }
+
+       return 0;
+}
+
 static void write_graph_chunk_fanout(struct hashfile *f,
                                     struct commit **commits,
                                     int nr_commits)
@@ -530,6 +668,7 @@ void write_commit_graph(const char *obj_dir)
        write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr);
        write_graph_chunk_large_edges(f, commits.list, commits.nr);
 
+       close_commit_graph();
        finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
        commit_lock_file(&lk);
 
index 2528478f06db26abbfc22ca62e93425bf6a1da05..73b28beed15b952ced9ec1b0e978f507a552e823 100644 (file)
@@ -5,6 +5,18 @@
 
 char *get_commit_graph_filename(const char *obj_dir);
 
+/*
+ * Given a commit struct, try to fill the commit struct info, including:
+ *  1. tree object
+ *  2. date
+ *  3. parents.
+ *
+ * Returns 1 if and only if the commit was found in the packed graph.
+ *
+ * See parse_commit_buffer() for the fallback after this call.
+ */
+int parse_commit_in_graph(struct commit *item);
+
 struct commit_graph {
        int graph_fd;
 
index e8a49b9c84bf24a6ef1455f975aa9e28e2156d77..eb61729d105812ec395ed9973cd18bc2cba98306 100644 (file)
--- a/commit.c
+++ b/commit.c
@@ -1,6 +1,7 @@
 #include "cache.h"
 #include "tag.h"
 #include "commit.h"
+#include "commit-graph.h"
 #include "pkt-line.h"
 #include "utf8.h"
 #include "diff.h"
@@ -383,6 +384,8 @@ int parse_commit_gently(struct commit *item, int quiet_on_missing)
                return -1;
        if (item->object.parsed)
                return 0;
+       if (parse_commit_in_graph(item))
+               return 0;
        buffer = read_sha1_file(item->object.oid.hash, &type, &size);
        if (!buffer)
                return quiet_on_missing ? -1 :
index 0fb8271665c6c98ccca803fbe002327bf38fcfb3..e57ae4b58380cce6192cc6489682838677f048ce 100644 (file)
--- a/commit.h
+++ b/commit.h
@@ -9,6 +9,8 @@
 #include "string-list.h"
 #include "pretty.h"
 
+#define COMMIT_NOT_FROM_GRAPH 0xFFFFFFFF
+
 struct commit_list {
        struct commit *item;
        struct commit_list *next;
@@ -21,6 +23,7 @@ struct commit {
        timestamp_t date;
        struct commit_list *parents;
        struct tree *tree;
+       uint32_t graph_pos;
 };
 
 extern int save_commit_buffer;
index 2f44f91193324c2a5ef3534481a3278e0357e239..51de9cc4552b088a22d1f38b5e79a2ac8d948b74 100755 (executable)
@@ -7,6 +7,7 @@ test_expect_success 'setup full repo' '
        mkdir full &&
        cd "$TRASH_DIRECTORY/full" &&
        git init &&
+       git config core.commitGraph true &&
        objdir=".git/objects"
 '
 
@@ -26,6 +27,29 @@ test_expect_success 'create commits and repack' '
        git repack
 '
 
+graph_git_two_modes() {
+       git -c core.graph=true $1 >output
+       git -c core.graph=false $1 >expect
+       test_cmp output expect
+}
+
+graph_git_behavior() {
+       MSG=$1
+       DIR=$2
+       BRANCH=$3
+       COMPARE=$4
+       test_expect_success "check normal git operations: $MSG" '
+               cd "$TRASH_DIRECTORY/$DIR" &&
+               graph_git_two_modes "log --oneline $BRANCH" &&
+               graph_git_two_modes "log --topo-order $BRANCH" &&
+               graph_git_two_modes "log --graph $COMPARE..$BRANCH" &&
+               graph_git_two_modes "branch -vv" &&
+               graph_git_two_modes "merge-base -a $BRANCH $COMPARE"
+       '
+}
+
+graph_git_behavior 'no graph' full commits/3 commits/1
+
 graph_read_expect() {
        OPTIONAL=""
        NUM_CHUNKS=3
@@ -50,6 +74,8 @@ test_expect_success 'write graph' '
        graph_read_expect "3"
 '
 
+graph_git_behavior 'graph exists' full commits/3 commits/1
+
 test_expect_success 'Add more commits' '
        cd "$TRASH_DIRECTORY/full" &&
        git reset --hard commits/1 &&
@@ -86,7 +112,6 @@ test_expect_success 'Add more commits' '
 # |___/____/
 # 1
 
-
 test_expect_success 'write graph with merges' '
        cd "$TRASH_DIRECTORY/full" &&
        git commit-graph write &&
@@ -94,6 +119,10 @@ test_expect_success 'write graph with merges' '
        graph_read_expect "10" "large_edges"
 '
 
+graph_git_behavior 'merge 1 vs 2' full merge/1 merge/2
+graph_git_behavior 'merge 1 vs 3' full merge/1 merge/3
+graph_git_behavior 'merge 2 vs 3' full merge/2 merge/3
+
 test_expect_success 'Add one more commit' '
        cd "$TRASH_DIRECTORY/full" &&
        test_commit 8 &&
@@ -115,6 +144,9 @@ test_expect_success 'Add one more commit' '
 # |___/____/
 # 1
 
+graph_git_behavior 'mixed mode, commit 8 vs merge 1' full commits/8 merge/1
+graph_git_behavior 'mixed mode, commit 8 vs merge 2' full commits/8 merge/2
+
 test_expect_success 'write graph with new commit' '
        cd "$TRASH_DIRECTORY/full" &&
        git commit-graph write &&
@@ -122,6 +154,9 @@ test_expect_success 'write graph with new commit' '
        graph_read_expect "11" "large_edges"
 '
 
+graph_git_behavior 'full graph, commit 8 vs merge 1' full commits/8 merge/1
+graph_git_behavior 'full graph, commit 8 vs merge 2' full commits/8 merge/2
+
 test_expect_success 'write graph with nothing new' '
        cd "$TRASH_DIRECTORY/full" &&
        git commit-graph write &&
@@ -129,13 +164,20 @@ test_expect_success 'write graph with nothing new' '
        graph_read_expect "11" "large_edges"
 '
 
+graph_git_behavior 'cleared graph, commit 8 vs merge 1' full commits/8 merge/1
+graph_git_behavior 'cleared graph, commit 8 vs merge 2' full commits/8 merge/2
+
 test_expect_success 'setup bare repo' '
        cd "$TRASH_DIRECTORY" &&
        git clone --bare --no-local full bare &&
        cd bare &&
+       git config core.commitGraph true &&
        baredir="./objects"
 '
 
+graph_git_behavior 'bare repo, commit 8 vs merge 1' bare commits/8 merge/1
+graph_git_behavior 'bare repo, commit 8 vs merge 2' bare commits/8 merge/2
+
 test_expect_success 'write graph in bare repo' '
        cd "$TRASH_DIRECTORY/bare" &&
        git commit-graph write &&
@@ -143,4 +185,7 @@ test_expect_success 'write graph in bare repo' '
        graph_read_expect "11" "large_edges"
 '
 
+graph_git_behavior 'bare repo with graph, commit 8 vs merge 1' bare commits/8 merge/1
+graph_git_behavior 'bare repo with graph, commit 8 vs merge 2' bare commits/8 merge/2
+
 test_done