Merge branch 'ds/commit-graph-lockfile-fix'
authorJunio C Hamano <gitster@pobox.com>
Mon, 25 Jun 2018 20:22:36 +0000 (13:22 -0700)
committerJunio C Hamano <gitster@pobox.com>
Mon, 25 Jun 2018 20:22:36 +0000 (13:22 -0700)
Update to ds/generation-numbers topic.

* ds/commit-graph-lockfile-fix:
commit-graph: fix UX issue when .lock file exists
commit-graph.txt: update design document
merge: check config before loading commits
commit: use generation number in remove_redundant()
commit: add short-circuit to paint_down_to_common()
commit: use generation numbers for in_merge_bases()
ref-filter: use generation number for --contains
commit-graph: always load commit-graph information
commit: use generations in paint_down_to_common()
commit-graph: compute generation numbers
commit: add generation number to struct commit
ref-filter: fix outdated comment on in_commit_list

Documentation/technical/commit-graph.txt
alloc.c
builtin/merge.c
commit-graph.c
commit-graph.h
commit.c
commit.h
object.c
ref-filter.c
sha1-file.c
t/t5318-commit-graph.sh
index 0550c6d0dc317343dae53e792cc321fd0924bd84..e1a883eb462cd9bddbc192a315464282146ac433 100644 (file)
@@ -77,6 +77,29 @@ in the commit graph. We can treat these commits as having "infinite"
 generation number and walk until reaching commits with known generation
 number.
 
+We use the macro GENERATION_NUMBER_INFINITY = 0xFFFFFFFF to mark commits not
+in the commit-graph file. If a commit-graph file was written by a version
+of Git that did not compute generation numbers, then those commits will
+have generation number represented by the macro GENERATION_NUMBER_ZERO = 0.
+
+Since the commit-graph file is closed under reachability, we can guarantee
+the following weaker condition on all commits:
+
+    If A and B are commits with generation numbers N amd M, respectively,
+    and N < M, then A cannot reach B.
+
+Note how the strict inequality differs from the inequality when we have
+fully-computed generation numbers. Using strict inequality may result in
+walking a few extra commits, but the simplicity in dealing with commits
+with generation number *_INFINITY or *_ZERO is valuable.
+
+We use the macro GENERATION_NUMBER_MAX = 0x3FFFFFFF to for commits whose
+generation numbers are computed to be at least this value. We limit at
+this value since it is the largest value that can be stored in the
+commit-graph file using the 30 bits available to generation numbers. This
+presents another case where a commit can have generation number equal to
+that of a parent.
+
 Design Details
 --------------
 
@@ -98,18 +121,14 @@ Future Work
 - The 'commit-graph' subcommand does not have a "verify" mode that is
   necessary for integration with fsck.
 
-- The file format includes room for precomputed generation numbers. These
-  are not currently computed, so all generation numbers will be marked as
-  0 (or "uncomputed"). A later patch will include this calculation.
-
 - After computing and storing generation numbers, we must make graph
   walks aware of generation numbers to gain the performance benefits they
   enable. This will mostly be accomplished by swapping a commit-date-ordered
   priority queue with one ordered by generation number. The following
   operations are important candidates:
 
-    - paint_down_to_common()
     - 'log --topo-order'
+    - 'tag --merged'
 
 - Currently, parse_commit_gently() requires filling in the root tree
   object for a commit. This passes through lookup_tree() and consequently
diff --git a/alloc.c b/alloc.c
index cf4f8b61e126c0e9992dc34c6cb52b6f896a565d..e8ab14f4a12bc65eb32152192af1faf719e3423c 100644 (file)
--- a/alloc.c
+++ b/alloc.c
@@ -94,6 +94,7 @@ void *alloc_commit_node(void)
        c->object.type = OBJ_COMMIT;
        c->index = alloc_commit_index();
        c->graph_pos = COMMIT_NOT_FROM_GRAPH;
+       c->generation = GENERATION_NUMBER_INFINITY;
        return c;
 }
 
index fb4faca2507fdf00a1bbd48817e2ff0bc9957fc3..4a4c09496cfee08417c18cc010528464a3137488 100644 (file)
@@ -1186,14 +1186,15 @@ int cmd_merge(int argc, const char **argv, const char *prefix)
        branch = branch_to_free = resolve_refdup("HEAD", 0, &head_oid, NULL);
        if (branch)
                skip_prefix(branch, "refs/heads/", &branch);
+
+       init_diff_ui_defaults();
+       git_config(git_merge_config, NULL);
+
        if (!branch || is_null_oid(&head_oid))
                head_commit = NULL;
        else
                head_commit = lookup_commit_or_die(&head_oid, "HEAD");
 
-       init_diff_ui_defaults();
-       git_config(git_merge_config, NULL);
-
        if (branch_mergeoptions)
                parse_branch_merge_options(branch_mergeoptions);
        argc = parse_options(argc, argv, prefix, builtin_merge_options,
index 4c6127088ff96282fbcdc98ef5767281e52846e9..b63a1fc85eaded844fcb1a2634067f9509b5937c 100644 (file)
@@ -1,5 +1,6 @@
 #include "cache.h"
 #include "config.h"
+#include "dir.h"
 #include "git-compat-util.h"
 #include "lockfile.h"
 #include "pack.h"
@@ -248,6 +249,13 @@ static struct commit_list **insert_parent_or_die(struct commit_graph *g,
        return &commit_list_insert(c, pptr)->next;
 }
 
+static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos)
+{
+       const unsigned char *commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * pos;
+       item->graph_pos = pos;
+       item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
+}
+
 static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos)
 {
        uint32_t edge_value;
@@ -265,6 +273,8 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin
        date_low = get_be32(commit_data + g->hash_len + 12);
        item->date = (timestamp_t)((date_high << 32) | date_low);
 
+       item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
+
        pptr = &item->parents;
 
        edge_value = get_be32(commit_data + g->hash_len);
@@ -293,31 +303,40 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin
        return 1;
 }
 
+static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)
+{
+       if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
+               *pos = item->graph_pos;
+               return 1;
+       } else {
+               return bsearch_graph(g, &(item->object.oid), pos);
+       }
+}
+
 int parse_commit_in_graph(struct commit *item)
 {
+       uint32_t pos;
+
        if (!core_commit_graph)
                return 0;
        if (item->object.parsed)
                return 1;
-
        prepare_commit_graph();
-       if (commit_graph) {
-               uint32_t pos;
-               int found;
-               if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
-                       pos = item->graph_pos;
-                       found = 1;
-               } else {
-                       found = bsearch_graph(commit_graph, &(item->object.oid), &pos);
-               }
-
-               if (found)
-                       return fill_commit_in_graph(item, commit_graph, pos);
-       }
-
+       if (commit_graph && find_commit_in_graph(item, commit_graph, &pos))
+               return fill_commit_in_graph(item, commit_graph, pos);
        return 0;
 }
 
+void load_commit_graph_info(struct commit *item)
+{
+       uint32_t pos;
+       if (!core_commit_graph)
+               return;
+       prepare_commit_graph();
+       if (commit_graph && find_commit_in_graph(item, commit_graph, &pos))
+               fill_commit_graph_info(item, commit_graph, pos);
+}
+
 static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit *c)
 {
        struct object_id oid;
@@ -440,6 +459,8 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
                else
                        packedDate[0] = 0;
 
+               packedDate[0] |= htonl((*list)->generation << 2);
+
                packedDate[1] = htonl((*list)->date);
                hashwrite(f, packedDate, 8);
 
@@ -572,6 +593,45 @@ static void close_reachable(struct packed_oid_list *oids)
        }
 }
 
+static void compute_generation_numbers(struct packed_commit_list* commits)
+{
+       int i;
+       struct commit_list *list = NULL;
+
+       for (i = 0; i < commits->nr; i++) {
+               if (commits->list[i]->generation != GENERATION_NUMBER_INFINITY &&
+                   commits->list[i]->generation != GENERATION_NUMBER_ZERO)
+                       continue;
+
+               commit_list_insert(commits->list[i], &list);
+               while (list) {
+                       struct commit *current = list->item;
+                       struct commit_list *parent;
+                       int all_parents_computed = 1;
+                       uint32_t max_generation = 0;
+
+                       for (parent = current->parents; parent; parent = parent->next) {
+                               if (parent->item->generation == GENERATION_NUMBER_INFINITY ||
+                                   parent->item->generation == GENERATION_NUMBER_ZERO) {
+                                       all_parents_computed = 0;
+                                       commit_list_insert(parent->item, &list);
+                                       break;
+                               } else if (parent->item->generation > max_generation) {
+                                       max_generation = parent->item->generation;
+                               }
+                       }
+
+                       if (all_parents_computed) {
+                               current->generation = max_generation + 1;
+                               pop_commit(&list);
+
+                               if (current->generation > GENERATION_NUMBER_MAX)
+                                       current->generation = GENERATION_NUMBER_MAX;
+                       }
+               }
+       }
+}
+
 void write_commit_graph(const char *obj_dir,
                        const char **pack_indexes,
                        int nr_packs,
@@ -584,7 +644,6 @@ void write_commit_graph(const char *obj_dir,
        struct hashfile *f;
        uint32_t i, count_distinct = 0;
        char *graph_name;
-       int fd;
        struct lock_file lk = LOCK_INIT;
        uint32_t chunk_ids[5];
        uint64_t chunk_offsets[5];
@@ -695,24 +754,14 @@ void write_commit_graph(const char *obj_dir,
        if (commits.nr >= GRAPH_PARENT_MISSING)
                die(_("too many commits to write graph"));
 
-       graph_name = get_commit_graph_filename(obj_dir);
-       fd = hold_lock_file_for_update(&lk, graph_name, 0);
-
-       if (fd < 0) {
-               struct strbuf folder = STRBUF_INIT;
-               strbuf_addstr(&folder, graph_name);
-               strbuf_setlen(&folder, strrchr(folder.buf, '/') - folder.buf);
-
-               if (mkdir(folder.buf, 0777) < 0)
-                       die_errno(_("cannot mkdir %s"), folder.buf);
-               strbuf_release(&folder);
-
-               fd = hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR);
+       compute_generation_numbers(&commits);
 
-               if (fd < 0)
-                       die_errno("unable to create '%s'", graph_name);
-       }
+       graph_name = get_commit_graph_filename(obj_dir);
+       if (safe_create_leading_directories(graph_name))
+               die_errno(_("unable to create leading directories of %s"),
+                         graph_name);
 
+       hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR);
        f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
 
        hashwrite_be32(f, GRAPH_SIGNATURE);
index 260a468e73acd89b2a7706c859e8c25b264f3365..96cccb10f3d53a6da77f4ad06a971dbaa152f70c 100644 (file)
@@ -17,6 +17,14 @@ char *get_commit_graph_filename(const char *obj_dir);
  */
 int parse_commit_in_graph(struct commit *item);
 
+/*
+ * It is possible that we loaded commit contents from the commit buffer,
+ * but we also want to ensure the commit-graph content is correctly
+ * checked and filled. Fill the graph_pos and generation members of
+ * the given commit.
+ */
+void load_commit_graph_info(struct commit *item);
+
 struct tree *get_commit_tree_in_graph(const struct commit *c);
 
 struct commit_graph {
index 2d4431a21fd16f55d873b6fe2ba1126722d3c4bf..298ad747c6e0d8b1e800b5e643ab0fa6f67cd491 100644 (file)
--- a/commit.c
+++ b/commit.c
@@ -344,7 +344,7 @@ const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep)
        return ret;
 }
 
-int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size)
+int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph)
 {
        const char *tail = buffer;
        const char *bufptr = buffer;
@@ -399,6 +399,9 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s
        }
        item->date = parse_commit_date(bufptr, tail);
 
+       if (check_graph)
+               load_commit_graph_info(item);
+
        return 0;
 }
 
@@ -425,7 +428,7 @@ int parse_commit_gently(struct commit *item, int quiet_on_missing)
                return error("Object %s not a commit",
                             oid_to_hex(&item->object.oid));
        }
-       ret = parse_commit_buffer(item, buffer, size);
+       ret = parse_commit_buffer(item, buffer, size, 0);
        if (save_commit_buffer && !ret) {
                set_commit_buffer(item, buffer, size);
                return 0;
@@ -653,6 +656,24 @@ static int compare_commits_by_author_date(const void *a_, const void *b_,
        return 0;
 }
 
+int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused)
+{
+       const struct commit *a = a_, *b = b_;
+
+       /* newer commits first */
+       if (a->generation < b->generation)
+               return 1;
+       else if (a->generation > b->generation)
+               return -1;
+
+       /* use date as a heuristic when generations are equal */
+       if (a->date < b->date)
+               return 1;
+       else if (a->date > b->date)
+               return -1;
+       return 0;
+}
+
 int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused)
 {
        const struct commit *a = a_, *b = b_;
@@ -800,11 +821,14 @@ static int queue_has_nonstale(struct prio_queue *queue)
 }
 
 /* all input commits in one and twos[] must have been parsed! */
-static struct commit_list *paint_down_to_common(struct commit *one, int n, struct commit **twos)
+static struct commit_list *paint_down_to_common(struct commit *one, int n,
+                                               struct commit **twos,
+                                               int min_generation)
 {
-       struct prio_queue queue = { compare_commits_by_commit_date };
+       struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
        struct commit_list *result = NULL;
        int i;
+       uint32_t last_gen = GENERATION_NUMBER_INFINITY;
 
        one->object.flags |= PARENT1;
        if (!n) {
@@ -823,6 +847,15 @@ static struct commit_list *paint_down_to_common(struct commit *one, int n, struc
                struct commit_list *parents;
                int flags;
 
+               if (commit->generation > last_gen)
+                       BUG("bad generation skip %8x > %8x at %s",
+                           commit->generation, last_gen,
+                           oid_to_hex(&commit->object.oid));
+               last_gen = commit->generation;
+
+               if (commit->generation < min_generation)
+                       break;
+
                flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
                if (flags == (PARENT1 | PARENT2)) {
                        if (!(commit->object.flags & RESULT)) {
@@ -871,7 +904,7 @@ static struct commit_list *merge_bases_many(struct commit *one, int n, struct co
                        return NULL;
        }
 
-       list = paint_down_to_common(one, n, twos);
+       list = paint_down_to_common(one, n, twos, 0);
 
        while (list) {
                struct commit *commit = pop_commit(&list);
@@ -929,6 +962,7 @@ static int remove_redundant(struct commit **array, int cnt)
                parse_commit(array[i]);
        for (i = 0; i < cnt; i++) {
                struct commit_list *common;
+               uint32_t min_generation = array[i]->generation;
 
                if (redundant[i])
                        continue;
@@ -937,8 +971,12 @@ static int remove_redundant(struct commit **array, int cnt)
                                continue;
                        filled_index[filled] = j;
                        work[filled++] = array[j];
+
+                       if (array[j]->generation < min_generation)
+                               min_generation = array[j]->generation;
                }
-               common = paint_down_to_common(array[i], filled, work);
+               common = paint_down_to_common(array[i], filled, work,
+                                             min_generation);
                if (array[i]->object.flags & PARENT2)
                        redundant[i] = 1;
                for (j = 0; j < filled; j++)
@@ -1048,14 +1086,21 @@ int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit *
 {
        struct commit_list *bases;
        int ret = 0, i;
+       uint32_t min_generation = GENERATION_NUMBER_INFINITY;
 
        if (parse_commit(commit))
                return ret;
-       for (i = 0; i < nr_reference; i++)
+       for (i = 0; i < nr_reference; i++) {
                if (parse_commit(reference[i]))
                        return ret;
+               if (reference[i]->generation < min_generation)
+                       min_generation = reference[i]->generation;
+       }
+
+       if (commit->generation > min_generation)
+               return ret;
 
-       bases = paint_down_to_common(commit, nr_reference, reference);
+       bases = paint_down_to_common(commit, nr_reference, reference, commit->generation);
        if (commit->object.flags & PARENT2)
                ret = 1;
        clear_commit_marks(commit, all_flags);
index e48f05dffcac2fc780386626701cdc64bd795076..cb943013d06a58f2e0427a33f5ab718df23e730f 100644 (file)
--- a/commit.h
+++ b/commit.h
@@ -10,6 +10,9 @@
 #include "pretty.h"
 
 #define COMMIT_NOT_FROM_GRAPH 0xFFFFFFFF
+#define GENERATION_NUMBER_INFINITY 0xFFFFFFFF
+#define GENERATION_NUMBER_MAX 0x3FFFFFFF
+#define GENERATION_NUMBER_ZERO 0
 
 struct commit_list {
        struct commit *item;
@@ -33,6 +36,7 @@ struct commit {
         */
        struct tree *maybe_tree;
        uint32_t graph_pos;
+       uint32_t generation;
        unsigned int index;
 };
 
@@ -72,7 +76,7 @@ struct commit *lookup_commit_reference_by_name(const char *name);
  */
 struct commit *lookup_commit_or_die(const struct object_id *oid, const char *ref_name);
 
-int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size);
+int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph);
 int parse_commit_gently(struct commit *item, int quiet_on_missing);
 static inline int parse_commit(struct commit *item)
 {
@@ -341,6 +345,7 @@ extern int remove_signature(struct strbuf *buf);
 extern int check_commit_signature(const struct commit *commit, struct signature_check *sigc);
 
 int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused);
+int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused);
 
 LAST_ARG_MUST_BE_NULL
 extern int run_commit_hook(int editor_is_used, const char *index_file, const char *name, ...);
index f7f4de3aaf6d42a6dde5811b37aa8239ac183754..d33e2a5cc84623451a0cc02c0345233675b0340d 100644 (file)
--- a/object.c
+++ b/object.c
@@ -210,7 +210,7 @@ struct object *parse_object_buffer(const struct object_id *oid, enum object_type
        } else if (type == OBJ_COMMIT) {
                struct commit *commit = lookup_commit(oid);
                if (commit) {
-                       if (parse_commit_buffer(commit, buffer, size))
+                       if (parse_commit_buffer(commit, buffer, size, 1))
                                return NULL;
                        if (!get_cached_commit_buffer(commit, NULL)) {
                                set_commit_buffer(commit, buffer, size);
index 01c1a82075eca737c2e3749e295ca24bbab870b6..fa3685d91f0464a28ef13102a1672215d4ec5177 100644 (file)
@@ -16,6 +16,7 @@
 #include "trailer.h"
 #include "wt-status.h"
 #include "commit-slab.h"
+#include "commit-graph.h"
 
 static struct ref_msg {
        const char *gone;
@@ -1662,12 +1663,13 @@ static int in_commit_list(const struct commit_list *want, struct commit *c)
 }
 
 /*
- * Test whether the candidate or one of its parents is contained in the list.
+ * Test whether the candidate is contained in the list.
  * Do not recurse to find out, though, but return -1 if inconclusive.
  */
 static enum contains_result contains_test(struct commit *candidate,
                                          const struct commit_list *want,
-                                         struct contains_cache *cache)
+                                         struct contains_cache *cache,
+                                         uint32_t cutoff)
 {
        enum contains_result *cached = contains_cache_at(cache, candidate);
 
@@ -1683,6 +1685,10 @@ static enum contains_result contains_test(struct commit *candidate,
 
        /* Otherwise, we don't know; prepare to recurse */
        parse_commit_or_die(candidate);
+
+       if (candidate->generation < cutoff)
+               return CONTAINS_NO;
+
        return CONTAINS_UNKNOWN;
 }
 
@@ -1698,8 +1704,18 @@ static enum contains_result contains_tag_algo(struct commit *candidate,
                                              struct contains_cache *cache)
 {
        struct contains_stack contains_stack = { 0, 0, NULL };
-       enum contains_result result = contains_test(candidate, want, cache);
+       enum contains_result result;
+       uint32_t cutoff = GENERATION_NUMBER_INFINITY;
+       const struct commit_list *p;
+
+       for (p = want; p; p = p->next) {
+               struct commit *c = p->item;
+               load_commit_graph_info(c);
+               if (c->generation < cutoff)
+                       cutoff = c->generation;
+       }
 
+       result = contains_test(candidate, want, cache, cutoff);
        if (result != CONTAINS_UNKNOWN)
                return result;
 
@@ -1717,7 +1733,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate,
                 * If we just popped the stack, parents->item has been marked,
                 * therefore contains_test will return a meaningful yes/no.
                 */
-               else switch (contains_test(parents->item, want, cache)) {
+               else switch (contains_test(parents->item, want, cache, cutoff)) {
                case CONTAINS_YES:
                        *contains_cache_at(cache, commit) = CONTAINS_YES;
                        contains_stack.nr--;
@@ -1731,7 +1747,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate,
                }
        }
        free(contains_stack.contains_stack);
-       return contains_test(candidate, want, cache);
+       return contains_test(candidate, want, cache, cutoff);
 }
 
 static int commit_contains(struct ref_filter *filter, struct commit *commit,
index 695e5c62764cf4befe09467f1a658b25c01baba1..de4839e634c02cb7497f05a3a4597b9fa5123e32 100644 (file)
@@ -1801,7 +1801,7 @@ static void check_commit(const void *buf, size_t size)
 {
        struct commit c;
        memset(&c, 0, sizeof(c));
-       if (parse_commit_buffer(&c, buf, size))
+       if (parse_commit_buffer(&c, buf, size, 0))
                die("corrupt commit");
 }
 
index a380419b65bdf1ecace4c23831e76bc8a7d81962..77d85aefe7da18a0d7cc2db2baaedacead6b2e97 100755 (executable)
@@ -221,4 +221,13 @@ test_expect_success 'write graph in bare repo' '
 graph_git_behavior 'bare repo with graph, commit 8 vs merge 1' bare commits/8 merge/1
 graph_git_behavior 'bare repo with graph, commit 8 vs merge 2' bare commits/8 merge/2
 
+test_expect_success 'perform fast-forward merge in full repo' '
+       cd "$TRASH_DIRECTORY/full" &&
+       git checkout -b merge-5-to-8 commits/5 &&
+       git merge commits/8 &&
+       git show-ref -s merge-5-to-8 >output &&
+       git show-ref -s commits/8 >expect &&
+       test_cmp expect output
+'
+
 test_done