git tag --contains: avoid stack overflow
authorJean-Jacques Lafay <jeanjacques.lafay@gmail.com>
Thu, 24 Apr 2014 12:24:39 +0000 (14:24 +0200)
committerJunio C Hamano <gitster@pobox.com>
Fri, 25 Apr 2014 16:35:20 +0000 (09:35 -0700)
In large repos, the recursion implementation of contains(commit,
commit_list) may result in a stack overflow. Replace the recursion with
a loop to fix it.

This problem is more apparent on Windows than on Linux, where the stack
is more limited by default.

See also this thread on the msysGit list:

https://groups.google.com/d/topic/msysgit/FqT6boJrb2g/discussion

[jes: re-written to imitate the original recursion more closely]

Thomas Braun pointed out several documentation shortcomings.

Tests are run only if ulimit -s is available. This means they cannot
be run on Windows.

Signed-off-by: Jean-Jacques Lafay <jeanjacques.lafay@gmail.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Tested-by: Stepan Kasal <kasal@ucw.cz>
Reviewed-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/tag.c
t/t7004-tag.sh
index 6c7c6bde9de9cbc0ace94aa75df30b7b88b43aca..f3440023abecc672386381749d5e5855f3673ecf 100644 (file)
@@ -80,11 +80,19 @@ static int in_commit_list(const struct commit_list *want, struct commit *c)
        return 0;
 }
 
        return 0;
 }
 
-static int contains_recurse(struct commit *candidate,
+enum contains_result {
+       CONTAINS_UNKNOWN = -1,
+       CONTAINS_NO = 0,
+       CONTAINS_YES = 1,
+};
+
+/*
+ * Test whether the candidate or one of its parents is contained in the list.
+ * Do not recurse to find out, though, but return -1 if inconclusive.
+ */
+static enum contains_result contains_test(struct commit *candidate,
                            const struct commit_list *want)
 {
                            const struct commit_list *want)
 {
-       struct commit_list *p;
-
        /* was it previously marked as containing a want commit? */
        if (candidate->object.flags & TMP_MARK)
                return 1;
        /* was it previously marked as containing a want commit? */
        if (candidate->object.flags & TMP_MARK)
                return 1;
@@ -92,26 +100,78 @@ static int contains_recurse(struct commit *candidate,
        if (candidate->object.flags & UNINTERESTING)
                return 0;
        /* or are we it? */
        if (candidate->object.flags & UNINTERESTING)
                return 0;
        /* or are we it? */
-       if (in_commit_list(want, candidate))
+       if (in_commit_list(want, candidate)) {
+               candidate->object.flags |= TMP_MARK;
                return 1;
                return 1;
+       }
 
        if (parse_commit(candidate) < 0)
                return 0;
 
 
        if (parse_commit(candidate) < 0)
                return 0;
 
-       /* Otherwise recurse and mark ourselves for future traversals. */
-       for (p = candidate->parents; p; p = p->next) {
-               if (contains_recurse(p->item, want)) {
-                       candidate->object.flags |= TMP_MARK;
-                       return 1;
-               }
-       }
-       candidate->object.flags |= UNINTERESTING;
-       return 0;
+       return -1;
 }
 
 }
 
-static int contains(struct commit *candidate, const struct commit_list *want)
+/*
+ * Mimicking the real stack, this stack lives on the heap, avoiding stack
+ * overflows.
+ *
+ * At each recursion step, the stack items points to the commits whose
+ * ancestors are to be inspected.
+ */
+struct stack {
+       int nr, alloc;
+       struct stack_entry {
+               struct commit *commit;
+               struct commit_list *parents;
+       } *stack;
+};
+
+static void push_to_stack(struct commit *candidate, struct stack *stack)
+{
+       int index = stack->nr++;
+       ALLOC_GROW(stack->stack, stack->nr, stack->alloc);
+       stack->stack[index].commit = candidate;
+       stack->stack[index].parents = candidate->parents;
+}
+
+static enum contains_result contains(struct commit *candidate,
+               const struct commit_list *want)
 {
 {
-       return contains_recurse(candidate, want);
+       struct stack stack = { 0, 0, NULL };
+       int result = contains_test(candidate, want);
+
+       if (result != CONTAINS_UNKNOWN)
+               return result;
+
+       push_to_stack(candidate, &stack);
+       while (stack.nr) {
+               struct stack_entry *entry = &stack.stack[stack.nr - 1];
+               struct commit *commit = entry->commit;
+               struct commit_list *parents = entry->parents;
+
+               if (!parents) {
+                       commit->object.flags |= UNINTERESTING;
+                       stack.nr--;
+               }
+               /*
+                * If we just popped the stack, parents->item has been marked,
+                * therefore contains_test will return a meaningful 0 or 1.
+                */
+               else switch (contains_test(parents->item, want)) {
+               case CONTAINS_YES:
+                       commit->object.flags |= TMP_MARK;
+                       stack.nr--;
+                       break;
+               case CONTAINS_NO:
+                       entry->parents = parents->next;
+                       break;
+               case CONTAINS_UNKNOWN:
+                       push_to_stack(parents->item, &stack);
+                       break;
+               }
+       }
+       free(stack.stack);
+       return contains_test(candidate, want);
 }
 
 static void show_tag_lines(const unsigned char *sha1, int lines)
 }
 
 static void show_tag_lines(const unsigned char *sha1, int lines)
index 143a8ea60507a35bbdfb310eb74a33cf5b88bf1f..e4ab0f5b64194da05b159782a5e41479bf47de20 100755 (executable)
@@ -1423,4 +1423,30 @@ EOF
        test_cmp expect actual
 '
 
        test_cmp expect actual
 '
 
+run_with_limited_stack () {
+       (ulimit -s 64 && "$@")
+}
+
+test_lazy_prereq ULIMIT 'run_with_limited_stack true'
+
+# we require ulimit, this excludes Windows
+test_expect_success ULIMIT '--contains works in a deep repo' '
+       >expect &&
+       i=1 &&
+       while test $i -lt 4000
+       do
+               echo "commit refs/heads/master
+committer A U Thor <author@example.com> $((1000000000 + $i * 100)) +0200
+data <<EOF
+commit #$i
+EOF"
+               test $i = 1 && echo "from refs/heads/master^0"
+               i=$(($i + 1))
+       done | git fast-import &&
+       git checkout master &&
+       git tag far-far-away HEAD^ &&
+       run_with_limited_stack git tag --contains HEAD >actual &&
+       test_cmp expect actual
+'
+
 test_done
 test_done