quickfetch(): Prevent overflow of the rev-list command line
author Johan Herland <johan@herland.net>
Thu, 9 Jul 2009 23:52:30 +0000 (01:52 +0200)
committer Junio C Hamano <gitster@pobox.com>
Sat, 11 Jul 2009 06:53:17 +0000 (23:53 -0700)
quickfetch() calls rev-list to check whether the objects we are about to
fetch are already present in the repo (if so, we can skip the object fetch).
However, when there are many (~1000) refs to be fetched, the rev-list
command line grows larger than the maximum command line size on some systems
(32K in Windows). This causes rev-list to fail, making quickfetch() return
non-zero, which unnecessarily triggers the transport machinery. This somehow
causes fetch to fail with a non-zero exit code.

By using the --stdin option to rev-list (and feeding the object list to its
standard input), we prevent the overflow of the rev-list command line. This
allows quickfetch(), and subsequently the overall fetch, to succeed.

However, using rev-list --stdin is not entirely straightforward: rev-list
terminates immediately when encountering an unknown object, which can
trigger SIGPIPE if we are still writing objects to its standard input.
We therefore temporarily ignore SIGPIPE so that the fetch process is not
terminated.

The patch also contains a testcase to verify the fix (note that before
the patch, the testcase would only fail on msysGit).

Signed-off-by: Johan Herland <johan@herland.net>
Improved-by: Johannes Sixt <j6t@kdbg.org>
Improved-by: Alex Riesen <raa.lkml@gmail.com>
Tested-by: Peter Krefting <peter@softwolves.pp.se>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin-fetch.c
t/t5502-quickfetch.sh
index cd5eb9aff530fcb9629bd561bc774e2b9ad69afc..817dd6bff0bf0ddc9a3d00191bb3c422300b878b 100644 (file)
@@ -400,14 +400,14 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
 
 /*
  * We would want to bypass the object transfer altogether if
- * everything we are going to fetch already exists and connected
+ * everything we are going to fetch already exists and is connected
  * locally.
  *
- * The refs we are going to fetch are in to_fetch (nr_heads in
- * total).  If running
+ * The refs we are going to fetch are in ref_map.  If running
  *
- *  $ git rev-list --objects to_fetch[0] to_fetch[1] ... --not --all
+ *  $ git rev-list --objects --stdin --not --all
  *
+ * (feeding all the refs in ref_map on its standard input)
  * does not error out, that means everything reachable from the
  * refs we are going to fetch exists and is connected to some of
  * our existing refs.
@@ -416,8 +416,9 @@ static int quickfetch(struct ref *ref_map)
 {
        struct child_process revlist;
        struct ref *ref;
-       char **argv;
-       int i, err;
+       int err;
+       const char *argv[] = {"rev-list",
+               "--quiet", "--objects", "--stdin", "--not", "--all", NULL};
 
        /*
         * If we are deepening a shallow clone we already have these
@@ -429,34 +430,46 @@ static int quickfetch(struct ref *ref_map)
        if (depth)
                return -1;
 
-       for (i = 0, ref = ref_map; ref; ref = ref->next)
-               i++;
-       if (!i)
+       if (!ref_map)
                return 0;
 
-       argv = xmalloc(sizeof(*argv) * (i + 6));
-       i = 0;
-       argv[i++] = xstrdup("rev-list");
-       argv[i++] = xstrdup("--quiet");
-       argv[i++] = xstrdup("--objects");
-       for (ref = ref_map; ref; ref = ref->next)
-               argv[i++] = xstrdup(sha1_to_hex(ref->old_sha1));
-       argv[i++] = xstrdup("--not");
-       argv[i++] = xstrdup("--all");
-       argv[i++] = NULL;
-
        memset(&revlist, 0, sizeof(revlist));
-       revlist.argv = (const char**)argv;
+       revlist.argv = argv;
        revlist.git_cmd = 1;
-       revlist.no_stdin = 1;
        revlist.no_stdout = 1;
        revlist.no_stderr = 1;
-       err = run_command(&revlist);
+       revlist.in = -1;
+
+       err = start_command(&revlist);
+       if (err) {
+               error("could not run rev-list");
+               return err;
+       }
+
+       /*
+        * If rev-list --stdin encounters an unknown commit, it terminates,
+        * which will cause SIGPIPE in the write loop below.
+        */
+       sigchain_push(SIGPIPE, SIG_IGN);
+
+       for (ref = ref_map; ref; ref = ref->next) {
+               if (write_in_full(revlist.in, sha1_to_hex(ref->old_sha1), 40) < 0 ||
+                   write_in_full(revlist.in, "\n", 1) < 0) {
+                       if (errno != EPIPE && errno != EINVAL)
+                               error("failed write to rev-list: %s", strerror(errno));
+                       err = -1;
+                       break;
+               }
+       }
+
+       if (close(revlist.in)) {
+               error("failed to close rev-list's stdin: %s", strerror(errno));
+               err = -1;
+       }
+
+       sigchain_pop(SIGPIPE);
 
-       for (i = 0; argv[i]; i++)
-               free(argv[i]);
-       free(argv);
-       return err;
+       return finish_command(&revlist) || err;
 }
 
 static int fetch_refs(struct transport *transport, struct ref *ref_map)
index 16eadd6b68664884836976aafb6dcbb582603c09..1037a723fe74756f241346a077f4f3682dbbf45d 100755 (executable)
@@ -119,4 +119,24 @@ test_expect_success 'quickfetch should not copy from alternate' '
 
 '
 
+test_expect_success 'quickfetch should handle ~1000 refs (on Windows)' '
+
+       git gc &&
+       head=$(git rev-parse HEAD) &&
+       branchprefix="$head refs/heads/branch" &&
+       for i in 0 1 2 3 4 5 6 7 8 9; do
+               for j in 0 1 2 3 4 5 6 7 8 9; do
+                       for k in 0 1 2 3 4 5 6 7 8 9; do
+                               echo "$branchprefix$i$j$k" >> .git/packed-refs
+                       done
+               done
+       done &&
+       (
+               cd cloned &&
+               git fetch &&
+               git fetch
+       )
+
+'
+
 test_done