Merge branch 'jk/http-backend-deadlock' into maint
authorJunio C Hamano <gitster@pobox.com>
Tue, 16 Jun 2015 21:33:45 +0000 (14:33 -0700)
committerJunio C Hamano <gitster@pobox.com>
Tue, 16 Jun 2015 21:33:45 +0000 (14:33 -0700)
Communication between the HTTP server and http_backend process can
lead to a dead-lock when relaying a large ref negotiation request.
Diagnose the situation better, and mitigate it by reading such a
request first into core (to a reasonable limit).

* jk/http-backend-deadlock:
http-backend: spool ref negotiation requests to buffer
t5551: factor out tag creation
http-backend: fix die recursion with custom handler

Documentation/git-http-backend.txt
http-backend.c
t/t5551-http-fetch-smart.sh
index 3ca18c4de519ebdf0a2af6509d0d6bed3e6e492c..9268fb6b1ea2de2b29fb077a1a675c727526abeb 100644 (file)
@@ -255,6 +255,15 @@ The GIT_HTTP_EXPORT_ALL environmental variable may be passed to
 'git-http-backend' to bypass the check for the "git-daemon-export-ok"
 file in each repository before allowing export of that repository.
 
+The `GIT_HTTP_MAX_REQUEST_BUFFER` environment variable (or the
+`http.maxRequestBuffer` config variable) may be set to change the
+largest ref negotiation request that git will handle during a fetch; any
+fetch requiring a larger buffer will not succeed.  This value should not
+normally need to be changed, but may be helpful if you are fetching from
+a repository with an extremely large number of refs.  The value can be
+specified with a unit (e.g., `100M` for 100 megabytes). The default is
+10 megabytes.
+
 The backend process sets GIT_COMMITTER_NAME to '$REMOTE_USER' and
 GIT_COMMITTER_EMAIL to '$\{REMOTE_USER}@http.$\{REMOTE_ADDR\}',
 ensuring that any reflogs created by 'git-receive-pack' contain some
index b6c0484fb24de853ac205233e4e07d2e4e94ed75..6bf139b76873bd8553652a4658e6629b24aeeacb 100644 (file)
@@ -13,18 +13,20 @@ static const char content_type[] = "Content-Type";
 static const char content_length[] = "Content-Length";
 static const char last_modified[] = "Last-Modified";
 static int getanyfile = 1;
+static unsigned long max_request_buffer = 10 * 1024 * 1024;
 
 static struct string_list *query_params;
 
 struct rpc_service {
        const char *name;
        const char *config_name;
+       unsigned buffer_input : 1;
        signed enabled : 2;
 };
 
 static struct rpc_service rpc_service[] = {
-       { "upload-pack", "uploadpack", 1 },
-       { "receive-pack", "receivepack", -1 },
+       { "upload-pack", "uploadpack", 1, 1 },
+       { "receive-pack", "receivepack", 0, -1 },
 };
 
 static struct string_list *get_parameters(void)
@@ -225,6 +227,7 @@ static void http_config(void)
        struct strbuf var = STRBUF_INIT;
 
        git_config_get_bool("http.getanyfile", &getanyfile);
+       git_config_get_ulong("http.maxrequestbuffer", &max_request_buffer);
 
        for (i = 0; i < ARRAY_SIZE(rpc_service); i++) {
                struct rpc_service *svc = &rpc_service[i];
@@ -266,9 +269,52 @@ static struct rpc_service *select_service(const char *name)
        return svc;
 }
 
-static void inflate_request(const char *prog_name, int out)
+/*
+ * This is basically strbuf_read(), except that if we
+ * hit max_request_buffer we die (we'd rather reject a
+ * maliciously large request than chew up infinite memory).
+ */
+static ssize_t read_request(int fd, unsigned char **out)
+{
+       size_t len = 0, alloc = 8192;
+       unsigned char *buf = xmalloc(alloc);
+
+       if (max_request_buffer < alloc)
+               max_request_buffer = alloc;
+
+       while (1) {
+               ssize_t cnt;
+
+               cnt = read_in_full(fd, buf + len, alloc - len);
+               if (cnt < 0) {
+                       free(buf);
+                       return -1;
+               }
+
+               /* partial read from read_in_full means we hit EOF */
+               len += cnt;
+               if (len < alloc) {
+                       *out = buf;
+                       return len;
+               }
+
+               /* otherwise, grow and try again (if we can) */
+               if (alloc == max_request_buffer)
+                       die("request was larger than our maximum size (%lu);"
+                           " try setting GIT_HTTP_MAX_REQUEST_BUFFER",
+                           max_request_buffer);
+
+               alloc = alloc_nr(alloc);
+               if (alloc > max_request_buffer)
+                       alloc = max_request_buffer;
+               REALLOC_ARRAY(buf, alloc);
+       }
+}
+
+static void inflate_request(const char *prog_name, int out, int buffer_input)
 {
        git_zstream stream;
+       unsigned char *full_request = NULL;
        unsigned char in_buf[8192];
        unsigned char out_buf[8192];
        unsigned long cnt = 0;
@@ -277,11 +323,21 @@ static void inflate_request(const char *prog_name, int out)
        git_inflate_init_gzip_only(&stream);
 
        while (1) {
-               ssize_t n = xread(0, in_buf, sizeof(in_buf));
+               ssize_t n;
+
+               if (buffer_input) {
+                       if (full_request)
+                               n = 0; /* nothing left to read */
+                       else
+                               n = read_request(0, &full_request);
+                       stream.next_in = full_request;
+               } else {
+                       n = xread(0, in_buf, sizeof(in_buf));
+                       stream.next_in = in_buf;
+               }
+
                if (n <= 0)
                        die("request ended in the middle of the gzip stream");
-
-               stream.next_in = in_buf;
                stream.avail_in = n;
 
                while (0 < stream.avail_in) {
@@ -307,9 +363,22 @@ static void inflate_request(const char *prog_name, int out)
 done:
        git_inflate_end(&stream);
        close(out);
+       free(full_request);
+}
+
+static void copy_request(const char *prog_name, int out)
+{
+       unsigned char *buf;
+       ssize_t n = read_request(0, &buf);
+       if (n < 0)
+               die_errno("error reading request body");
+       if (write_in_full(out, buf, n) != n)
+               die("%s aborted reading request", prog_name);
+       close(out);
+       free(buf);
 }
 
-static void run_service(const char **argv)
+static void run_service(const char **argv, int buffer_input)
 {
        const char *encoding = getenv("HTTP_CONTENT_ENCODING");
        const char *user = getenv("REMOTE_USER");
@@ -334,7 +403,7 @@ static void run_service(const char **argv)
                                 "GIT_COMMITTER_EMAIL=%s@http.%s", user, host);
 
        cld.argv = argv;
-       if (gzipped_request)
+       if (buffer_input || gzipped_request)
                cld.in = -1;
        cld.git_cmd = 1;
        if (start_command(&cld))
@@ -342,7 +411,9 @@ static void run_service(const char **argv)
 
        close(1);
        if (gzipped_request)
-               inflate_request(argv[0], cld.in);
+               inflate_request(argv[0], cld.in, buffer_input);
+       else if (buffer_input)
+               copy_request(argv[0], cld.in);
        else
                close(0);
 
@@ -392,7 +463,7 @@ static void get_info_refs(char *arg)
                packet_flush(1);
 
                argv[0] = svc->name;
-               run_service(argv);
+               run_service(argv, 0);
 
        } else {
                select_getanyfile();
@@ -496,25 +567,28 @@ static void service_rpc(char *service_name)
        end_headers();
 
        argv[0] = svc->name;
-       run_service(argv);
+       run_service(argv, svc->buffer_input);
        strbuf_release(&buf);
 }
 
+static int dead;
 static NORETURN void die_webcgi(const char *err, va_list params)
 {
-       static int dead;
+       if (dead <= 1) {
+               vreportf("fatal: ", err, params);
 
-       if (!dead) {
-               dead = 1;
                http_status(500, "Internal Server Error");
                hdr_nocache();
                end_headers();
-
-               vreportf("fatal: ", err, params);
        }
        exit(0); /* we successfully reported a failure ;-) */
 }
 
+static int die_webcgi_recursing(void)
+{
+       return dead++ > 1;
+}
+
 static char* getdir(void)
 {
        struct strbuf buf = STRBUF_INIT;
@@ -569,6 +643,7 @@ int main(int argc, char **argv)
 
        git_extract_argv0_path(argv[0]);
        set_die_routine(die_webcgi);
+       set_die_is_recursing_routine(die_webcgi_recursing);
 
        if (!method)
                die("No REQUEST_METHOD from server");
@@ -619,6 +694,9 @@ int main(int argc, char **argv)
                not_found("Repository not exported: '%s'", dir);
 
        http_config();
+       max_request_buffer = git_env_ulong("GIT_HTTP_MAX_REQUEST_BUFFER",
+                                          max_request_buffer);
+
        cmd->imp(cmd_arg);
        return 0;
 }
index 2b293119015af7bfb8f151d2a3ec8c51c4c0b209..58207d88250f57d448f4e83baf2add3192cb9455 100755 (executable)
@@ -218,27 +218,35 @@ test_expect_success 'transfer.hiderefs works over smart-http' '
        git -C hidden.git rev-parse --verify b
 '
 
-test_expect_success 'create 2,000 tags in the repo' '
-       (
-       cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
-       for i in $(test_seq 2000)
+# create an arbitrary number of tags, numbered from tag-$1 to tag-$2
+create_tags () {
+       rm -f marks &&
+       for i in $(test_seq "$1" "$2")
        do
-               echo "commit refs/heads/too-many-refs"
-               echo "mark :$i"
-               echo "committer git <git@example.com> $i +0000"
-               echo "data 0"
-               echo "M 644 inline bla.txt"
-               echo "data 4"
-               echo "bla"
+               # don't use here-doc, because it requires a process
+               # per loop iteration
+               echo "commit refs/heads/too-many-refs-$1" &&
+               echo "mark :$i" &&
+               echo "committer git <git@example.com> $i +0000" &&
+               echo "data 0" &&
+               echo "M 644 inline bla.txt" &&
+               echo "data 4" &&
+               echo "bla" &&
                # make every commit dangling by always
                # rewinding the branch after each commit
-               echo "reset refs/heads/too-many-refs"
-               echo "from :1"
+               echo "reset refs/heads/too-many-refs-$1" &&
+               echo "from :$1"
        done | git fast-import --export-marks=marks &&
 
        # now assign tags to all the dangling commits we created above
        tag=$(perl -e "print \"bla\" x 30") &&
        sed -e "s|^:\([^ ]*\) \(.*\)$|\2 refs/tags/$tag-\1|" <marks >>packed-refs
+}
+
+test_expect_success 'create 2,000 tags in the repo' '
+       (
+               cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
+               create_tags 1 2000
        )
 '
 
@@ -259,5 +267,20 @@ test_expect_success 'large fetch-pack requests can be split across POSTs' '
        test_line_count = 2 posts
 '
 
+test_expect_success EXPENSIVE 'http can handle enormous ref negotiation' '
+       (
+               cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
+               create_tags 2001 50000
+       ) &&
+       git -C too-many-refs fetch -q --tags &&
+       (
+               cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
+               create_tags 50001 100000
+       ) &&
+       git -C too-many-refs fetch -q --tags &&
+       git -C too-many-refs for-each-ref refs/tags >tags &&
+       test_line_count = 100000 tags
+'
+
 stop_httpd
 test_done