http: never use curl_easy_perform
author Jeff King <peff@peff.net>
Tue, 18 Feb 2014 10:34:20 +0000 (05:34 -0500)
committer Junio C Hamano <gitster@pobox.com>
Tue, 18 Feb 2014 23:50:57 +0000 (15:50 -0800)
We currently don't reuse http connections when fetching via
the smart-http protocol. This is bad because the TCP
handshake introduces latency, and especially because SSL
connection setup may be non-trivial.

We can fix it by consistently using curl's "multi"
interface. The reason is rather complicated:

Our http code has two ways of being used: queuing many
"slots" to be fetched in parallel, or fetching a single
request in a blocking manner. The parallel code is built on
curl's "multi" interface. Most of the single-request code
uses http_request, which is built on top of the parallel
code (we just feed it one slot, and wait until it finishes).

However, one could also accomplish the single-request scheme
by avoiding curl's multi interface entirely and just using
curl_easy_perform. This is simpler, and is used by post_rpc
in the smart-http protocol.

It does work to use the same curl handle in both contexts,
as long as it is not at the same time. However, internally
curl may not share all of the cached resources between both
contexts. In particular, a connection formed using the
"multi" code will go into a reuse pool connected to the
"multi" object. Further requests using the "easy" interface
will not be able to reuse that connection.

The smart http protocol does ref discovery via http_request,
which uses the "multi" interface, and then follows up with
the "easy" interface for its rpc calls. As a result, we make
two HTTP connections rather than reusing a single one.

We could teach the ref discovery to use the "easy"
interface. But it is only once we have done this discovery
that we know whether the protocol will be smart or dumb. If
it is dumb, then our further requests, which want to fetch
objects in parallel, will not be able to reuse the same
connection.

Instead, this patch switches post_rpc to build on the
parallel interface, which means that we use it consistently
everywhere. It's a little more complicated to use, but since
we have the infrastructure already, it doesn't add any code;
we can just factor out the relevant bits from http_request.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
http.c
http.h
remote-curl.c
diff --git a/http.c b/http.c
index 70eaa26e88cbcfa62e7d5e967b4e519432319210..1212c587afbc4df6100f6a099939550eabeb5ebf 100644 (file)
--- a/http.c
+++ b/http.c
@@ -880,6 +880,20 @@ int handle_curl_result(struct slot_results *results)
        }
 }
 
+int run_one_slot(struct active_request_slot *slot,
+                struct slot_results *results)
+{
+       slot->results = results;
+       if (!start_active_slot(slot)) {
+               snprintf(curl_errorstr, sizeof(curl_errorstr),
+                        "failed to start HTTP request");
+               return HTTP_START_FAILED;
+       }
+
+       run_active_slot(slot);
+       return handle_curl_result(results);
+}
+
 static CURLcode curlinfo_strbuf(CURL *curl, CURLINFO info, struct strbuf *buf)
 {
        char *ptr;
@@ -907,7 +921,6 @@ static int http_request(const char *url,
        int ret;
 
        slot = get_active_slot();
-       slot->results = &results;
        curl_easy_setopt(slot->curl, CURLOPT_HTTPGET, 1);
 
        if (result == NULL) {
@@ -942,14 +955,7 @@ static int http_request(const char *url,
        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, headers);
        curl_easy_setopt(slot->curl, CURLOPT_ENCODING, "gzip");
 
-       if (start_active_slot(slot)) {
-               run_active_slot(slot);
-               ret = handle_curl_result(&results);
-       } else {
-               snprintf(curl_errorstr, sizeof(curl_errorstr),
-                        "failed to start HTTP request");
-               ret = HTTP_START_FAILED;
-       }
+       ret = run_one_slot(slot, &results);
 
        if (options && options->content_type)
                curlinfo_strbuf(slot->curl, CURLINFO_CONTENT_TYPE,
diff --git a/http.h b/http.h
index cd37d5881cb0e63e52bd33d0b49bed49645340f5..a82888445e9f04acfe27e36b1f274ffb9659cddf 100644 (file)
--- a/http.h
+++ b/http.h
@@ -90,6 +90,15 @@ extern void finish_active_slot(struct active_request_slot *slot);
 extern void finish_all_active_slots(void);
 extern int handle_curl_result(struct slot_results *results);
 
+/*
+ * This will run one slot to completion in a blocking manner, similar to how
+ * curl_easy_perform would work (but we don't want to use that, because
+ * we do not want to intermingle calls to curl_multi and curl_easy).
+ *
+ */
+int run_one_slot(struct active_request_slot *slot,
+                struct slot_results *results);
+
 #ifdef USE_CURL_MULTI
 extern void fill_active_slots(void);
 extern void add_fill_function(void *data, int (*fill)(void *));
diff --git a/remote-curl.c b/remote-curl.c
index 10cb0114eafdfd9760b3fdb3c0217d801195ba13..52c2d96ce6183ca8b3498794304fa8dc98f8f42a 100644 (file)
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -423,11 +423,8 @@ static int run_slot(struct active_request_slot *slot,
        if (!results)
                results = &results_buf;
 
-       slot->results = results;
-       slot->curl_result = curl_easy_perform(slot->curl);
-       finish_active_slot(slot);
+       err = run_one_slot(slot, results);
 
-       err = handle_curl_result(results);
        if (err != HTTP_OK && err != HTTP_REAUTH) {
                error("RPC failed; result=%d, HTTP code = %ld",
                      results->curl_result, results->http_code);