Smart HTTP fetch: gzip requests
[gitweb.git] / remote-curl.c
index 5c9dd97d1da1f4896ebd3d9c5296308bd912d1e7..0d7cf16e9c06d556cad8d92b6598a8da079426a4 100644 (file)
@@ -5,6 +5,8 @@
 #include "http.h"
 #include "exec_cmd.h"
 #include "run-command.h"
+#include "pkt-line.h"
+#include "sideband.h"
 
 static struct remote *remote;
 static const char *url;
@@ -15,7 +17,8 @@ struct options {
        unsigned long depth;
        unsigned progress : 1,
                followtags : 1,
-               dry_run : 1;
+               dry_run : 1,
+               thin : 1;
 };
 static struct options options;
 
@@ -42,7 +45,7 @@ static int set_option(const char *name, const char *value)
                        options.progress = 0;
                else
                        return -1;
-               return 1 /* TODO implement later */;
+               return 0;
        }
        else if (!strcmp(name, "depth")) {
                char *end;
@@ -50,7 +53,7 @@ static int set_option(const char *name, const char *value)
                if (value == end || *end)
                        return -1;
                options.depth = v;
-               return 1 /* TODO implement later */;
+               return 0;
        }
        else if (!strcmp(name, "followtags")) {
                if (!strcmp(value, "true"))
@@ -59,7 +62,7 @@ static int set_option(const char *name, const char *value)
                        options.followtags = 0;
                else
                        return -1;
-               return 1 /* TODO implement later */;
+               return 0;
        }
        else if (!strcmp(name, "dry-run")) {
                if (!strcmp(value, "true"))
@@ -75,21 +78,46 @@ static int set_option(const char *name, const char *value)
        }
 }
 
-static struct ref *get_refs(void)
+struct discovery {     /* result of one "<url>/info/refs" request, filled in by discover_refs() */
+       const char *service;    /* service name the request was made for; compared on reuse */
+       char *buf_alloc;        /* start of the detached response buffer; this is what gets freed */
+       char *buf;              /* read position in the response; starts at buf_alloc and is handed to packet_get_line() */
+       size_t len;             /* byte count paired with buf */
+       unsigned proto_git : 1; /* set once the response has been validated as a smart HTTP reply */
+};
+static struct discovery *last_discovery;       /* most recent result, reused when the same service is asked for again */
+
+/*
+ * Release a discovery result together with the response buffer it owns.
+ * A NULL argument is ignored.  When d is the cached last_discovery the
+ * cache pointer is cleared first so that it never dangles.
+ */
+static void free_discovery(struct discovery *d)
+{
+       if (!d)
+               return;
+       if (last_discovery == d)
+               last_discovery = NULL;
+       free(d->buf_alloc);
+       free(d);
+}
+
+static struct discovery* discover_refs(const char *service)
 {
        struct strbuf buffer = STRBUF_INIT;
-       char *data, *start, *mid;
-       char *ref_name;
+       struct discovery *last = last_discovery;
        char *refs_url;
-       int i = 0;
-       int http_ret;
+       int http_ret, is_http = 0;
 
-       struct ref *refs = NULL;
-       struct ref *ref = NULL;
-       struct ref *last_ref = NULL;
+       if (last && !strcmp(service, last->service))
+               return last;
+       free_discovery(last);
 
-       refs_url = xmalloc(strlen(url) + 11);
-       sprintf(refs_url, "%s/info/refs", url);
+       strbuf_addf(&buffer, "%s/info/refs", url);
+       if (!prefixcmp(url, "http://") || !prefixcmp(url, "https://")) {
+               is_http = 1;
+               if (!strchr(url, '?'))
+                       strbuf_addch(&buffer, '?');
+               else
+                       strbuf_addch(&buffer, '&');
+               strbuf_addf(&buffer, "service=%s", service);
+       }
+       refs_url = strbuf_detach(&buffer, NULL);
 
        init_walker();
        http_ret = http_get_strbuf(refs_url, &buffer, HTTP_NO_CACHE);
@@ -104,10 +132,86 @@ static struct ref *get_refs(void)
                die("HTTP request failed");
        }
 
-       data = buffer.buf;
+       last= xcalloc(1, sizeof(*last_discovery));
+       last->service = service;
+       last->buf_alloc = strbuf_detach(&buffer, &last->len);
+       last->buf = last->buf_alloc;
+
+       if (is_http && 5 <= last->len && last->buf[4] == '#') {
+               /* smart HTTP response; validate that the service
+                * pkt-line matches our request.
+                */
+               struct strbuf exp = STRBUF_INIT;
+
+               if (packet_get_line(&buffer, &last->buf, &last->len) <= 0)
+                       die("%s has invalid packet header", refs_url);
+               if (buffer.len && buffer.buf[buffer.len - 1] == '\n')
+                       strbuf_setlen(&buffer, buffer.len - 1);
+
+               strbuf_addf(&exp, "# service=%s", service);
+               if (strbuf_cmp(&exp, &buffer))
+                       die("invalid server response; got '%s'", buffer.buf);
+               strbuf_release(&exp);
+
+               /* The header can include additional metadata lines, up
+                * until a packet flush marker.  Ignore these now, but
+                * in the future we might start to scan them.
+                */
+               strbuf_reset(&buffer);
+               while (packet_get_line(&buffer, &last->buf, &last->len) > 0)
+                       strbuf_reset(&buffer);
+
+               last->proto_git = 1;
+       }
+
+       free(refs_url);
+       strbuf_release(&buffer);
+       last_discovery = last;
+       return last;
+}
+
+/*
+ * Async callback used by parse_git_refs(): write the buffered ref
+ * advertisement (data is a struct discovery) to fd, then close fd.
+ * Returns 0 when every byte was written and 1 otherwise.
+ */
+static int write_discovery(int fd, void *data)
+{
+       struct discovery *d = data;
+       int failed = write_in_full(fd, d->buf, d->len) != d->len;
+
+       close(fd);
+       return failed;
+}
+
+/*
+ * Turn a smart HTTP ref advertisement into a ref list.
+ *
+ * The buffered advertisement is fed through an async writer
+ * (write_discovery) and read back with get_remote_heads().
+ * Dies if the writer cannot be started or reports failure.
+ */
+static struct ref *parse_git_refs(struct discovery *heads)
+{
+       struct async writer;
+       struct ref *refs = NULL;
+
+       memset(&writer, 0, sizeof(writer));
+       writer.data = heads;
+       writer.proc = write_discovery;
+
+       if (start_async(&writer))
+               die("cannot start thread to parse advertised refs");
+       get_remote_heads(writer.out, &refs, 0, NULL, 0, NULL);
+       close(writer.out);
+       if (finish_async(&writer))
+               die("ref parsing thread failed");
+       return refs;
+}
+
+static struct ref *parse_info_refs(struct discovery *heads)
+{
+       char *data, *start, *mid;
+       char *ref_name;
+       int i = 0;
+
+       struct ref *refs = NULL;
+       struct ref *ref = NULL;
+       struct ref *last_ref = NULL;
+
+       data = heads->buf;
        start = NULL;
        mid = data;
-       while (i < buffer.len) {
+       while (i < heads->len) {
                if (!start) {
                        start = &data[i];
                }
@@ -131,8 +235,7 @@ static struct ref *get_refs(void)
                i++;
        }
 
-       strbuf_release(&buffer);
-
+       init_walker();
        ref = alloc_ref("HEAD");
        if (!walker->fetch_ref(walker, ref) &&
            !resolve_remote_symref(ref, refs)) {
@@ -142,11 +245,23 @@ static struct ref *get_refs(void)
                free(ref);
        }
 
-       strbuf_release(&buffer);
-       free(refs_url);
        return refs;
 }
 
+/*
+ * List the remote's refs.  for_push selects the "git-receive-pack"
+ * service for discovery, otherwise "git-upload-pack" is used.  A smart
+ * (proto_git) reply is parsed by parse_git_refs(), anything else by
+ * parse_info_refs().
+ */
+static struct ref *get_refs(int for_push)
+{
+       const char *service = for_push ? "git-receive-pack" : "git-upload-pack";
+       struct discovery *heads = discover_refs(service);
+
+       return heads->proto_git ? parse_git_refs(heads) : parse_info_refs(heads);
+}
+
 static void output_refs(struct ref *refs)
 {
        struct ref *posn;
@@ -161,11 +276,244 @@ static void output_refs(struct ref *refs)
        free_refs(refs);
 }
 
+struct rpc_state {     /* state shared by rpc_service(), post_rpc() and the curl callbacks */
+       const char *service_name;       /* e.g. "git-upload-pack"; used for the URL, headers and verbose output */
+       const char **argv;              /* command line of the child process started by rpc_service() */
+       char *service_url;              /* "<url>/<service_name>", the POST target */
+       char *hdr_content_type;         /* complete "Content-Type: ..." request header line */
+       char *hdr_accept;               /* complete "Accept: ..." request header line */
+       char *buf;                      /* request buffer of alloc bytes */
+       size_t alloc;                   /* size of buf */
+       size_t len;                     /* number of request bytes currently held in buf */
+       size_t pos;                     /* offset in buf of the next byte rpc_out() hands to curl */
+       int in;                         /* descriptor the HTTP response is written to (set from client.in) */
+       int out;                        /* descriptor request packets are read from (set from client.out) */
+       struct strbuf result;           /* whatever the child still writes after the request loop ends */
+       unsigned gzip_request : 1;      /* allow post_rpc() to gzip the request body */
+};
+
+/*
+ * curl read callback (installed with CURLOPT_READFUNCTION for chunked
+ * requests): hand the next slice of the request body to curl.
+ *
+ * ptr is the destination buffer, with room for eltsize * nmemb bytes;
+ * buffer_ is the struct rpc_state of the request.  Once the bytes held
+ * in rpc->buf are used up another packet is read from rpc->out with
+ * packet_read_line(); a zero-length read ends the body.
+ *
+ * Returns the number of bytes copied into ptr, or 0 when no data is left.
+ */
+static size_t rpc_out(void *ptr, size_t eltsize,
+               size_t nmemb, void *buffer_)
+{
+       size_t max = eltsize * nmemb;
+       struct rpc_state *rpc = buffer_;
+       size_t avail = rpc->len - rpc->pos;
+
+       if (!avail) {
+               avail = packet_read_line(rpc->out, rpc->buf, rpc->alloc);
+               if (!avail)
+                       return 0;
+               rpc->pos = 0;
+               rpc->len = avail;
+       }
+
+       /* Clamp to curl's buffer only; never copy more than is actually buffered. */
+       if (max < avail)
+               avail = max;
+       memcpy(ptr, rpc->buf + rpc->pos, avail);
+       rpc->pos += avail;
+       return avail;
+}
+
+/*
+ * curl write callback (installed with CURLOPT_WRITEFUNCTION): forward
+ * eltsize * nmemb bytes of response data from ptr to rpc->in, where
+ * buffer_ is the struct rpc_state of the request.  Always reports the
+ * full size as consumed; write_or_die() handles failures.
+ */
+static size_t rpc_in(const void *ptr, size_t eltsize,
+               size_t nmemb, void *buffer_)
+{
+       struct rpc_state *rpc = buffer_;
+       size_t nbytes = eltsize * nmemb;
+
+       write_or_die(rpc->in, ptr, nbytes);
+       return nbytes;
+}
+
+/*
+ * Send one request to rpc->service_url as an HTTP POST and stream the
+ * response to rpc->in through rpc_in().
+ *
+ * On entry rpc->buf already holds rpc->len bytes of request data.  More
+ * packets are appended from rpc->out until a zero-length read, or until
+ * less than LARGE_PACKET_MAX bytes of the buffer remain free.  In the
+ * latter case the body is sent with chunked transfer encoding via
+ * rpc_out() and is never gzipped.  Otherwise the body is sent with a
+ * known size, deflated first when rpc->gzip_request is set and the body
+ * is larger than 1024 bytes.
+ *
+ * Returns 0 when curl reports success; otherwise the value of error()
+ * is folded into the result.
+ */
+static int post_rpc(struct rpc_state *rpc)
+{
+       struct active_request_slot *slot;
+       struct slot_results results;
+       struct curl_slist *headers = NULL;
+       int use_gzip = rpc->gzip_request;
+       char *gzip_body = NULL;
+       int err = 0, large_request = 0;
+
+       /* Try to load the entire request, if we can fit it into the
+        * allocated buffer space we can use HTTP/1.0 and avoid the
+        * chunked encoding mess.
+        */
+       while (1) {
+               size_t left = rpc->alloc - rpc->len;
+               char *buf = rpc->buf + rpc->len;
+               int n;
+
+               if (left < LARGE_PACKET_MAX) {
+                       large_request = 1;
+                       use_gzip = 0;
+                       break;
+               }
+
+               n = packet_read_line(rpc->out, buf, left);
+               if (!n)
+                       break;
+               rpc->len += n;
+       }
+
+       slot = get_active_slot();
+       slot->results = &results;
+
+       curl_easy_setopt(slot->curl, CURLOPT_POST, 1);
+       curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0);
+       curl_easy_setopt(slot->curl, CURLOPT_URL, rpc->service_url);
+       curl_easy_setopt(slot->curl, CURLOPT_ENCODING, "");
+
+       headers = curl_slist_append(headers, rpc->hdr_content_type);
+       headers = curl_slist_append(headers, rpc->hdr_accept);
+
+       if (large_request) {
+               /* The request body is large and the size cannot be predicted.
+                * We must use chunked encoding to send it.
+                */
+               headers = curl_slist_append(headers, "Expect: 100-continue");
+               headers = curl_slist_append(headers, "Transfer-Encoding: chunked");
+               curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, rpc_out);
+               curl_easy_setopt(slot->curl, CURLOPT_INFILE, rpc);
+               if (options.verbosity > 1) {
+                       fprintf(stderr, "POST %s (chunked)\n", rpc->service_name);
+                       fflush(stderr);
+               }
+
+       } else if (use_gzip && 1024 < rpc->len) {
+               /* The client backend isn't giving us compressed data so
+                * we can try to deflate it ourselves, this may save on
+                * the transfer time.
+                */
+               size_t size;
+               z_stream stream;
+               int ret;
+
+               memset(&stream, 0, sizeof(stream));
+               ret = deflateInit2(&stream, Z_BEST_COMPRESSION,
+                               Z_DEFLATED, (15 + 16), /* windowBits 15, plus 16 for a gzip wrapper (see zlib manual) */
+                               8, Z_DEFAULT_STRATEGY);
+               if (ret != Z_OK)
+                       die("cannot deflate request; zlib init error %d", ret);
+               size = deflateBound(&stream, rpc->len);
+               gzip_body = xmalloc(size);
+
+               stream.next_in = (unsigned char *)rpc->buf;
+               stream.avail_in = rpc->len;
+               stream.next_out = (unsigned char *)gzip_body;
+               stream.avail_out = size;
+
+               ret = deflate(&stream, Z_FINISH);
+               if (ret != Z_STREAM_END)
+                       die("cannot deflate request; zlib deflate error %d", ret);
+
+               ret = deflateEnd(&stream);
+               if (ret != Z_OK)
+                       die("cannot deflate request; zlib end error %d", ret);
+
+               size = stream.total_out; /* actual compressed length, not the deflateBound() estimate */
+
+               headers = curl_slist_append(headers, "Content-Encoding: gzip");
+               curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, gzip_body);
+               curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE, size);
+
+               if (options.verbosity > 1) {
+                       fprintf(stderr, "POST %s (gzip %lu to %lu bytes)\n",
+                               rpc->service_name,
+                               (unsigned long)rpc->len, (unsigned long)size);
+                       fflush(stderr);
+               }
+       } else {
+               /* We know the complete request size in advance, use the
+                * more normal Content-Length approach.
+                */
+               curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, rpc->buf);
+               curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE, rpc->len);
+               if (options.verbosity > 1) {
+                       fprintf(stderr, "POST %s (%lu bytes)\n",
+                               rpc->service_name, (unsigned long)rpc->len);
+                       fflush(stderr);
+               }
+       }
+
+       curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, headers);
+       curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, rpc_in);
+       curl_easy_setopt(slot->curl, CURLOPT_FILE, rpc);
+
+       slot->curl_result = curl_easy_perform(slot->curl);
+       finish_active_slot(slot);
+
+       if (results.curl_result != CURLE_OK) {
+               err |= error("RPC failed; result=%d, HTTP code = %ld",
+                       results.curl_result, results.http_code);
+       }
+
+       curl_slist_free_all(headers);
+       free(gzip_body); /* NULL unless the gzip branch ran */
+       return err;
+}
+
+/*
+ * Run the git command in rpc->argv as a child process and relay its
+ * packet stream to the remote service named by rpc->service_name.
+ *
+ * When heads is non-NULL its buffered data is first written to the
+ * child's input.  Every packet then read from the child starts one
+ * post_rpc() exchange; the loop stops on a zero-length read or on the
+ * first error.  Anything the child still writes afterwards is collected
+ * in rpc->result, which this function initializes but does not release.
+ *
+ * Returns 0 on success, non-zero if a request or the child failed.
+ * Exits the process if the child cannot be started.
+ */
+static int rpc_service(struct rpc_state *rpc, struct discovery *heads)
+{
+       const char *svc = rpc->service_name;
+       struct strbuf buf = STRBUF_INIT;
+       struct child_process client;
+       int err = 0;
+
+       init_walker();
+       memset(&client, 0, sizeof(client));
+       client.in = -1; /* NOTE(review): presumably asks start_command() for a pipe - confirm in run-command.h */
+       client.out = -1;
+       client.git_cmd = 1;
+       client.argv = rpc->argv;
+       if (start_command(&client))
+               exit(1);
+       if (heads)
+               write_or_die(client.in, heads->buf, heads->len);
+
+       rpc->alloc = http_post_buffer;
+       rpc->buf = xmalloc(rpc->alloc);
+       rpc->in = client.in;
+       rpc->out = client.out;
+       strbuf_init(&rpc->result, 0);
+
+       strbuf_addf(&buf, "%s/%s", url, svc);
+       rpc->service_url = strbuf_detach(&buf, NULL);
+
+       strbuf_addf(&buf, "Content-Type: application/x-%s-request", svc);
+       rpc->hdr_content_type = strbuf_detach(&buf, NULL);
+
+       strbuf_addf(&buf, "Accept: application/x-%s-response", svc);
+       rpc->hdr_accept = strbuf_detach(&buf, NULL);
+
+       while (!err) {
+               int n = packet_read_line(rpc->out, rpc->buf, rpc->alloc);
+               if (!n)
+                       break;
+               rpc->pos = 0; /* the first packet of each request restarts the buffer */
+               rpc->len = n;
+               err |= post_rpc(rpc);
+       }
+       strbuf_read(&rpc->result, client.out, 0);
+
+       close(client.in);
+       close(client.out);
+       client.in = -1;
+       client.out = -1;
+
+       err |= finish_command(&client);
+       free(rpc->service_url);
+       free(rpc->hdr_content_type);
+       free(rpc->hdr_accept);
+       free(rpc->buf);
+       strbuf_release(&buf);
+       return err;
+}
+
 static int fetch_dumb(int nr_heads, struct ref **to_fetch)
 {
        char **targets = xmalloc(nr_heads * sizeof(char*));
        int ret, i;
 
+       if (options.depth)
+               die("dumb http transport does not support --depth");
        for (i = 0; i < nr_heads; i++)
                targets[i] = xstrdup(sha1_to_hex(to_fetch[i]->old_sha1));
 
@@ -184,6 +532,66 @@ static int fetch_dumb(int nr_heads, struct ref **to_fetch)
        return ret ? error("Fetch failed.") : 0;
 }
 
+/*
+ * Fetch the refs in to_fetch (nr_heads entries) over smart HTTP by
+ * running "fetch-pack --stateless-rpc" through rpc_service() against
+ * the "git-upload-pack" service.
+ *
+ * The command line is built from the current options (followtags,
+ * thin, verbosity, progress, depth).  Every ref must carry a name;
+ * the function dies otherwise.  Whatever the child leaves in
+ * rpc.result is copied to descriptor 1.
+ *
+ * Returns the result of rpc_service(): 0 on success, non-zero on error.
+ */
+static int fetch_git(struct discovery *heads,
+       int nr_heads, struct ref **to_fetch)
+{
+       struct rpc_state rpc;
+       char *depth_arg = NULL;
+       const char **argv;
+       int argc = 0, i, err;
+
+       /* 15 leaves room for the fixed and optional arguments plus the NULL terminator */
+       argv = xmalloc((15 + nr_heads) * sizeof(char*));
+       argv[argc++] = "fetch-pack";
+       argv[argc++] = "--stateless-rpc";
+       argv[argc++] = "--lock-pack";
+       if (options.followtags)
+               argv[argc++] = "--include-tag";
+       if (options.thin)
+               argv[argc++] = "--thin";
+       if (options.verbosity >= 3) {
+               argv[argc++] = "-v";
+               argv[argc++] = "-v";
+       }
+       if (!options.progress)
+               argv[argc++] = "--no-progress";
+       if (options.depth) {
+               struct strbuf buf = STRBUF_INIT;
+               strbuf_addf(&buf, "--depth=%lu", options.depth);
+               depth_arg = strbuf_detach(&buf, NULL);
+               argv[argc++] = depth_arg;
+       }
+       argv[argc++] = url;
+       for (i = 0; i < nr_heads; i++) {
+               struct ref *ref = to_fetch[i];
+               if (!ref->name || !*ref->name)
+                       die("cannot fetch by sha1 over smart http");
+               argv[argc++] = ref->name;
+       }
+       argv[argc++] = NULL;
+
+       memset(&rpc, 0, sizeof(rpc));
+       rpc.service_name = "git-upload-pack";
+       rpc.argv = argv;
+       rpc.gzip_request = 1;
+
+       err = rpc_service(&rpc, heads);
+       if (rpc.result.len)
+               safe_write(1, rpc.result.buf, rpc.result.len);
+       strbuf_release(&rpc.result);
+       free(argv);
+       free(depth_arg);
+       return err;
+}
+
+/*
+ * Fetch entry point: discover the "git-upload-pack" service and use
+ * the smart protocol when the server offered it, the dumb walker
+ * otherwise.  Returns the result of the chosen implementation.
+ */
+static int fetch(int nr_heads, struct ref **to_fetch)
+{
+       struct discovery *heads = discover_refs("git-upload-pack");
+
+       if (!heads->proto_git)
+               return fetch_dumb(nr_heads, to_fetch);
+       return fetch_git(heads, nr_heads, to_fetch);
+}
+
 static void parse_fetch(struct strbuf *buf)
 {
        struct ref **to_fetch = NULL;
@@ -226,7 +634,7 @@ static void parse_fetch(struct strbuf *buf)
                        break;
        } while (1);
 
-       if (fetch_dumb(nr_heads, to_fetch))
+       if (fetch(nr_heads, to_fetch))
                exit(128); /* error already reported */
        free_refs(list_head);
        free(to_fetch);
@@ -258,6 +666,52 @@ static int push_dav(int nr_spec, char **specs)
        return 0;
 }
 
+/*
+ * Push the given refspecs (nr_spec entries in specs) over smart HTTP
+ * by running "send-pack --stateless-rpc --helper-status" through
+ * rpc_service() against the "git-receive-pack" service.
+ *
+ * The thin, dry_run and verbosity options add the matching flags.
+ * Whatever the child leaves in rpc.result is copied to descriptor 1.
+ *
+ * Returns the result of rpc_service(): 0 on success, non-zero on error.
+ */
+static int push_git(struct discovery *heads, int nr_spec, char **specs)
+{
+       struct rpc_state rpc;
+       const char **argv;
+       int argc = 0, i, err;
+
+       /* 10 leaves room for the fixed and optional arguments plus the NULL terminator */
+       argv = xmalloc((10 + nr_spec) * sizeof(char*));
+       argv[argc++] = "send-pack";
+       argv[argc++] = "--stateless-rpc";
+       argv[argc++] = "--helper-status";
+       if (options.thin)
+               argv[argc++] = "--thin";
+       if (options.dry_run)
+               argv[argc++] = "--dry-run";
+       if (options.verbosity > 1)
+               argv[argc++] = "--verbose";
+       argv[argc++] = url;
+       for (i = 0; i < nr_spec; i++)
+               argv[argc++] = specs[i];
+       argv[argc++] = NULL;
+
+       memset(&rpc, 0, sizeof(rpc));
+       rpc.service_name = "git-receive-pack";
+       rpc.argv = argv;
+
+       err = rpc_service(&rpc, heads);
+       if (rpc.result.len)
+               safe_write(1, rpc.result.buf, rpc.result.len);
+       strbuf_release(&rpc.result);
+       free(argv);
+       return err;
+}
+
+/*
+ * Push entry point: discover the "git-receive-pack" service and push
+ * with the smart protocol when the server offered it, with push_dav()
+ * otherwise.  The discovery result is released before returning.
+ */
+static int push(int nr_spec, char **specs)
+{
+       struct discovery *heads = discover_refs("git-receive-pack");
+       int ret = heads->proto_git
+               ? push_git(heads, nr_spec, specs)
+               : push_dav(nr_spec, specs);
+
+       free_discovery(heads);
+       return ret;
+}
+
 static void parse_push(struct strbuf *buf)
 {
        char **specs = NULL;
@@ -278,7 +732,7 @@ static void parse_push(struct strbuf *buf)
                        break;
        } while (1);
 
-       if (push_dav(nr_spec, specs))
+       if (push(nr_spec, specs))
                exit(128); /* error already reported */
        for (i = 0; i < nr_spec; i++)
                free(specs[i]);
@@ -301,6 +755,7 @@ int main(int argc, const char **argv)
 
        options.verbosity = 1;
        options.progress = !!isatty(2);
+       options.thin = 1;
 
        remote = remote_get(argv[1]);
 
@@ -317,7 +772,8 @@ int main(int argc, const char **argv)
                        parse_fetch(&buf);
 
                } else if (!strcmp(buf.buf, "list") || !prefixcmp(buf.buf, "list ")) {
-                       output_refs(get_refs());
+                       int for_push = !!strstr(buf.buf + 4, "for-push");
+                       output_refs(get_refs(for_push));
 
                } else if (!prefixcmp(buf.buf, "push ")) {
                        parse_push(&buf);