Merge branch 'ew/http-walker' into jk/http-walker-limit-redirect
authorJunio C Hamano <gitster@pobox.com>
Tue, 6 Dec 2016 20:40:41 +0000 (12:40 -0800)
committerJunio C Hamano <gitster@pobox.com>
Tue, 6 Dec 2016 20:43:23 +0000 (12:43 -0800)
* ew/http-walker:
list: avoid incompatibility with *BSD sys/queue.h
http-walker: reduce O(n) ops with doubly-linked list
http: avoid disconnecting on 404s for loose objects
http-walker: remove unused parameter from fetch_object

1  2 
http-walker.c
http.c
diff --combined http-walker.c
index 4bff31c444ca1a44a288749189ada02496e91016,0b2425531a8120fb29f37ee473a1e6e974959605..25a8b1ad4b245db8e4ff313c27f96d40dffafe24
@@@ -2,6 -2,7 +2,7 @@@
  #include "commit.h"
  #include "walker.h"
  #include "http.h"
+ #include "list.h"
  
  struct alt_base {
        char *base;
@@@ -23,7 -24,7 +24,7 @@@ struct object_request 
        struct alt_base *repo;
        enum object_request_state state;
        struct http_object_request *req;
-       struct object_request *next;
+       struct list_head node;
  };
  
  struct alternates_request {
@@@ -41,7 -42,7 +42,7 @@@ struct walker_data 
        struct alt_base *alt;
  };
  
- static struct object_request *object_queue_head;
+ static LIST_HEAD(object_queue_head);
  
  static void fetch_alternates(struct walker *walker, const char *base);
  
@@@ -110,19 -111,10 +111,10 @@@ static void process_object_response(voi
  
  static void release_object_request(struct object_request *obj_req)
  {
-       struct object_request *entry = object_queue_head;
        if (obj_req->req !=NULL && obj_req->req->localfile != -1)
                error("fd leakage in release: %d", obj_req->req->localfile);
-       if (obj_req == object_queue_head) {
-               object_queue_head = obj_req->next;
-       } else {
-               while (entry->next != NULL && entry->next != obj_req)
-                       entry = entry->next;
-               if (entry->next == obj_req)
-                       entry->next = entry->next->next;
-       }
  
+       list_del(&obj_req->node);
        free(obj_req);
  }
  
  static int fill_active_slot(struct walker *walker)
  {
        struct object_request *obj_req;
+       struct list_head *pos, *tmp, *head = &object_queue_head;
  
-       for (obj_req = object_queue_head; obj_req; obj_req = obj_req->next) {
+       list_for_each_safe(pos, tmp, head) {
+               obj_req = list_entry(pos, struct object_request, node);
                if (obj_req->state == WAITING) {
                        if (has_sha1_file(obj_req->sha1))
                                obj_req->state = COMPLETE;
  static void prefetch(struct walker *walker, unsigned char *sha1)
  {
        struct object_request *newreq;
-       struct object_request *tail;
        struct walker_data *data = walker->data;
  
        newreq = xmalloc(sizeof(*newreq));
        newreq->repo = data->alt;
        newreq->state = WAITING;
        newreq->req = NULL;
-       newreq->next = NULL;
  
        http_is_verbose = walker->get_verbosely;
-       if (object_queue_head == NULL) {
-               object_queue_head = newreq;
-       } else {
-               tail = object_queue_head;
-               while (tail->next != NULL)
-                       tail = tail->next;
-               tail->next = newreq;
-       }
+       list_add_tail(&newreq->node, &object_queue_head);
  
  #ifdef USE_CURL_MULTI
        fill_active_slots();
@@@ -290,8 -274,9 +274,8 @@@ static void process_alternates_response
                                struct strbuf target = STRBUF_INIT;
                                strbuf_add(&target, base, serverlen);
                                strbuf_add(&target, data + i, posn - i - 7);
 -                              if (walker->get_verbosely)
 -                                      fprintf(stderr, "Also look at %s\n",
 -                                              target.buf);
 +                              warning("adding alternate object store: %s",
 +                                      target.buf);
                                newalt = xmalloc(sizeof(*newalt));
                                newalt->next = NULL;
                                newalt->base = strbuf_detach(&target, NULL);
@@@ -317,9 -302,6 +301,9 @@@ static void fetch_alternates(struct wal
        struct alternates_request alt_req;
        struct walker_data *cdata = walker->data;
  
 +      if (http_follow_config != HTTP_FOLLOW_ALWAYS)
 +              return;
 +
        /*
         * If another request has already started fetching alternates,
         * wait for them to arrive and return to processing this request's
@@@ -449,15 -431,19 +433,19 @@@ static void abort_object_request(struc
        release_object_request(obj_req);
  }
  
- static int fetch_object(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
+ static int fetch_object(struct walker *walker, unsigned char *sha1)
  {
        char *hex = sha1_to_hex(sha1);
        int ret = 0;
-       struct object_request *obj_req = object_queue_head;
+       struct object_request *obj_req = NULL;
        struct http_object_request *req;
+       struct list_head *pos, *head = &object_queue_head;
  
-       while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
-               obj_req = obj_req->next;
+       list_for_each(pos, head) {
+               obj_req = list_entry(pos, struct object_request, node);
+               if (!hashcmp(obj_req->sha1, sha1))
+                       break;
+       }
        if (obj_req == NULL)
                return error("Couldn't find request for %s in the queue", hex);
  
                req->localfile = -1;
        }
  
+       /*
+        * we turned off CURLOPT_FAILONERROR to avoid losing a
+        * persistent connection and got CURLE_OK.
+        */
+       if (req->http_code == 404 && req->curl_result == CURLE_OK &&
+                       (starts_with(req->url, "http://") ||
+                        starts_with(req->url, "https://")))
+               req->curl_result = CURLE_HTTP_RETURNED_ERROR;
        if (obj_req->state == ABORTED) {
                ret = error("Request for %s aborted", hex);
        } else if (req->curl_result != CURLE_OK &&
@@@ -520,7 -515,7 +517,7 @@@ static int fetch(struct walker *walker
        struct walker_data *data = walker->data;
        struct alt_base *altbase = data->alt;
  
-       if (!fetch_object(walker, altbase, sha1))
+       if (!fetch_object(walker, sha1))
                return 0;
        while (altbase) {
                if (!http_fetch_pack(walker, altbase, sha1))
diff --combined http.c
index a9778bfa447f2afe89af3fe1ad3527d5b8d64791,6f12661a1411a83f2874610e6df6f21f8846e36f..7b66519b676f2d64f4f95090e2490941c3d3f317
--- 1/http.c
--- 2/http.c
+++ b/http.c
@@@ -98,8 -98,6 +98,8 @@@ static int http_proactive_auth
  static const char *user_agent;
  static int curl_empty_auth;
  
 +enum http_follow_config http_follow_config = HTTP_FOLLOW_INITIAL;
 +
  #if LIBCURL_VERSION_NUM >= 0x071700
  /* Use CURLOPT_KEYPASSWD as is */
  #elif LIBCURL_VERSION_NUM >= 0x070903
@@@ -339,16 -337,6 +339,16 @@@ static int http_options(const char *var
                return 0;
        }
  
 +      if (!strcmp("http.followredirects", var)) {
 +              if (value && !strcmp(value, "initial"))
 +                      http_follow_config = HTTP_FOLLOW_INITIAL;
 +              else if (git_config_bool(var, value))
 +                      http_follow_config = HTTP_FOLLOW_ALWAYS;
 +              else
 +                      http_follow_config = HTTP_FOLLOW_NONE;
 +              return 0;
 +      }
 +
        /* Fall back on the default ones */
        return git_default_config(var, value, cb);
  }
@@@ -565,6 -553,7 +565,6 @@@ static CURL *get_curl_handle(void
                                 curl_low_speed_time);
        }
  
 -      curl_easy_setopt(result, CURLOPT_FOLLOWLOCATION, 1);
        curl_easy_setopt(result, CURLOPT_MAXREDIRS, 20);
  #if LIBCURL_VERSION_NUM >= 0x071301
        curl_easy_setopt(result, CURLOPT_POSTREDIR, CURL_REDIR_POST_ALL);
        if (is_transport_allowed("ftps"))
                allowed_protocols |= CURLPROTO_FTPS;
        curl_easy_setopt(result, CURLOPT_REDIR_PROTOCOLS, allowed_protocols);
 +      curl_easy_setopt(result, CURLOPT_PROTOCOLS, allowed_protocols);
  #else
        if (transport_restrict_protocols())
                warning("protocol restrictions not applied to curl redirects because\n"
@@@ -894,16 -882,6 +894,16 @@@ struct active_request_slot *get_active_
        curl_easy_setopt(slot->curl, CURLOPT_FAILONERROR, 1);
        curl_easy_setopt(slot->curl, CURLOPT_RANGE, NULL);
  
 +      /*
 +       * Default following to off unless "ALWAYS" is configured; this gives
 +       * callers a sane starting point, and they can tweak for individual
 +       * HTTP_FOLLOW_* cases themselves.
 +       */
 +      if (http_follow_config == HTTP_FOLLOW_ALWAYS)
 +              curl_easy_setopt(slot->curl, CURLOPT_FOLLOWLOCATION, 1);
 +      else
 +              curl_easy_setopt(slot->curl, CURLOPT_FOLLOWLOCATION, 0);
 +
  #if LIBCURL_VERSION_NUM >= 0x070a08
        curl_easy_setopt(slot->curl, CURLOPT_IPRESOLVE, git_curl_ipresolve);
  #endif
@@@ -1127,7 -1105,7 +1127,7 @@@ void append_remote_object_url(struct st
  
        strbuf_addf(buf, "objects/%.*s/", 2, hex);
        if (!only_two_digit_prefix)
 -              strbuf_addf(buf, "%s", hex+2);
 +              strbuf_addstr(buf, hex + 2);
  }
  
  char *get_remote_object_url(const char *url, const char *hex,
@@@ -1144,12 -1122,9 +1144,12 @@@ static int handle_curl_result(struct sl
         * If we see a failing http code with CURLE_OK, we have turned off
         * FAILONERROR (to keep the server's custom error response), and should
         * translate the code into failure here.
 +       *
 +       * Likewise, if we see a redirect (30x code), that means we turned off
 +       * redirect-following, and we should treat the result as an error.
         */
        if (results->curl_result == CURLE_OK &&
 -          results->http_code >= 400) {
 +          results->http_code >= 300) {
                results->curl_result = CURLE_HTTP_RETURNED_ERROR;
                /*
                 * Normally curl will already have put the "reason phrase"
@@@ -1468,9 -1443,6 +1468,9 @@@ static int http_request(const char *url
                strbuf_addstr(&buf, " no-cache");
        if (options && options->keep_error)
                curl_easy_setopt(slot->curl, CURLOPT_FAILONERROR, 0);
 +      if (options && options->initial_request &&
 +          http_follow_config == HTTP_FOLLOW_INITIAL)
 +              curl_easy_setopt(slot->curl, CURLOPT_FOLLOWLOCATION, 1);
  
        headers = curl_slist_append(headers, buf.buf);
  
   *
   * Note that this assumes a sane redirect scheme. It's entirely possible
   * in the example above to end up at a URL that does not even end in
 - * "info/refs".  In such a case we simply punt, as there is not much we can
 - * do (and such a scheme is unlikely to represent a real git repository,
 - * which means we are likely about to abort anyway).
 + * "info/refs".  In such a case we die. There's not much we can do, such a
 + * scheme is unlikely to represent a real git repository, and failing to
 + * rewrite the base opens options for malicious redirects to do funny things.
   */
  static int update_url_from_redirect(struct strbuf *base,
                                    const char *asked,
                                    const struct strbuf *got)
  {
        const char *tail;
 -      size_t tail_len;
 +      size_t new_len;
  
        if (!strcmp(asked, got->buf))
                return 0;
                die("BUG: update_url_from_redirect: %s is not a superset of %s",
                    asked, base->buf);
  
 -      tail_len = strlen(tail);
 -
 -      if (got->len < tail_len ||
 -          strcmp(tail, got->buf + got->len - tail_len))
 -              return 0; /* insane redirect scheme */
 +      new_len = got->len;
 +      if (!strip_suffix_mem(got->buf, &new_len, tail))
 +              die(_("unable to update url base from redirection:\n"
 +                    "  asked for: %s\n"
 +                    "   redirect: %s"),
 +                  asked, got->buf);
  
        strbuf_reset(base);
 -      strbuf_add(base, got->buf, got->len - tail_len);
 +      strbuf_add(base, got->buf, new_len);
 +
        return 1;
  }
  
@@@ -1885,8 -1855,19 +1885,19 @@@ static size_t fwrite_sha1_file(char *pt
        unsigned char expn[4096];
        size_t size = eltsize * nmemb;
        int posn = 0;
-       struct http_object_request *freq =
-               (struct http_object_request *)data;
+       struct http_object_request *freq = data;
+       struct active_request_slot *slot = freq->slot;
+       if (slot) {
+               CURLcode c = curl_easy_getinfo(slot->curl, CURLINFO_HTTP_CODE,
+                                               &slot->http_code);
+               if (c != CURLE_OK)
+                       die("BUG: curl_easy_getinfo for HTTP code failed: %s",
+                               curl_easy_strerror(c));
+               if (slot->http_code >= 400)
+                       return size;
+       }
        do {
                ssize_t retval = xwrite(freq->localfile,
                                        (char *) ptr + posn, size - posn);
@@@ -2007,6 -1988,7 +2018,7 @@@ struct http_object_request *new_http_ob
        freq->slot = get_active_slot();
  
        curl_easy_setopt(freq->slot->curl, CURLOPT_FILE, freq);
+       curl_easy_setopt(freq->slot->curl, CURLOPT_FAILONERROR, 0);
        curl_easy_setopt(freq->slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
        curl_easy_setopt(freq->slot->curl, CURLOPT_ERRORBUFFER, freq->errorstr);
        curl_easy_setopt(freq->slot->curl, CURLOPT_URL, freq->url);