/* http-walker.c - as of commit "Merge branch 'mh/http-urlmatch-cleanup'" (f0fcab6) */
   1#include "cache.h"
   2#include "repository.h"
   3#include "commit.h"
   4#include "walker.h"
   5#include "http.h"
   6#include "list.h"
   7#include "transport.h"
   8#include "packfile.h"
   9#include "object-store.h"
  10
  /*
   * One HTTP object store that objects may be fetched from: the store the
   * walker was created for, or an alternate appended behind it.  Entries
   * form a singly linked list through "next".
   */
  struct alt_base {
          char *base;                 /* base URL of the store (heap-allocated) */
          int got_indices;            /* nonzero once the pack list was fetched */
          struct packed_git *packs;   /* packs known for this store */
          struct alt_base *next;      /* following store, NULL at the tail */
  };
  17
  /*
   * Lifecycle of a queued object request.  The order of the enumerators
   * (and therefore their numeric values) is unchanged.
   */
  enum object_request_state {
          WAITING,        /* queued, no HTTP request started yet */
          ABORTED,        /* the HTTP request could not be created or started */
          ACTIVE,         /* an HTTP request has been started */
          COMPLETE        /* response processed, or object found locally */
  };
  24
  25struct object_request {
  26        struct walker *walker;
  27        struct object_id oid;
  28        struct alt_base *repo;
  29        enum object_request_state state;
  30        struct http_object_request *req;
  31        struct list_head node;
  32};
  33
  /*
   * State shared between fetch_alternates() and its slot callback,
   * process_alternates_response().
   */
  struct alternates_request {
          struct walker *walker;              /* walker the alternates belong to */
          const char *base;                   /* base URL the list is fetched for */
          struct strbuf *url;                 /* URL of the request (rewritten on retry) */
          struct strbuf *buffer;              /* receives the response body */
          struct active_request_slot *slot;   /* slot carrying the request */
          int http_specific;                  /* 1: http-alternates, 0: plain alternates */
  };
  42
  /* Per-walker private data hung off walker->data. */
  struct walker_data {
          const char *url;
          /*
           * -1: alternates not fetched yet (or the fetch failed),
           *  0: a fetch of the alternates list is in progress,
           *  1: the alternates list has been processed.
           */
          int got_alternates;
          struct alt_base *alt;   /* head of the store list; first entry is the remote itself */
  };
  48
  /* Every object request queued by prefetch(), appended at the tail. */
  static LIST_HEAD(object_queue_head);

  /* Forward declarations: both are referenced before they are defined. */
  static void process_object_response(void *callback_data);
  static void fetch_alternates(struct walker *walker, const char *base);
  54
  55static void start_object_request(struct walker *walker,
  56                                 struct object_request *obj_req)
  57{
  58        struct active_request_slot *slot;
  59        struct http_object_request *req;
  60
  61        req = new_http_object_request(obj_req->repo->base, &obj_req->oid);
  62        if (req == NULL) {
  63                obj_req->state = ABORTED;
  64                return;
  65        }
  66        obj_req->req = req;
  67
  68        slot = req->slot;
  69        slot->callback_func = process_object_response;
  70        slot->callback_data = obj_req;
  71
  72        /* Try to get the request started, abort the request on error */
  73        obj_req->state = ACTIVE;
  74        if (!start_active_slot(slot)) {
  75                obj_req->state = ABORTED;
  76                release_http_object_request(req);
  77                return;
  78        }
  79}
  80
  81static void finish_object_request(struct object_request *obj_req)
  82{
  83        if (finish_http_object_request(obj_req->req))
  84                return;
  85
  86        if (obj_req->req->rename == 0)
  87                walker_say(obj_req->walker, "got %s\n", oid_to_hex(&obj_req->oid));
  88}
  89
  90static void process_object_response(void *callback_data)
  91{
  92        struct object_request *obj_req =
  93                (struct object_request *)callback_data;
  94        struct walker *walker = obj_req->walker;
  95        struct walker_data *data = walker->data;
  96        struct alt_base *alt = data->alt;
  97
  98        process_http_object_request(obj_req->req);
  99        obj_req->state = COMPLETE;
 100
 101        normalize_curl_result(&obj_req->req->curl_result,
 102                              obj_req->req->http_code,
 103                              obj_req->req->errorstr,
 104                              sizeof(obj_req->req->errorstr));
 105
 106        /* Use alternates if necessary */
 107        if (missing_target(obj_req->req)) {
 108                fetch_alternates(walker, alt->base);
 109                if (obj_req->repo->next != NULL) {
 110                        obj_req->repo =
 111                                obj_req->repo->next;
 112                        release_http_object_request(obj_req->req);
 113                        start_object_request(walker, obj_req);
 114                        return;
 115                }
 116        }
 117
 118        finish_object_request(obj_req);
 119}
 120
 121static void release_object_request(struct object_request *obj_req)
 122{
 123        if (obj_req->req !=NULL && obj_req->req->localfile != -1)
 124                error("fd leakage in release: %d", obj_req->req->localfile);
 125
 126        list_del(&obj_req->node);
 127        free(obj_req);
 128}
 129
 #ifdef USE_CURL_MULTI
 /*
  * Walk the queue for the first WAITING request whose object is not yet
  * available locally and try to start it.  WAITING requests whose object
  * already exists are marked COMPLETE along the way.
  *
  * Returns 1 after attempting to start a request, 0 if none was left.
  */
 static int fill_active_slot(struct walker *walker)
 {
         struct list_head *cur, *next;

         list_for_each_safe(cur, next, &object_queue_head) {
                 struct object_request *obj_req =
                         list_entry(cur, struct object_request, node);

                 if (obj_req->state != WAITING)
                         continue;
                 if (has_object_file(&obj_req->oid)) {
                         obj_req->state = COMPLETE;
                         continue;
                 }
                 start_object_request(walker, obj_req);
                 return 1;
         }
         return 0;
 }
 #endif
 150
 151static void prefetch(struct walker *walker, unsigned char *sha1)
 152{
 153        struct object_request *newreq;
 154        struct walker_data *data = walker->data;
 155
 156        newreq = xmalloc(sizeof(*newreq));
 157        newreq->walker = walker;
 158        hashcpy(newreq->oid.hash, sha1);
 159        newreq->repo = data->alt;
 160        newreq->state = WAITING;
 161        newreq->req = NULL;
 162
 163        http_is_verbose = walker->get_verbosely;
 164        list_add_tail(&newreq->node, &object_queue_head);
 165
 166#ifdef USE_CURL_MULTI
 167        fill_active_slots();
 168        step_active_slots();
 169#endif
 170}
 171
 172static int is_alternate_allowed(const char *url)
 173{
 174        const char *protocols[] = {
 175                "http", "https", "ftp", "ftps"
 176        };
 177        int i;
 178
 179        if (http_follow_config != HTTP_FOLLOW_ALWAYS) {
 180                warning("alternate disabled by http.followRedirects: %s", url);
 181                return 0;
 182        }
 183
 184        for (i = 0; i < ARRAY_SIZE(protocols); i++) {
 185                const char *end;
 186                if (skip_prefix(url, protocols[i], &end) &&
 187                    starts_with(end, "://"))
 188                        break;
 189        }
 190
 191        if (i >= ARRAY_SIZE(protocols)) {
 192                warning("ignoring alternate with unknown protocol: %s", url);
 193                return 0;
 194        }
 195        if (!is_transport_allowed(protocols[i], 0)) {
 196                warning("ignoring alternate with restricted protocol: %s", url);
 197                return 0;
 198        }
 199
 200        return 1;
 201}
 202
 203static void process_alternates_response(void *callback_data)
 204{
 205        struct alternates_request *alt_req =
 206                (struct alternates_request *)callback_data;
 207        struct walker *walker = alt_req->walker;
 208        struct walker_data *cdata = walker->data;
 209        struct active_request_slot *slot = alt_req->slot;
 210        struct alt_base *tail = cdata->alt;
 211        const char *base = alt_req->base;
 212        const char null_byte = '\0';
 213        char *data;
 214        int i = 0;
 215
 216        normalize_curl_result(&slot->curl_result, slot->http_code,
 217                              curl_errorstr, sizeof(curl_errorstr));
 218
 219        if (alt_req->http_specific) {
 220                if (slot->curl_result != CURLE_OK ||
 221                    !alt_req->buffer->len) {
 222
 223                        /* Try reusing the slot to get non-http alternates */
 224                        alt_req->http_specific = 0;
 225                        strbuf_reset(alt_req->url);
 226                        strbuf_addf(alt_req->url, "%s/objects/info/alternates",
 227                                    base);
 228                        curl_easy_setopt(slot->curl, CURLOPT_URL,
 229                                         alt_req->url->buf);
 230                        active_requests++;
 231                        slot->in_use = 1;
 232                        if (slot->finished != NULL)
 233                                (*slot->finished) = 0;
 234                        if (!start_active_slot(slot)) {
 235                                cdata->got_alternates = -1;
 236                                slot->in_use = 0;
 237                                if (slot->finished != NULL)
 238                                        (*slot->finished) = 1;
 239                        }
 240                        return;
 241                }
 242        } else if (slot->curl_result != CURLE_OK) {
 243                if (!missing_target(slot)) {
 244                        cdata->got_alternates = -1;
 245                        return;
 246                }
 247        }
 248
 249        fwrite_buffer((char *)&null_byte, 1, 1, alt_req->buffer);
 250        alt_req->buffer->len--;
 251        data = alt_req->buffer->buf;
 252
 253        while (i < alt_req->buffer->len) {
 254                int posn = i;
 255                while (posn < alt_req->buffer->len && data[posn] != '\n')
 256                        posn++;
 257                if (data[posn] == '\n') {
 258                        int okay = 0;
 259                        int serverlen = 0;
 260                        struct alt_base *newalt;
 261                        if (data[i] == '/') {
 262                                /*
 263                                 * This counts
 264                                 * http://git.host/pub/scm/linux.git/
 265                                 * -----------here^
 266                                 * so memcpy(dst, base, serverlen) will
 267                                 * copy up to "...git.host".
 268                                 */
 269                                const char *colon_ss = strstr(base,"://");
 270                                if (colon_ss) {
 271                                        serverlen = (strchr(colon_ss + 3, '/')
 272                                                     - base);
 273                                        okay = 1;
 274                                }
 275                        } else if (!memcmp(data + i, "../", 3)) {
 276                                /*
 277                                 * Relative URL; chop the corresponding
 278                                 * number of subpath from base (and ../
 279                                 * from data), and concatenate the result.
 280                                 *
 281                                 * The code first drops ../ from data, and
 282                                 * then drops one ../ from data and one path
 283                                 * from base.  IOW, one extra ../ is dropped
 284                                 * from data than path is dropped from base.
 285                                 *
 286                                 * This is not wrong.  The alternate in
 287                                 *     http://git.host/pub/scm/linux.git/
 288                                 * to borrow from
 289                                 *     http://git.host/pub/scm/linus.git/
 290                                 * is ../../linus.git/objects/.  You need
 291                                 * two ../../ to borrow from your direct
 292                                 * neighbour.
 293                                 */
 294                                i += 3;
 295                                serverlen = strlen(base);
 296                                while (i + 2 < posn &&
 297                                       !memcmp(data + i, "../", 3)) {
 298                                        do {
 299                                                serverlen--;
 300                                        } while (serverlen &&
 301                                                 base[serverlen - 1] != '/');
 302                                        i += 3;
 303                                }
 304                                /* If the server got removed, give up. */
 305                                okay = strchr(base, ':') - base + 3 <
 306                                       serverlen;
 307                        } else if (alt_req->http_specific) {
 308                                char *colon = strchr(data + i, ':');
 309                                char *slash = strchr(data + i, '/');
 310                                if (colon && slash && colon < data + posn &&
 311                                    slash < data + posn && colon < slash) {
 312                                        okay = 1;
 313                                }
 314                        }
 315                        if (okay) {
 316                                struct strbuf target = STRBUF_INIT;
 317                                strbuf_add(&target, base, serverlen);
 318                                strbuf_add(&target, data + i, posn - i);
 319                                if (!strbuf_strip_suffix(&target, "objects")) {
 320                                        warning("ignoring alternate that does"
 321                                                " not end in 'objects': %s",
 322                                                target.buf);
 323                                        strbuf_release(&target);
 324                                } else if (is_alternate_allowed(target.buf)) {
 325                                        warning("adding alternate object store: %s",
 326                                                target.buf);
 327                                        newalt = xmalloc(sizeof(*newalt));
 328                                        newalt->next = NULL;
 329                                        newalt->base = strbuf_detach(&target, NULL);
 330                                        newalt->got_indices = 0;
 331                                        newalt->packs = NULL;
 332
 333                                        while (tail->next != NULL)
 334                                                tail = tail->next;
 335                                        tail->next = newalt;
 336                                } else {
 337                                        strbuf_release(&target);
 338                                }
 339                        }
 340                }
 341                i = posn + 1;
 342        }
 343
 344        cdata->got_alternates = 1;
 345}
 346
 347static void fetch_alternates(struct walker *walker, const char *base)
 348{
 349        struct strbuf buffer = STRBUF_INIT;
 350        struct strbuf url = STRBUF_INIT;
 351        struct active_request_slot *slot;
 352        struct alternates_request alt_req;
 353        struct walker_data *cdata = walker->data;
 354
 355        /*
 356         * If another request has already started fetching alternates,
 357         * wait for them to arrive and return to processing this request's
 358         * curl message
 359         */
 360#ifdef USE_CURL_MULTI
 361        while (cdata->got_alternates == 0) {
 362                step_active_slots();
 363        }
 364#endif
 365
 366        /* Nothing to do if they've already been fetched */
 367        if (cdata->got_alternates == 1)
 368                return;
 369
 370        /* Start the fetch */
 371        cdata->got_alternates = 0;
 372
 373        if (walker->get_verbosely)
 374                fprintf(stderr, "Getting alternates list for %s\n", base);
 375
 376        strbuf_addf(&url, "%s/objects/info/http-alternates", base);
 377
 378        /*
 379         * Use a callback to process the result, since another request
 380         * may fail and need to have alternates loaded before continuing
 381         */
 382        slot = get_active_slot();
 383        slot->callback_func = process_alternates_response;
 384        alt_req.walker = walker;
 385        slot->callback_data = &alt_req;
 386
 387        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 388        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 389        curl_easy_setopt(slot->curl, CURLOPT_URL, url.buf);
 390
 391        alt_req.base = base;
 392        alt_req.url = &url;
 393        alt_req.buffer = &buffer;
 394        alt_req.http_specific = 1;
 395        alt_req.slot = slot;
 396
 397        if (start_active_slot(slot))
 398                run_active_slot(slot);
 399        else
 400                cdata->got_alternates = -1;
 401
 402        strbuf_release(&buffer);
 403        strbuf_release(&url);
 404}
 405
 406static int fetch_indices(struct walker *walker, struct alt_base *repo)
 407{
 408        int ret;
 409
 410        if (repo->got_indices)
 411                return 0;
 412
 413        if (walker->get_verbosely)
 414                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 415
 416        switch (http_get_info_packs(repo->base, &repo->packs)) {
 417        case HTTP_OK:
 418        case HTTP_MISSING_TARGET:
 419                repo->got_indices = 1;
 420                ret = 0;
 421                break;
 422        default:
 423                repo->got_indices = 0;
 424                ret = -1;
 425        }
 426
 427        return ret;
 428}
 429
 430static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
 431{
 432        struct packed_git *target;
 433        int ret;
 434        struct slot_results results;
 435        struct http_pack_request *preq;
 436
 437        if (fetch_indices(walker, repo))
 438                return -1;
 439        target = find_sha1_pack(sha1, repo->packs);
 440        if (!target)
 441                return -1;
 442
 443        if (walker->get_verbosely) {
 444                fprintf(stderr, "Getting pack %s\n",
 445                        hash_to_hex(target->hash));
 446                fprintf(stderr, " which contains %s\n",
 447                        hash_to_hex(sha1));
 448        }
 449
 450        preq = new_http_pack_request(target, repo->base);
 451        if (preq == NULL)
 452                goto abort;
 453        preq->lst = &repo->packs;
 454        preq->slot->results = &results;
 455
 456        if (start_active_slot(preq->slot)) {
 457                run_active_slot(preq->slot);
 458                if (results.curl_result != CURLE_OK) {
 459                        error("Unable to get pack file %s\n%s", preq->url,
 460                              curl_errorstr);
 461                        goto abort;
 462                }
 463        } else {
 464                error("Unable to start request");
 465                goto abort;
 466        }
 467
 468        ret = finish_http_pack_request(preq);
 469        release_http_pack_request(preq);
 470        if (ret)
 471                return ret;
 472
 473        return 0;
 474
 475abort:
 476        return -1;
 477}
 478
 /* Drop a queued object request; currently the same as releasing it. */
 static void abort_object_request(struct object_request *obj_req)
 {
         release_object_request(obj_req);
 }
 483
 484static int fetch_object(struct walker *walker, unsigned char *hash)
 485{
 486        char *hex = hash_to_hex(hash);
 487        int ret = 0;
 488        struct object_request *obj_req = NULL;
 489        struct http_object_request *req;
 490        struct list_head *pos, *head = &object_queue_head;
 491
 492        list_for_each(pos, head) {
 493                obj_req = list_entry(pos, struct object_request, node);
 494                if (hasheq(obj_req->oid.hash, hash))
 495                        break;
 496        }
 497        if (obj_req == NULL)
 498                return error("Couldn't find request for %s in the queue", hex);
 499
 500        if (has_object_file(&obj_req->oid)) {
 501                if (obj_req->req != NULL)
 502                        abort_http_object_request(obj_req->req);
 503                abort_object_request(obj_req);
 504                return 0;
 505        }
 506
 507#ifdef USE_CURL_MULTI
 508        while (obj_req->state == WAITING)
 509                step_active_slots();
 510#else
 511        start_object_request(walker, obj_req);
 512#endif
 513
 514        /*
 515         * obj_req->req might change when fetching alternates in the callback
 516         * process_object_response; therefore, the "shortcut" variable, req,
 517         * is used only after we're done with slots.
 518         */
 519        while (obj_req->state == ACTIVE)
 520                run_active_slot(obj_req->req->slot);
 521
 522        req = obj_req->req;
 523
 524        if (req->localfile != -1) {
 525                close(req->localfile);
 526                req->localfile = -1;
 527        }
 528
 529        normalize_curl_result(&req->curl_result, req->http_code,
 530                              req->errorstr, sizeof(req->errorstr));
 531
 532        if (obj_req->state == ABORTED) {
 533                ret = error("Request for %s aborted", hex);
 534        } else if (req->curl_result != CURLE_OK &&
 535                   req->http_code != 416) {
 536                if (missing_target(req))
 537                        ret = -1; /* Be silent, it is probably in a pack. */
 538                else
 539                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
 540                                    req->errorstr, req->curl_result,
 541                                    req->http_code, hex);
 542        } else if (req->zret != Z_STREAM_END) {
 543                walker->corrupt_object_found++;
 544                ret = error("File %s (%s) corrupt", hex, req->url);
 545        } else if (!oideq(&obj_req->oid, &req->real_oid)) {
 546                ret = error("File %s has bad hash", hex);
 547        } else if (req->rename < 0) {
 548                struct strbuf buf = STRBUF_INIT;
 549                loose_object_path(the_repository, &buf, &req->oid);
 550                ret = error("unable to write sha1 filename %s", buf.buf);
 551                strbuf_release(&buf);
 552        }
 553
 554        release_http_object_request(req);
 555        release_object_request(obj_req);
 556        return ret;
 557}
 558
 559static int fetch(struct walker *walker, unsigned char *hash)
 560{
 561        struct walker_data *data = walker->data;
 562        struct alt_base *altbase = data->alt;
 563
 564        if (!fetch_object(walker, hash))
 565                return 0;
 566        while (altbase) {
 567                if (!http_fetch_pack(walker, altbase, hash))
 568                        return 0;
 569                fetch_alternates(walker, data->alt->base);
 570                altbase = altbase->next;
 571        }
 572        return error("Unable to find %s under %s", hash_to_hex(hash),
 573                     data->alt->base);
 574}
 575
 576static int fetch_ref(struct walker *walker, struct ref *ref)
 577{
 578        struct walker_data *data = walker->data;
 579        return http_fetch_ref(data->alt->base, ref);
 580}
 581
 582static void cleanup(struct walker *walker)
 583{
 584        struct walker_data *data = walker->data;
 585        struct alt_base *alt, *alt_next;
 586
 587        if (data) {
 588                alt = data->alt;
 589                while (alt) {
 590                        alt_next = alt->next;
 591
 592                        free(alt->base);
 593                        free(alt);
 594
 595                        alt = alt_next;
 596                }
 597                free(data);
 598                walker->data = NULL;
 599        }
 600}
 601
 602struct walker *get_http_walker(const char *url)
 603{
 604        char *s;
 605        struct walker_data *data = xmalloc(sizeof(struct walker_data));
 606        struct walker *walker = xmalloc(sizeof(struct walker));
 607
 608        data->alt = xmalloc(sizeof(*data->alt));
 609        data->alt->base = xstrdup(url);
 610        for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s)
 611                *s = 0;
 612
 613        data->alt->got_indices = 0;
 614        data->alt->packs = NULL;
 615        data->alt->next = NULL;
 616        data->got_alternates = -1;
 617
 618        walker->corrupt_object_found = 0;
 619        walker->fetch = fetch;
 620        walker->fetch_ref = fetch_ref;
 621        walker->prefetch = prefetch;
 622        walker->cleanup = cleanup;
 623        walker->data = data;
 624
 625#ifdef USE_CURL_MULTI
 626        add_fill_function(walker, (int (*)(void *)) fill_active_slot);
 627#endif
 628
 629        return walker;
 630}