/* http-walker.c on commit "Merge branch 'jc/pull-rebase-ff' into maint" (e4ec408) */
   1#include "cache.h"
   2#include "commit.h"
   3#include "walker.h"
   4#include "http.h"
   5#include "list.h"
   6
   /*
    * One HTTP location that objects and packs may be fetched from.
    * Entries are chained through "next" into a singly linked list.
    */
   struct alt_base {
           /* base URL of this object store */
           char *base;
           /* non-zero once the pack list for this base has been fetched */
           int got_indices;
           /* packs known to be available under this base */
           struct packed_git *packs;
           /* following entry in the chain, NULL at the end */
           struct alt_base *next;
   };
  13
  /* Lifecycle of an entry in the object request queue. */
  enum object_request_state {
          WAITING,        /* queued, no transfer started yet */
          ABORTED,        /* the transfer could not be set up or started */
          ACTIVE,         /* a transfer has been started */
          COMPLETE        /* response handled, or the object was already present */
  };
  20
  21struct object_request {
  22        struct walker *walker;
  23        unsigned char sha1[20];
  24        struct alt_base *repo;
  25        enum object_request_state state;
  26        struct http_object_request *req;
  27        struct list_head node;
  28};
  29
  /* State shared between fetch_alternates() and its response callback. */
  struct alternates_request {
          /* walker whose alternates list is being extended */
          struct walker *walker;
          /* base URL the alternates file is fetched relative to */
          const char *base;
          /* URL of the request currently in flight */
          struct strbuf *url;
          /* receives the body of the response */
          struct strbuf *buffer;
          /* slot carrying the request */
          struct active_request_slot *slot;
          /* 1 while asking for http-alternates, 0 for plain alternates */
          int http_specific;
  };
  38
  /* HTTP-specific state hung off walker->data. */
  struct walker_data {
          /* NOTE(review): neither set nor read in the code visible here */
          const char *url;
          /* -1: not fetched (or failed), 0: fetch in progress, 1: fetched */
          int got_alternates;
          /* head of the object store list; the first entry is the main URL */
          struct alt_base *alt;
  };
  44
  /* Queue of object requests; prefetch() appends new entries at the tail. */
  static LIST_HEAD(object_queue_head);

  /* Forward declarations for functions used before they are defined. */
  static void process_object_response(void *callback_data);
  static void fetch_alternates(struct walker *walker, const char *base);
  50
  51static void start_object_request(struct walker *walker,
  52                                 struct object_request *obj_req)
  53{
  54        struct active_request_slot *slot;
  55        struct http_object_request *req;
  56
  57        req = new_http_object_request(obj_req->repo->base, obj_req->sha1);
  58        if (req == NULL) {
  59                obj_req->state = ABORTED;
  60                return;
  61        }
  62        obj_req->req = req;
  63
  64        slot = req->slot;
  65        slot->callback_func = process_object_response;
  66        slot->callback_data = obj_req;
  67
  68        /* Try to get the request started, abort the request on error */
  69        obj_req->state = ACTIVE;
  70        if (!start_active_slot(slot)) {
  71                obj_req->state = ABORTED;
  72                release_http_object_request(req);
  73                return;
  74        }
  75}
  76
  77static void finish_object_request(struct object_request *obj_req)
  78{
  79        if (finish_http_object_request(obj_req->req))
  80                return;
  81
  82        if (obj_req->req->rename == 0)
  83                walker_say(obj_req->walker, "got %s\n", sha1_to_hex(obj_req->sha1));
  84}
  85
  86static void process_object_response(void *callback_data)
  87{
  88        struct object_request *obj_req =
  89                (struct object_request *)callback_data;
  90        struct walker *walker = obj_req->walker;
  91        struct walker_data *data = walker->data;
  92        struct alt_base *alt = data->alt;
  93
  94        process_http_object_request(obj_req->req);
  95        obj_req->state = COMPLETE;
  96
  97        /* Use alternates if necessary */
  98        if (missing_target(obj_req->req)) {
  99                fetch_alternates(walker, alt->base);
 100                if (obj_req->repo->next != NULL) {
 101                        obj_req->repo =
 102                                obj_req->repo->next;
 103                        release_http_object_request(obj_req->req);
 104                        start_object_request(walker, obj_req);
 105                        return;
 106                }
 107        }
 108
 109        finish_object_request(obj_req);
 110}
 111
 112static void release_object_request(struct object_request *obj_req)
 113{
 114        if (obj_req->req !=NULL && obj_req->req->localfile != -1)
 115                error("fd leakage in release: %d", obj_req->req->localfile);
 116
 117        list_del(&obj_req->node);
 118        free(obj_req);
 119}
 120
 #ifdef USE_CURL_MULTI
 /*
  * Fill function registered with the HTTP layer: start the first WAITING
  * request in the queue whose object is not already present locally.
  * WAITING requests for objects we already have are marked COMPLETE on
  * the way.  Returns 1 if a request was started, 0 otherwise.
  */
 static int fill_active_slot(struct walker *walker)
 {
         struct list_head *cur, *following;

         list_for_each_safe(cur, following, &object_queue_head) {
                 struct object_request *candidate =
                         list_entry(cur, struct object_request, node);

                 if (candidate->state != WAITING)
                         continue;
                 if (has_sha1_file(candidate->sha1)) {
                         candidate->state = COMPLETE;
                         continue;
                 }
                 start_object_request(walker, candidate);
                 return 1;
         }
         return 0;
 }
 #endif
 141
 142static void prefetch(struct walker *walker, unsigned char *sha1)
 143{
 144        struct object_request *newreq;
 145        struct walker_data *data = walker->data;
 146
 147        newreq = xmalloc(sizeof(*newreq));
 148        newreq->walker = walker;
 149        hashcpy(newreq->sha1, sha1);
 150        newreq->repo = data->alt;
 151        newreq->state = WAITING;
 152        newreq->req = NULL;
 153
 154        http_is_verbose = walker->get_verbosely;
 155        list_add_tail(&newreq->node, &object_queue_head);
 156
 157#ifdef USE_CURL_MULTI
 158        fill_active_slots();
 159        step_active_slots();
 160#endif
 161}
 162
 163static void process_alternates_response(void *callback_data)
 164{
 165        struct alternates_request *alt_req =
 166                (struct alternates_request *)callback_data;
 167        struct walker *walker = alt_req->walker;
 168        struct walker_data *cdata = walker->data;
 169        struct active_request_slot *slot = alt_req->slot;
 170        struct alt_base *tail = cdata->alt;
 171        const char *base = alt_req->base;
 172        const char null_byte = '\0';
 173        char *data;
 174        int i = 0;
 175
 176        if (alt_req->http_specific) {
 177                if (slot->curl_result != CURLE_OK ||
 178                    !alt_req->buffer->len) {
 179
 180                        /* Try reusing the slot to get non-http alternates */
 181                        alt_req->http_specific = 0;
 182                        strbuf_reset(alt_req->url);
 183                        strbuf_addf(alt_req->url, "%s/objects/info/alternates",
 184                                    base);
 185                        curl_easy_setopt(slot->curl, CURLOPT_URL,
 186                                         alt_req->url->buf);
 187                        active_requests++;
 188                        slot->in_use = 1;
 189                        if (slot->finished != NULL)
 190                                (*slot->finished) = 0;
 191                        if (!start_active_slot(slot)) {
 192                                cdata->got_alternates = -1;
 193                                slot->in_use = 0;
 194                                if (slot->finished != NULL)
 195                                        (*slot->finished) = 1;
 196                        }
 197                        return;
 198                }
 199        } else if (slot->curl_result != CURLE_OK) {
 200                if (!missing_target(slot)) {
 201                        cdata->got_alternates = -1;
 202                        return;
 203                }
 204        }
 205
 206        fwrite_buffer((char *)&null_byte, 1, 1, alt_req->buffer);
 207        alt_req->buffer->len--;
 208        data = alt_req->buffer->buf;
 209
 210        while (i < alt_req->buffer->len) {
 211                int posn = i;
 212                while (posn < alt_req->buffer->len && data[posn] != '\n')
 213                        posn++;
 214                if (data[posn] == '\n') {
 215                        int okay = 0;
 216                        int serverlen = 0;
 217                        struct alt_base *newalt;
 218                        if (data[i] == '/') {
 219                                /*
 220                                 * This counts
 221                                 * http://git.host/pub/scm/linux.git/
 222                                 * -----------here^
 223                                 * so memcpy(dst, base, serverlen) will
 224                                 * copy up to "...git.host".
 225                                 */
 226                                const char *colon_ss = strstr(base,"://");
 227                                if (colon_ss) {
 228                                        serverlen = (strchr(colon_ss + 3, '/')
 229                                                     - base);
 230                                        okay = 1;
 231                                }
 232                        } else if (!memcmp(data + i, "../", 3)) {
 233                                /*
 234                                 * Relative URL; chop the corresponding
 235                                 * number of subpath from base (and ../
 236                                 * from data), and concatenate the result.
 237                                 *
 238                                 * The code first drops ../ from data, and
 239                                 * then drops one ../ from data and one path
 240                                 * from base.  IOW, one extra ../ is dropped
 241                                 * from data than path is dropped from base.
 242                                 *
 243                                 * This is not wrong.  The alternate in
 244                                 *     http://git.host/pub/scm/linux.git/
 245                                 * to borrow from
 246                                 *     http://git.host/pub/scm/linus.git/
 247                                 * is ../../linus.git/objects/.  You need
 248                                 * two ../../ to borrow from your direct
 249                                 * neighbour.
 250                                 */
 251                                i += 3;
 252                                serverlen = strlen(base);
 253                                while (i + 2 < posn &&
 254                                       !memcmp(data + i, "../", 3)) {
 255                                        do {
 256                                                serverlen--;
 257                                        } while (serverlen &&
 258                                                 base[serverlen - 1] != '/');
 259                                        i += 3;
 260                                }
 261                                /* If the server got removed, give up. */
 262                                okay = strchr(base, ':') - base + 3 <
 263                                       serverlen;
 264                        } else if (alt_req->http_specific) {
 265                                char *colon = strchr(data + i, ':');
 266                                char *slash = strchr(data + i, '/');
 267                                if (colon && slash && colon < data + posn &&
 268                                    slash < data + posn && colon < slash) {
 269                                        okay = 1;
 270                                }
 271                        }
 272                        /* skip "objects\n" at end */
 273                        if (okay) {
 274                                struct strbuf target = STRBUF_INIT;
 275                                strbuf_add(&target, base, serverlen);
 276                                strbuf_add(&target, data + i, posn - i - 7);
 277                                warning("adding alternate object store: %s",
 278                                        target.buf);
 279                                newalt = xmalloc(sizeof(*newalt));
 280                                newalt->next = NULL;
 281                                newalt->base = strbuf_detach(&target, NULL);
 282                                newalt->got_indices = 0;
 283                                newalt->packs = NULL;
 284
 285                                while (tail->next != NULL)
 286                                        tail = tail->next;
 287                                tail->next = newalt;
 288                        }
 289                }
 290                i = posn + 1;
 291        }
 292
 293        cdata->got_alternates = 1;
 294}
 295
 296static void fetch_alternates(struct walker *walker, const char *base)
 297{
 298        struct strbuf buffer = STRBUF_INIT;
 299        struct strbuf url = STRBUF_INIT;
 300        struct active_request_slot *slot;
 301        struct alternates_request alt_req;
 302        struct walker_data *cdata = walker->data;
 303
 304        if (http_follow_config != HTTP_FOLLOW_ALWAYS)
 305                return;
 306
 307        /*
 308         * If another request has already started fetching alternates,
 309         * wait for them to arrive and return to processing this request's
 310         * curl message
 311         */
 312#ifdef USE_CURL_MULTI
 313        while (cdata->got_alternates == 0) {
 314                step_active_slots();
 315        }
 316#endif
 317
 318        /* Nothing to do if they've already been fetched */
 319        if (cdata->got_alternates == 1)
 320                return;
 321
 322        /* Start the fetch */
 323        cdata->got_alternates = 0;
 324
 325        if (walker->get_verbosely)
 326                fprintf(stderr, "Getting alternates list for %s\n", base);
 327
 328        strbuf_addf(&url, "%s/objects/info/http-alternates", base);
 329
 330        /*
 331         * Use a callback to process the result, since another request
 332         * may fail and need to have alternates loaded before continuing
 333         */
 334        slot = get_active_slot();
 335        slot->callback_func = process_alternates_response;
 336        alt_req.walker = walker;
 337        slot->callback_data = &alt_req;
 338
 339        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 340        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 341        curl_easy_setopt(slot->curl, CURLOPT_URL, url.buf);
 342
 343        alt_req.base = base;
 344        alt_req.url = &url;
 345        alt_req.buffer = &buffer;
 346        alt_req.http_specific = 1;
 347        alt_req.slot = slot;
 348
 349        if (start_active_slot(slot))
 350                run_active_slot(slot);
 351        else
 352                cdata->got_alternates = -1;
 353
 354        strbuf_release(&buffer);
 355        strbuf_release(&url);
 356}
 357
 358static int fetch_indices(struct walker *walker, struct alt_base *repo)
 359{
 360        int ret;
 361
 362        if (repo->got_indices)
 363                return 0;
 364
 365        if (walker->get_verbosely)
 366                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 367
 368        switch (http_get_info_packs(repo->base, &repo->packs)) {
 369        case HTTP_OK:
 370        case HTTP_MISSING_TARGET:
 371                repo->got_indices = 1;
 372                ret = 0;
 373                break;
 374        default:
 375                repo->got_indices = 0;
 376                ret = -1;
 377        }
 378
 379        return ret;
 380}
 381
 382static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
 383{
 384        struct packed_git *target;
 385        int ret;
 386        struct slot_results results;
 387        struct http_pack_request *preq;
 388
 389        if (fetch_indices(walker, repo))
 390                return -1;
 391        target = find_sha1_pack(sha1, repo->packs);
 392        if (!target)
 393                return -1;
 394
 395        if (walker->get_verbosely) {
 396                fprintf(stderr, "Getting pack %s\n",
 397                        sha1_to_hex(target->sha1));
 398                fprintf(stderr, " which contains %s\n",
 399                        sha1_to_hex(sha1));
 400        }
 401
 402        preq = new_http_pack_request(target, repo->base);
 403        if (preq == NULL)
 404                goto abort;
 405        preq->lst = &repo->packs;
 406        preq->slot->results = &results;
 407
 408        if (start_active_slot(preq->slot)) {
 409                run_active_slot(preq->slot);
 410                if (results.curl_result != CURLE_OK) {
 411                        error("Unable to get pack file %s\n%s", preq->url,
 412                              curl_errorstr);
 413                        goto abort;
 414                }
 415        } else {
 416                error("Unable to start request");
 417                goto abort;
 418        }
 419
 420        ret = finish_http_pack_request(preq);
 421        release_http_pack_request(preq);
 422        if (ret)
 423                return ret;
 424
 425        return 0;
 426
 427abort:
 428        return -1;
 429}
 430
 /*
  * Give up on obj_req: it is dropped from the queue and freed via
  * release_object_request().
  */
 static void abort_object_request(struct object_request *obj_req)
 {
         release_object_request(obj_req);
 }
 435
 436static int fetch_object(struct walker *walker, unsigned char *sha1)
 437{
 438        char *hex = sha1_to_hex(sha1);
 439        int ret = 0;
 440        struct object_request *obj_req = NULL;
 441        struct http_object_request *req;
 442        struct list_head *pos, *head = &object_queue_head;
 443
 444        list_for_each(pos, head) {
 445                obj_req = list_entry(pos, struct object_request, node);
 446                if (!hashcmp(obj_req->sha1, sha1))
 447                        break;
 448        }
 449        if (obj_req == NULL)
 450                return error("Couldn't find request for %s in the queue", hex);
 451
 452        if (has_sha1_file(obj_req->sha1)) {
 453                if (obj_req->req != NULL)
 454                        abort_http_object_request(obj_req->req);
 455                abort_object_request(obj_req);
 456                return 0;
 457        }
 458
 459#ifdef USE_CURL_MULTI
 460        while (obj_req->state == WAITING)
 461                step_active_slots();
 462#else
 463        start_object_request(walker, obj_req);
 464#endif
 465
 466        /*
 467         * obj_req->req might change when fetching alternates in the callback
 468         * process_object_response; therefore, the "shortcut" variable, req,
 469         * is used only after we're done with slots.
 470         */
 471        while (obj_req->state == ACTIVE)
 472                run_active_slot(obj_req->req->slot);
 473
 474        req = obj_req->req;
 475
 476        if (req->localfile != -1) {
 477                close(req->localfile);
 478                req->localfile = -1;
 479        }
 480
 481        /*
 482         * we turned off CURLOPT_FAILONERROR to avoid losing a
 483         * persistent connection and got CURLE_OK.
 484         */
 485        if (req->http_code >= 300 && req->curl_result == CURLE_OK &&
 486                        (starts_with(req->url, "http://") ||
 487                         starts_with(req->url, "https://"))) {
 488                req->curl_result = CURLE_HTTP_RETURNED_ERROR;
 489                xsnprintf(req->errorstr, sizeof(req->errorstr),
 490                          "HTTP request failed");
 491        }
 492
 493        if (obj_req->state == ABORTED) {
 494                ret = error("Request for %s aborted", hex);
 495        } else if (req->curl_result != CURLE_OK &&
 496                   req->http_code != 416) {
 497                if (missing_target(req))
 498                        ret = -1; /* Be silent, it is probably in a pack. */
 499                else
 500                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
 501                                    req->errorstr, req->curl_result,
 502                                    req->http_code, hex);
 503        } else if (req->zret != Z_STREAM_END) {
 504                walker->corrupt_object_found++;
 505                ret = error("File %s (%s) corrupt", hex, req->url);
 506        } else if (hashcmp(obj_req->sha1, req->real_sha1)) {
 507                ret = error("File %s has bad hash", hex);
 508        } else if (req->rename < 0) {
 509                ret = error("unable to write sha1 filename %s",
 510                            sha1_file_name(req->sha1));
 511        }
 512
 513        release_http_object_request(req);
 514        release_object_request(obj_req);
 515        return ret;
 516}
 517
 518static int fetch(struct walker *walker, unsigned char *sha1)
 519{
 520        struct walker_data *data = walker->data;
 521        struct alt_base *altbase = data->alt;
 522
 523        if (!fetch_object(walker, sha1))
 524                return 0;
 525        while (altbase) {
 526                if (!http_fetch_pack(walker, altbase, sha1))
 527                        return 0;
 528                fetch_alternates(walker, data->alt->base);
 529                altbase = altbase->next;
 530        }
 531        return error("Unable to find %s under %s", sha1_to_hex(sha1),
 532                     data->alt->base);
 533}
 534
 535static int fetch_ref(struct walker *walker, struct ref *ref)
 536{
 537        struct walker_data *data = walker->data;
 538        return http_fetch_ref(data->alt->base, ref);
 539}
 540
 541static void cleanup(struct walker *walker)
 542{
 543        struct walker_data *data = walker->data;
 544        struct alt_base *alt, *alt_next;
 545
 546        if (data) {
 547                alt = data->alt;
 548                while (alt) {
 549                        alt_next = alt->next;
 550
 551                        free(alt->base);
 552                        free(alt);
 553
 554                        alt = alt_next;
 555                }
 556                free(data);
 557                walker->data = NULL;
 558        }
 559}
 560
 561struct walker *get_http_walker(const char *url)
 562{
 563        char *s;
 564        struct walker_data *data = xmalloc(sizeof(struct walker_data));
 565        struct walker *walker = xmalloc(sizeof(struct walker));
 566
 567        data->alt = xmalloc(sizeof(*data->alt));
 568        data->alt->base = xstrdup(url);
 569        for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s)
 570                *s = 0;
 571
 572        data->alt->got_indices = 0;
 573        data->alt->packs = NULL;
 574        data->alt->next = NULL;
 575        data->got_alternates = -1;
 576
 577        walker->corrupt_object_found = 0;
 578        walker->fetch = fetch;
 579        walker->fetch_ref = fetch_ref;
 580        walker->prefetch = prefetch;
 581        walker->cleanup = cleanup;
 582        walker->data = data;
 583
 584#ifdef USE_CURL_MULTI
 585        add_fill_function(walker, (int (*)(void *)) fill_active_slot);
 586#endif
 587
 588        return walker;
 589}