/* http-walker.c, as of commit 979240f ("parse-options: make resuming easier after PARSE_OPT_STOP_AT_NON_OPTION") */
   1#include "cache.h"
   2#include "commit.h"
   3#include "walker.h"
   4#include "http.h"
   5
   6struct alt_base
   7{
   8        char *base;
   9        int got_indices;
  10        struct packed_git *packs;
  11        struct alt_base *next;
  12};
  13
  14enum object_request_state {
  15        WAITING,
  16        ABORTED,
  17        ACTIVE,
  18        COMPLETE
  19};
  20
  21struct object_request
  22{
  23        struct walker *walker;
  24        unsigned char sha1[20];
  25        struct alt_base *repo;
  26        enum object_request_state state;
  27        struct http_object_request *req;
  28        struct object_request *next;
  29};
  30
  31struct alternates_request {
  32        struct walker *walker;
  33        const char *base;
  34        char *url;
  35        struct strbuf *buffer;
  36        struct active_request_slot *slot;
  37        int http_specific;
  38};
  39
  40struct walker_data {
  41        const char *url;
  42        int got_alternates;
  43        struct alt_base *alt;
  44};
  45
  46static struct object_request *object_queue_head;
  47
  48static void fetch_alternates(struct walker *walker, const char *base);
  49
  50static void process_object_response(void *callback_data);
  51
  52static void start_object_request(struct walker *walker,
  53                                 struct object_request *obj_req)
  54{
  55        struct active_request_slot *slot;
  56        struct http_object_request *req;
  57
  58        req = new_http_object_request(obj_req->repo->base, obj_req->sha1);
  59        if (req == NULL) {
  60                obj_req->state = ABORTED;
  61                return;
  62        }
  63        obj_req->req = req;
  64
  65        slot = req->slot;
  66        slot->callback_func = process_object_response;
  67        slot->callback_data = obj_req;
  68
  69        /* Try to get the request started, abort the request on error */
  70        obj_req->state = ACTIVE;
  71        if (!start_active_slot(slot)) {
  72                obj_req->state = ABORTED;
  73                release_http_object_request(req);
  74                return;
  75        }
  76}
  77
  78static void finish_object_request(struct object_request *obj_req)
  79{
  80        if (finish_http_object_request(obj_req->req))
  81                return;
  82
  83        if (obj_req->req->rename == 0)
  84                walker_say(obj_req->walker, "got %s\n", sha1_to_hex(obj_req->sha1));
  85}
  86
  87static void process_object_response(void *callback_data)
  88{
  89        struct object_request *obj_req =
  90                (struct object_request *)callback_data;
  91        struct walker *walker = obj_req->walker;
  92        struct walker_data *data = walker->data;
  93        struct alt_base *alt = data->alt;
  94
  95        process_http_object_request(obj_req->req);
  96        obj_req->state = COMPLETE;
  97
  98        /* Use alternates if necessary */
  99        if (missing_target(obj_req->req)) {
 100                fetch_alternates(walker, alt->base);
 101                if (obj_req->repo->next != NULL) {
 102                        obj_req->repo =
 103                                obj_req->repo->next;
 104                        release_http_object_request(obj_req->req);
 105                        start_object_request(walker, obj_req);
 106                        return;
 107                }
 108        }
 109
 110        finish_object_request(obj_req);
 111}
 112
 113static void release_object_request(struct object_request *obj_req)
 114{
 115        struct object_request *entry = object_queue_head;
 116
 117        if (obj_req->req !=NULL && obj_req->req->localfile != -1)
 118                error("fd leakage in release: %d", obj_req->req->localfile);
 119        if (obj_req == object_queue_head) {
 120                object_queue_head = obj_req->next;
 121        } else {
 122                while (entry->next != NULL && entry->next != obj_req)
 123                        entry = entry->next;
 124                if (entry->next == obj_req)
 125                        entry->next = entry->next->next;
 126        }
 127
 128        free(obj_req);
 129}
 130
 131#ifdef USE_CURL_MULTI
 132static int fill_active_slot(struct walker *walker)
 133{
 134        struct object_request *obj_req;
 135
 136        for (obj_req = object_queue_head; obj_req; obj_req = obj_req->next) {
 137                if (obj_req->state == WAITING) {
 138                        if (has_sha1_file(obj_req->sha1))
 139                                obj_req->state = COMPLETE;
 140                        else {
 141                                start_object_request(walker, obj_req);
 142                                return 1;
 143                        }
 144                }
 145        }
 146        return 0;
 147}
 148#endif
 149
 150static void prefetch(struct walker *walker, unsigned char *sha1)
 151{
 152        struct object_request *newreq;
 153        struct object_request *tail;
 154        struct walker_data *data = walker->data;
 155
 156        newreq = xmalloc(sizeof(*newreq));
 157        newreq->walker = walker;
 158        hashcpy(newreq->sha1, sha1);
 159        newreq->repo = data->alt;
 160        newreq->state = WAITING;
 161        newreq->req = NULL;
 162        newreq->next = NULL;
 163
 164        http_is_verbose = walker->get_verbosely;
 165
 166        if (object_queue_head == NULL) {
 167                object_queue_head = newreq;
 168        } else {
 169                tail = object_queue_head;
 170                while (tail->next != NULL)
 171                        tail = tail->next;
 172                tail->next = newreq;
 173        }
 174
 175#ifdef USE_CURL_MULTI
 176        fill_active_slots();
 177        step_active_slots();
 178#endif
 179}
 180
 181static void process_alternates_response(void *callback_data)
 182{
 183        struct alternates_request *alt_req =
 184                (struct alternates_request *)callback_data;
 185        struct walker *walker = alt_req->walker;
 186        struct walker_data *cdata = walker->data;
 187        struct active_request_slot *slot = alt_req->slot;
 188        struct alt_base *tail = cdata->alt;
 189        const char *base = alt_req->base;
 190        static const char null_byte = '\0';
 191        char *data;
 192        int i = 0;
 193
 194        if (alt_req->http_specific) {
 195                if (slot->curl_result != CURLE_OK ||
 196                    !alt_req->buffer->len) {
 197
 198                        /* Try reusing the slot to get non-http alternates */
 199                        alt_req->http_specific = 0;
 200                        sprintf(alt_req->url, "%s/objects/info/alternates",
 201                                base);
 202                        curl_easy_setopt(slot->curl, CURLOPT_URL,
 203                                         alt_req->url);
 204                        active_requests++;
 205                        slot->in_use = 1;
 206                        if (slot->finished != NULL)
 207                                (*slot->finished) = 0;
 208                        if (!start_active_slot(slot)) {
 209                                cdata->got_alternates = -1;
 210                                slot->in_use = 0;
 211                                if (slot->finished != NULL)
 212                                        (*slot->finished) = 1;
 213                        }
 214                        return;
 215                }
 216        } else if (slot->curl_result != CURLE_OK) {
 217                if (!missing_target(slot)) {
 218                        cdata->got_alternates = -1;
 219                        return;
 220                }
 221        }
 222
 223        fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
 224        alt_req->buffer->len--;
 225        data = alt_req->buffer->buf;
 226
 227        while (i < alt_req->buffer->len) {
 228                int posn = i;
 229                while (posn < alt_req->buffer->len && data[posn] != '\n')
 230                        posn++;
 231                if (data[posn] == '\n') {
 232                        int okay = 0;
 233                        int serverlen = 0;
 234                        struct alt_base *newalt;
 235                        char *target = NULL;
 236                        if (data[i] == '/') {
 237                                /*
 238                                 * This counts
 239                                 * http://git.host/pub/scm/linux.git/
 240                                 * -----------here^
 241                                 * so memcpy(dst, base, serverlen) will
 242                                 * copy up to "...git.host".
 243                                 */
 244                                const char *colon_ss = strstr(base,"://");
 245                                if (colon_ss) {
 246                                        serverlen = (strchr(colon_ss + 3, '/')
 247                                                     - base);
 248                                        okay = 1;
 249                                }
 250                        } else if (!memcmp(data + i, "../", 3)) {
 251                                /*
 252                                 * Relative URL; chop the corresponding
 253                                 * number of subpath from base (and ../
 254                                 * from data), and concatenate the result.
 255                                 *
 256                                 * The code first drops ../ from data, and
 257                                 * then drops one ../ from data and one path
 258                                 * from base.  IOW, one extra ../ is dropped
 259                                 * from data than path is dropped from base.
 260                                 *
 261                                 * This is not wrong.  The alternate in
 262                                 *     http://git.host/pub/scm/linux.git/
 263                                 * to borrow from
 264                                 *     http://git.host/pub/scm/linus.git/
 265                                 * is ../../linus.git/objects/.  You need
 266                                 * two ../../ to borrow from your direct
 267                                 * neighbour.
 268                                 */
 269                                i += 3;
 270                                serverlen = strlen(base);
 271                                while (i + 2 < posn &&
 272                                       !memcmp(data + i, "../", 3)) {
 273                                        do {
 274                                                serverlen--;
 275                                        } while (serverlen &&
 276                                                 base[serverlen - 1] != '/');
 277                                        i += 3;
 278                                }
 279                                /* If the server got removed, give up. */
 280                                okay = strchr(base, ':') - base + 3 <
 281                                       serverlen;
 282                        } else if (alt_req->http_specific) {
 283                                char *colon = strchr(data + i, ':');
 284                                char *slash = strchr(data + i, '/');
 285                                if (colon && slash && colon < data + posn &&
 286                                    slash < data + posn && colon < slash) {
 287                                        okay = 1;
 288                                }
 289                        }
 290                        /* skip "objects\n" at end */
 291                        if (okay) {
 292                                target = xmalloc(serverlen + posn - i - 6);
 293                                memcpy(target, base, serverlen);
 294                                memcpy(target + serverlen, data + i,
 295                                       posn - i - 7);
 296                                target[serverlen + posn - i - 7] = 0;
 297                                if (walker->get_verbosely)
 298                                        fprintf(stderr,
 299                                                "Also look at %s\n", target);
 300                                newalt = xmalloc(sizeof(*newalt));
 301                                newalt->next = NULL;
 302                                newalt->base = target;
 303                                newalt->got_indices = 0;
 304                                newalt->packs = NULL;
 305
 306                                while (tail->next != NULL)
 307                                        tail = tail->next;
 308                                tail->next = newalt;
 309                        }
 310                }
 311                i = posn + 1;
 312        }
 313
 314        cdata->got_alternates = 1;
 315}
 316
 317static void fetch_alternates(struct walker *walker, const char *base)
 318{
 319        struct strbuf buffer = STRBUF_INIT;
 320        char *url;
 321        struct active_request_slot *slot;
 322        struct alternates_request alt_req;
 323        struct walker_data *cdata = walker->data;
 324
 325        /*
 326         * If another request has already started fetching alternates,
 327         * wait for them to arrive and return to processing this request's
 328         * curl message
 329         */
 330#ifdef USE_CURL_MULTI
 331        while (cdata->got_alternates == 0) {
 332                step_active_slots();
 333        }
 334#endif
 335
 336        /* Nothing to do if they've already been fetched */
 337        if (cdata->got_alternates == 1)
 338                return;
 339
 340        /* Start the fetch */
 341        cdata->got_alternates = 0;
 342
 343        if (walker->get_verbosely)
 344                fprintf(stderr, "Getting alternates list for %s\n", base);
 345
 346        url = xmalloc(strlen(base) + 31);
 347        sprintf(url, "%s/objects/info/http-alternates", base);
 348
 349        /*
 350         * Use a callback to process the result, since another request
 351         * may fail and need to have alternates loaded before continuing
 352         */
 353        slot = get_active_slot();
 354        slot->callback_func = process_alternates_response;
 355        alt_req.walker = walker;
 356        slot->callback_data = &alt_req;
 357
 358        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 359        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 360        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 361
 362        alt_req.base = base;
 363        alt_req.url = url;
 364        alt_req.buffer = &buffer;
 365        alt_req.http_specific = 1;
 366        alt_req.slot = slot;
 367
 368        if (start_active_slot(slot))
 369                run_active_slot(slot);
 370        else
 371                cdata->got_alternates = -1;
 372
 373        strbuf_release(&buffer);
 374        free(url);
 375}
 376
 377static int fetch_indices(struct walker *walker, struct alt_base *repo)
 378{
 379        int ret;
 380
 381        if (repo->got_indices)
 382                return 0;
 383
 384        if (walker->get_verbosely)
 385                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 386
 387        switch (http_get_info_packs(repo->base, &repo->packs)) {
 388        case HTTP_OK:
 389        case HTTP_MISSING_TARGET:
 390                repo->got_indices = 1;
 391                ret = 0;
 392                break;
 393        default:
 394                repo->got_indices = 0;
 395                ret = -1;
 396        }
 397
 398        return ret;
 399}
 400
 401static int fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
 402{
 403        struct packed_git *target;
 404        int ret;
 405        struct slot_results results;
 406        struct http_pack_request *preq;
 407
 408        if (fetch_indices(walker, repo))
 409                return -1;
 410        target = find_sha1_pack(sha1, repo->packs);
 411        if (!target)
 412                return -1;
 413
 414        if (walker->get_verbosely) {
 415                fprintf(stderr, "Getting pack %s\n",
 416                        sha1_to_hex(target->sha1));
 417                fprintf(stderr, " which contains %s\n",
 418                        sha1_to_hex(sha1));
 419        }
 420
 421        preq = new_http_pack_request(target, repo->base);
 422        if (preq == NULL)
 423                goto abort;
 424        preq->lst = &repo->packs;
 425        preq->slot->results = &results;
 426
 427        if (start_active_slot(preq->slot)) {
 428                run_active_slot(preq->slot);
 429                if (results.curl_result != CURLE_OK) {
 430                        error("Unable to get pack file %s\n%s", preq->url,
 431                              curl_errorstr);
 432                        goto abort;
 433                }
 434        } else {
 435                error("Unable to start request");
 436                goto abort;
 437        }
 438
 439        ret = finish_http_pack_request(preq);
 440        release_http_pack_request(preq);
 441        if (ret)
 442                return ret;
 443
 444        return 0;
 445
 446abort:
 447        return -1;
 448}
 449
 450static void abort_object_request(struct object_request *obj_req)
 451{
 452        release_object_request(obj_req);
 453}
 454
 455static int fetch_object(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
 456{
 457        char *hex = sha1_to_hex(sha1);
 458        int ret = 0;
 459        struct object_request *obj_req = object_queue_head;
 460        struct http_object_request *req;
 461
 462        while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
 463                obj_req = obj_req->next;
 464        if (obj_req == NULL)
 465                return error("Couldn't find request for %s in the queue", hex);
 466
 467        if (has_sha1_file(obj_req->sha1)) {
 468                if (obj_req->req != NULL)
 469                        abort_http_object_request(obj_req->req);
 470                abort_object_request(obj_req);
 471                return 0;
 472        }
 473
 474#ifdef USE_CURL_MULTI
 475        while (obj_req->state == WAITING)
 476                step_active_slots();
 477#else
 478        start_object_request(walker, obj_req);
 479#endif
 480
 481        /*
 482         * obj_req->req might change when fetching alternates in the callback
 483         * process_object_response; therefore, the "shortcut" variable, req,
 484         * is used only after we're done with slots.
 485         */
 486        while (obj_req->state == ACTIVE)
 487                run_active_slot(obj_req->req->slot);
 488
 489        req = obj_req->req;
 490
 491        if (req->localfile != -1) {
 492                close(req->localfile);
 493                req->localfile = -1;
 494        }
 495
 496        if (obj_req->state == ABORTED) {
 497                ret = error("Request for %s aborted", hex);
 498        } else if (req->curl_result != CURLE_OK &&
 499                   req->http_code != 416) {
 500                if (missing_target(req))
 501                        ret = -1; /* Be silent, it is probably in a pack. */
 502                else
 503                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
 504                                    req->errorstr, req->curl_result,
 505                                    req->http_code, hex);
 506        } else if (req->zret != Z_STREAM_END) {
 507                walker->corrupt_object_found++;
 508                ret = error("File %s (%s) corrupt", hex, req->url);
 509        } else if (hashcmp(obj_req->sha1, req->real_sha1)) {
 510                ret = error("File %s has bad hash", hex);
 511        } else if (req->rename < 0) {
 512                ret = error("unable to write sha1 filename %s",
 513                            sha1_file_name(req->sha1));
 514        }
 515
 516        release_http_object_request(req);
 517        release_object_request(obj_req);
 518        return ret;
 519}
 520
 521static int fetch(struct walker *walker, unsigned char *sha1)
 522{
 523        struct walker_data *data = walker->data;
 524        struct alt_base *altbase = data->alt;
 525
 526        if (!fetch_object(walker, altbase, sha1))
 527                return 0;
 528        while (altbase) {
 529                if (!fetch_pack(walker, altbase, sha1))
 530                        return 0;
 531                fetch_alternates(walker, data->alt->base);
 532                altbase = altbase->next;
 533        }
 534        return error("Unable to find %s under %s", sha1_to_hex(sha1),
 535                     data->alt->base);
 536}
 537
 538static int fetch_ref(struct walker *walker, struct ref *ref)
 539{
 540        struct walker_data *data = walker->data;
 541        return http_fetch_ref(data->alt->base, ref);
 542}
 543
 544static void cleanup(struct walker *walker)
 545{
 546        struct walker_data *data = walker->data;
 547        struct alt_base *alt, *alt_next;
 548
 549        if (data) {
 550                alt = data->alt;
 551                while (alt) {
 552                        alt_next = alt->next;
 553
 554                        free(alt->base);
 555                        free(alt);
 556
 557                        alt = alt_next;
 558                }
 559                free(data);
 560                walker->data = NULL;
 561        }
 562}
 563
 564struct walker *get_http_walker(const char *url)
 565{
 566        char *s;
 567        struct walker_data *data = xmalloc(sizeof(struct walker_data));
 568        struct walker *walker = xmalloc(sizeof(struct walker));
 569
 570        data->alt = xmalloc(sizeof(*data->alt));
 571        data->alt->base = xmalloc(strlen(url) + 1);
 572        strcpy(data->alt->base, url);
 573        for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s)
 574                *s = 0;
 575
 576        data->alt->got_indices = 0;
 577        data->alt->packs = NULL;
 578        data->alt->next = NULL;
 579        data->got_alternates = -1;
 580
 581        walker->corrupt_object_found = 0;
 582        walker->fetch = fetch;
 583        walker->fetch_ref = fetch_ref;
 584        walker->prefetch = prefetch;
 585        walker->cleanup = cleanup;
 586        walker->data = data;
 587
 588#ifdef USE_CURL_MULTI
 589        add_fill_function(walker, (int (*)(void *)) fill_active_slot);
 590#endif
 591
 592        return walker;
 593}