/* http-walker.c on commit "commit: allow associating auxiliary info on-demand" (96c4f4a) */
   1#include "cache.h"
   2#include "commit.h"
   3#include "walker.h"
   4#include "http.h"
   5
   /*
    * One repository location objects can be fetched from.  Locations form
    * a singly linked list through `next`; alternates discovered while
    * walking are appended at the tail of that list.
    */
   struct alt_base {
           char *base;                     /* base URL; released with free() in cleanup() */
           int got_indices;                /* non-zero once the pack list was fetched */
           struct packed_git *packs;       /* packs known for this location */
           struct alt_base *next;          /* next location, NULL at the end */
   };
  12
  /* Lifecycle of a queued object request. */
  enum object_request_state {
          WAITING = 0,    /* queued by prefetch(), not started yet */
          ABORTED = 1,    /* the transfer could not be created or started */
          ACTIVE = 2,     /* handed to an active slot */
          COMPLETE = 3    /* response processed, or the object was already present */
  };
  19
  20struct object_request {
  21        struct walker *walker;
  22        unsigned char sha1[20];
  23        struct alt_base *repo;
  24        enum object_request_state state;
  25        struct http_object_request *req;
  26        struct object_request *next;
  27};
  28
  /*
   * State handed to process_alternates_response() through the slot's
   * callback_data while the alternates list is being downloaded.
   */
  struct alternates_request {
          struct walker *walker;
          const char *base;                       /* base URL the alternates belong to */
          char *url;                              /* request URL; rewritten for the fallback request */
          struct strbuf *buffer;                  /* receives the response body */
          struct active_request_slot *slot;
          int http_specific;                      /* 1 for http-alternates, 0 for plain alternates */
  };
  37
  /* Per-walker state, stored in walker->data. */
  struct walker_data {
          const char *url;
          /* -1: not fetched yet or fetch failed, 0: fetch in progress, 1: fetched */
          int got_alternates;
          struct alt_base *alt;           /* head of the list of locations */
  };
  43
  /* Head of the singly linked queue of pending object requests. */
  static struct object_request *object_queue_head = NULL;

  /* Defined further down in this file, but referenced before that point. */
  static void process_object_response(void *callback_data);
  static void fetch_alternates(struct walker *walker, const char *base);
  49
  50static void start_object_request(struct walker *walker,
  51                                 struct object_request *obj_req)
  52{
  53        struct active_request_slot *slot;
  54        struct http_object_request *req;
  55
  56        req = new_http_object_request(obj_req->repo->base, obj_req->sha1);
  57        if (req == NULL) {
  58                obj_req->state = ABORTED;
  59                return;
  60        }
  61        obj_req->req = req;
  62
  63        slot = req->slot;
  64        slot->callback_func = process_object_response;
  65        slot->callback_data = obj_req;
  66
  67        /* Try to get the request started, abort the request on error */
  68        obj_req->state = ACTIVE;
  69        if (!start_active_slot(slot)) {
  70                obj_req->state = ABORTED;
  71                release_http_object_request(req);
  72                return;
  73        }
  74}
  75
  76static void finish_object_request(struct object_request *obj_req)
  77{
  78        if (finish_http_object_request(obj_req->req))
  79                return;
  80
  81        if (obj_req->req->rename == 0)
  82                walker_say(obj_req->walker, "got %s\n", sha1_to_hex(obj_req->sha1));
  83}
  84
  85static void process_object_response(void *callback_data)
  86{
  87        struct object_request *obj_req =
  88                (struct object_request *)callback_data;
  89        struct walker *walker = obj_req->walker;
  90        struct walker_data *data = walker->data;
  91        struct alt_base *alt = data->alt;
  92
  93        process_http_object_request(obj_req->req);
  94        obj_req->state = COMPLETE;
  95
  96        /* Use alternates if necessary */
  97        if (missing_target(obj_req->req)) {
  98                fetch_alternates(walker, alt->base);
  99                if (obj_req->repo->next != NULL) {
 100                        obj_req->repo =
 101                                obj_req->repo->next;
 102                        release_http_object_request(obj_req->req);
 103                        start_object_request(walker, obj_req);
 104                        return;
 105                }
 106        }
 107
 108        finish_object_request(obj_req);
 109}
 110
 111static void release_object_request(struct object_request *obj_req)
 112{
 113        struct object_request *entry = object_queue_head;
 114
 115        if (obj_req->req !=NULL && obj_req->req->localfile != -1)
 116                error("fd leakage in release: %d", obj_req->req->localfile);
 117        if (obj_req == object_queue_head) {
 118                object_queue_head = obj_req->next;
 119        } else {
 120                while (entry->next != NULL && entry->next != obj_req)
 121                        entry = entry->next;
 122                if (entry->next == obj_req)
 123                        entry->next = entry->next->next;
 124        }
 125
 126        free(obj_req);
 127}
 128
 #ifdef USE_CURL_MULTI
 /*
  * Fill function registered through add_fill_function(): start the first
  * WAITING request whose object is not present locally.  WAITING requests
  * whose object already exists are marked COMPLETE along the way.
  *
  * Returns 1 when a request was started, 0 when nothing was left to start.
  */
 static int fill_active_slot(struct walker *walker)
 {
         struct object_request *obj_req = object_queue_head;

         while (obj_req != NULL) {
                 if (obj_req->state == WAITING) {
                         if (!has_sha1_file(obj_req->sha1)) {
                                 start_object_request(walker, obj_req);
                                 return 1;
                         }
                         obj_req->state = COMPLETE;
                 }
                 obj_req = obj_req->next;
         }
         return 0;
 }
 #endif
 147
 148static void prefetch(struct walker *walker, unsigned char *sha1)
 149{
 150        struct object_request *newreq;
 151        struct object_request *tail;
 152        struct walker_data *data = walker->data;
 153
 154        newreq = xmalloc(sizeof(*newreq));
 155        newreq->walker = walker;
 156        hashcpy(newreq->sha1, sha1);
 157        newreq->repo = data->alt;
 158        newreq->state = WAITING;
 159        newreq->req = NULL;
 160        newreq->next = NULL;
 161
 162        http_is_verbose = walker->get_verbosely;
 163
 164        if (object_queue_head == NULL) {
 165                object_queue_head = newreq;
 166        } else {
 167                tail = object_queue_head;
 168                while (tail->next != NULL)
 169                        tail = tail->next;
 170                tail->next = newreq;
 171        }
 172
 173#ifdef USE_CURL_MULTI
 174        fill_active_slots();
 175        step_active_slots();
 176#endif
 177}
 178
 179static void process_alternates_response(void *callback_data)
 180{
 181        struct alternates_request *alt_req =
 182                (struct alternates_request *)callback_data;
 183        struct walker *walker = alt_req->walker;
 184        struct walker_data *cdata = walker->data;
 185        struct active_request_slot *slot = alt_req->slot;
 186        struct alt_base *tail = cdata->alt;
 187        const char *base = alt_req->base;
 188        const char null_byte = '\0';
 189        char *data;
 190        int i = 0;
 191
 192        if (alt_req->http_specific) {
 193                if (slot->curl_result != CURLE_OK ||
 194                    !alt_req->buffer->len) {
 195
 196                        /* Try reusing the slot to get non-http alternates */
 197                        alt_req->http_specific = 0;
 198                        sprintf(alt_req->url, "%s/objects/info/alternates",
 199                                base);
 200                        curl_easy_setopt(slot->curl, CURLOPT_URL,
 201                                         alt_req->url);
 202                        active_requests++;
 203                        slot->in_use = 1;
 204                        if (slot->finished != NULL)
 205                                (*slot->finished) = 0;
 206                        if (!start_active_slot(slot)) {
 207                                cdata->got_alternates = -1;
 208                                slot->in_use = 0;
 209                                if (slot->finished != NULL)
 210                                        (*slot->finished) = 1;
 211                        }
 212                        return;
 213                }
 214        } else if (slot->curl_result != CURLE_OK) {
 215                if (!missing_target(slot)) {
 216                        cdata->got_alternates = -1;
 217                        return;
 218                }
 219        }
 220
 221        fwrite_buffer((char *)&null_byte, 1, 1, alt_req->buffer);
 222        alt_req->buffer->len--;
 223        data = alt_req->buffer->buf;
 224
 225        while (i < alt_req->buffer->len) {
 226                int posn = i;
 227                while (posn < alt_req->buffer->len && data[posn] != '\n')
 228                        posn++;
 229                if (data[posn] == '\n') {
 230                        int okay = 0;
 231                        int serverlen = 0;
 232                        struct alt_base *newalt;
 233                        char *target = NULL;
 234                        if (data[i] == '/') {
 235                                /*
 236                                 * This counts
 237                                 * http://git.host/pub/scm/linux.git/
 238                                 * -----------here^
 239                                 * so memcpy(dst, base, serverlen) will
 240                                 * copy up to "...git.host".
 241                                 */
 242                                const char *colon_ss = strstr(base,"://");
 243                                if (colon_ss) {
 244                                        serverlen = (strchr(colon_ss + 3, '/')
 245                                                     - base);
 246                                        okay = 1;
 247                                }
 248                        } else if (!memcmp(data + i, "../", 3)) {
 249                                /*
 250                                 * Relative URL; chop the corresponding
 251                                 * number of subpath from base (and ../
 252                                 * from data), and concatenate the result.
 253                                 *
 254                                 * The code first drops ../ from data, and
 255                                 * then drops one ../ from data and one path
 256                                 * from base.  IOW, one extra ../ is dropped
 257                                 * from data than path is dropped from base.
 258                                 *
 259                                 * This is not wrong.  The alternate in
 260                                 *     http://git.host/pub/scm/linux.git/
 261                                 * to borrow from
 262                                 *     http://git.host/pub/scm/linus.git/
 263                                 * is ../../linus.git/objects/.  You need
 264                                 * two ../../ to borrow from your direct
 265                                 * neighbour.
 266                                 */
 267                                i += 3;
 268                                serverlen = strlen(base);
 269                                while (i + 2 < posn &&
 270                                       !memcmp(data + i, "../", 3)) {
 271                                        do {
 272                                                serverlen--;
 273                                        } while (serverlen &&
 274                                                 base[serverlen - 1] != '/');
 275                                        i += 3;
 276                                }
 277                                /* If the server got removed, give up. */
 278                                okay = strchr(base, ':') - base + 3 <
 279                                       serverlen;
 280                        } else if (alt_req->http_specific) {
 281                                char *colon = strchr(data + i, ':');
 282                                char *slash = strchr(data + i, '/');
 283                                if (colon && slash && colon < data + posn &&
 284                                    slash < data + posn && colon < slash) {
 285                                        okay = 1;
 286                                }
 287                        }
 288                        /* skip "objects\n" at end */
 289                        if (okay) {
 290                                target = xmalloc(serverlen + posn - i - 6);
 291                                memcpy(target, base, serverlen);
 292                                memcpy(target + serverlen, data + i,
 293                                       posn - i - 7);
 294                                target[serverlen + posn - i - 7] = 0;
 295                                if (walker->get_verbosely)
 296                                        fprintf(stderr,
 297                                                "Also look at %s\n", target);
 298                                newalt = xmalloc(sizeof(*newalt));
 299                                newalt->next = NULL;
 300                                newalt->base = target;
 301                                newalt->got_indices = 0;
 302                                newalt->packs = NULL;
 303
 304                                while (tail->next != NULL)
 305                                        tail = tail->next;
 306                                tail->next = newalt;
 307                        }
 308                }
 309                i = posn + 1;
 310        }
 311
 312        cdata->got_alternates = 1;
 313}
 314
 315static void fetch_alternates(struct walker *walker, const char *base)
 316{
 317        struct strbuf buffer = STRBUF_INIT;
 318        char *url;
 319        struct active_request_slot *slot;
 320        struct alternates_request alt_req;
 321        struct walker_data *cdata = walker->data;
 322
 323        /*
 324         * If another request has already started fetching alternates,
 325         * wait for them to arrive and return to processing this request's
 326         * curl message
 327         */
 328#ifdef USE_CURL_MULTI
 329        while (cdata->got_alternates == 0) {
 330                step_active_slots();
 331        }
 332#endif
 333
 334        /* Nothing to do if they've already been fetched */
 335        if (cdata->got_alternates == 1)
 336                return;
 337
 338        /* Start the fetch */
 339        cdata->got_alternates = 0;
 340
 341        if (walker->get_verbosely)
 342                fprintf(stderr, "Getting alternates list for %s\n", base);
 343
 344        url = xmalloc(strlen(base) + 31);
 345        sprintf(url, "%s/objects/info/http-alternates", base);
 346
 347        /*
 348         * Use a callback to process the result, since another request
 349         * may fail and need to have alternates loaded before continuing
 350         */
 351        slot = get_active_slot();
 352        slot->callback_func = process_alternates_response;
 353        alt_req.walker = walker;
 354        slot->callback_data = &alt_req;
 355
 356        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 357        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 358        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 359
 360        alt_req.base = base;
 361        alt_req.url = url;
 362        alt_req.buffer = &buffer;
 363        alt_req.http_specific = 1;
 364        alt_req.slot = slot;
 365
 366        if (start_active_slot(slot))
 367                run_active_slot(slot);
 368        else
 369                cdata->got_alternates = -1;
 370
 371        strbuf_release(&buffer);
 372        free(url);
 373}
 374
 375static int fetch_indices(struct walker *walker, struct alt_base *repo)
 376{
 377        int ret;
 378
 379        if (repo->got_indices)
 380                return 0;
 381
 382        if (walker->get_verbosely)
 383                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 384
 385        switch (http_get_info_packs(repo->base, &repo->packs)) {
 386        case HTTP_OK:
 387        case HTTP_MISSING_TARGET:
 388                repo->got_indices = 1;
 389                ret = 0;
 390                break;
 391        default:
 392                repo->got_indices = 0;
 393                ret = -1;
 394        }
 395
 396        return ret;
 397}
 398
 399static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
 400{
 401        struct packed_git *target;
 402        int ret;
 403        struct slot_results results;
 404        struct http_pack_request *preq;
 405
 406        if (fetch_indices(walker, repo))
 407                return -1;
 408        target = find_sha1_pack(sha1, repo->packs);
 409        if (!target)
 410                return -1;
 411
 412        if (walker->get_verbosely) {
 413                fprintf(stderr, "Getting pack %s\n",
 414                        sha1_to_hex(target->sha1));
 415                fprintf(stderr, " which contains %s\n",
 416                        sha1_to_hex(sha1));
 417        }
 418
 419        preq = new_http_pack_request(target, repo->base);
 420        if (preq == NULL)
 421                goto abort;
 422        preq->lst = &repo->packs;
 423        preq->slot->results = &results;
 424
 425        if (start_active_slot(preq->slot)) {
 426                run_active_slot(preq->slot);
 427                if (results.curl_result != CURLE_OK) {
 428                        error("Unable to get pack file %s\n%s", preq->url,
 429                              curl_errorstr);
 430                        goto abort;
 431                }
 432        } else {
 433                error("Unable to start request");
 434                goto abort;
 435        }
 436
 437        ret = finish_http_pack_request(preq);
 438        release_http_pack_request(preq);
 439        if (ret)
 440                return ret;
 441
 442        return 0;
 443
 444abort:
 445        return -1;
 446}
 447
 /*
  * Abandon a queued request.  This only forwards to
  * release_object_request(), which unlinks the request from the queue
  * and frees it.
  */
 static void abort_object_request(struct object_request *obj_req)
 {
         release_object_request(obj_req);
 }
 452
 453static int fetch_object(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
 454{
 455        char *hex = sha1_to_hex(sha1);
 456        int ret = 0;
 457        struct object_request *obj_req = object_queue_head;
 458        struct http_object_request *req;
 459
 460        while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
 461                obj_req = obj_req->next;
 462        if (obj_req == NULL)
 463                return error("Couldn't find request for %s in the queue", hex);
 464
 465        if (has_sha1_file(obj_req->sha1)) {
 466                if (obj_req->req != NULL)
 467                        abort_http_object_request(obj_req->req);
 468                abort_object_request(obj_req);
 469                return 0;
 470        }
 471
 472#ifdef USE_CURL_MULTI
 473        while (obj_req->state == WAITING)
 474                step_active_slots();
 475#else
 476        start_object_request(walker, obj_req);
 477#endif
 478
 479        /*
 480         * obj_req->req might change when fetching alternates in the callback
 481         * process_object_response; therefore, the "shortcut" variable, req,
 482         * is used only after we're done with slots.
 483         */
 484        while (obj_req->state == ACTIVE)
 485                run_active_slot(obj_req->req->slot);
 486
 487        req = obj_req->req;
 488
 489        if (req->localfile != -1) {
 490                close(req->localfile);
 491                req->localfile = -1;
 492        }
 493
 494        if (obj_req->state == ABORTED) {
 495                ret = error("Request for %s aborted", hex);
 496        } else if (req->curl_result != CURLE_OK &&
 497                   req->http_code != 416) {
 498                if (missing_target(req))
 499                        ret = -1; /* Be silent, it is probably in a pack. */
 500                else
 501                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
 502                                    req->errorstr, req->curl_result,
 503                                    req->http_code, hex);
 504        } else if (req->zret != Z_STREAM_END) {
 505                walker->corrupt_object_found++;
 506                ret = error("File %s (%s) corrupt", hex, req->url);
 507        } else if (hashcmp(obj_req->sha1, req->real_sha1)) {
 508                ret = error("File %s has bad hash", hex);
 509        } else if (req->rename < 0) {
 510                ret = error("unable to write sha1 filename %s",
 511                            sha1_file_name(req->sha1));
 512        }
 513
 514        release_http_object_request(req);
 515        release_object_request(obj_req);
 516        return ret;
 517}
 518
 519static int fetch(struct walker *walker, unsigned char *sha1)
 520{
 521        struct walker_data *data = walker->data;
 522        struct alt_base *altbase = data->alt;
 523
 524        if (!fetch_object(walker, altbase, sha1))
 525                return 0;
 526        while (altbase) {
 527                if (!http_fetch_pack(walker, altbase, sha1))
 528                        return 0;
 529                fetch_alternates(walker, data->alt->base);
 530                altbase = altbase->next;
 531        }
 532        return error("Unable to find %s under %s", sha1_to_hex(sha1),
 533                     data->alt->base);
 534}
 535
 536static int fetch_ref(struct walker *walker, struct ref *ref)
 537{
 538        struct walker_data *data = walker->data;
 539        return http_fetch_ref(data->alt->base, ref);
 540}
 541
 542static void cleanup(struct walker *walker)
 543{
 544        struct walker_data *data = walker->data;
 545        struct alt_base *alt, *alt_next;
 546
 547        if (data) {
 548                alt = data->alt;
 549                while (alt) {
 550                        alt_next = alt->next;
 551
 552                        free(alt->base);
 553                        free(alt);
 554
 555                        alt = alt_next;
 556                }
 557                free(data);
 558                walker->data = NULL;
 559        }
 560}
 561
 562struct walker *get_http_walker(const char *url)
 563{
 564        char *s;
 565        struct walker_data *data = xmalloc(sizeof(struct walker_data));
 566        struct walker *walker = xmalloc(sizeof(struct walker));
 567
 568        data->alt = xmalloc(sizeof(*data->alt));
 569        data->alt->base = xmalloc(strlen(url) + 1);
 570        strcpy(data->alt->base, url);
 571        for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s)
 572                *s = 0;
 573
 574        data->alt->got_indices = 0;
 575        data->alt->packs = NULL;
 576        data->alt->next = NULL;
 577        data->got_alternates = -1;
 578
 579        walker->corrupt_object_found = 0;
 580        walker->fetch = fetch;
 581        walker->fetch_ref = fetch_ref;
 582        walker->prefetch = prefetch;
 583        walker->cleanup = cleanup;
 584        walker->data = data;
 585
 586#ifdef USE_CURL_MULTI
 587        add_fill_function(walker, (int (*)(void *)) fill_active_slot);
 588#endif
 589
 590        return walker;
 591}