http-fetch.c on commit Added Packing Heursitics IRC writeup. (b116b29)
   1#include "cache.h"
   2#include "commit.h"
   3#include "pack.h"
   4#include "fetch.h"
   5#include "http.h"
   6
   7#define PREV_BUF_SIZE 4096
   8#define RANGE_HEADER_SIZE 30
   9
  10static int got_alternates = -1;
  11static int corrupt_object_found = 0;
  12
  13static struct curl_slist *no_pragma_header;
  14
  15struct alt_base
  16{
  17        char *base;
  18        int got_indices;
  19        struct packed_git *packs;
  20        struct alt_base *next;
  21};
  22
  23static struct alt_base *alt = NULL;
  24
  25enum object_request_state {
  26        WAITING,
  27        ABORTED,
  28        ACTIVE,
  29        COMPLETE,
  30};
  31
  32struct object_request
  33{
  34        unsigned char sha1[20];
  35        struct alt_base *repo;
  36        char *url;
  37        char filename[PATH_MAX];
  38        char tmpfile[PATH_MAX];
  39        int local;
  40        enum object_request_state state;
  41        CURLcode curl_result;
  42        char errorstr[CURL_ERROR_SIZE];
  43        long http_code;
  44        unsigned char real_sha1[20];
  45        SHA_CTX c;
  46        z_stream stream;
  47        int zret;
  48        int rename;
  49        struct active_request_slot *slot;
  50        struct object_request *next;
  51};
  52
  53struct alternates_request {
  54        char *base;
  55        char *url;
  56        struct buffer *buffer;
  57        struct active_request_slot *slot;
  58        int http_specific;
  59};
  60
  61static struct object_request *object_queue_head = NULL;
  62
  63static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
  64                               void *data)
  65{
  66        unsigned char expn[4096];
  67        size_t size = eltsize * nmemb;
  68        int posn = 0;
  69        struct object_request *obj_req = (struct object_request *)data;
  70        do {
  71                ssize_t retval = write(obj_req->local,
  72                                       ptr + posn, size - posn);
  73                if (retval < 0)
  74                        return posn;
  75                posn += retval;
  76        } while (posn < size);
  77
  78        obj_req->stream.avail_in = size;
  79        obj_req->stream.next_in = ptr;
  80        do {
  81                obj_req->stream.next_out = expn;
  82                obj_req->stream.avail_out = sizeof(expn);
  83                obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
  84                SHA1_Update(&obj_req->c, expn,
  85                            sizeof(expn) - obj_req->stream.avail_out);
  86        } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
  87        data_received++;
  88        return size;
  89}
  90
  91static void fetch_alternates(char *base);
  92
  93static void process_object_response(void *callback_data);
  94
  95static void start_object_request(struct object_request *obj_req)
  96{
  97        char *hex = sha1_to_hex(obj_req->sha1);
  98        char prevfile[PATH_MAX];
  99        char *url;
 100        char *posn;
 101        int prevlocal;
 102        unsigned char prev_buf[PREV_BUF_SIZE];
 103        ssize_t prev_read = 0;
 104        long prev_posn = 0;
 105        char range[RANGE_HEADER_SIZE];
 106        struct curl_slist *range_header = NULL;
 107        struct active_request_slot *slot;
 108
 109        snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
 110        unlink(prevfile);
 111        rename(obj_req->tmpfile, prevfile);
 112        unlink(obj_req->tmpfile);
 113
 114        if (obj_req->local != -1)
 115                error("fd leakage in start: %d", obj_req->local);
 116        obj_req->local = open(obj_req->tmpfile,
 117                              O_WRONLY | O_CREAT | O_EXCL, 0666);
 118        /* This could have failed due to the "lazy directory creation";
 119         * try to mkdir the last path component.
 120         */
 121        if (obj_req->local < 0 && errno == ENOENT) {
 122                char *dir = strrchr(obj_req->tmpfile, '/');
 123                if (dir) {
 124                        *dir = 0;
 125                        mkdir(obj_req->tmpfile, 0777);
 126                        *dir = '/';
 127                }
 128                obj_req->local = open(obj_req->tmpfile,
 129                                      O_WRONLY | O_CREAT | O_EXCL, 0666);
 130        }
 131
 132        if (obj_req->local < 0) {
 133                obj_req->state = ABORTED;
 134                error("Couldn't create temporary file %s for %s: %s",
 135                      obj_req->tmpfile, obj_req->filename, strerror(errno));
 136                return;
 137        }
 138
 139        memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 140
 141        inflateInit(&obj_req->stream);
 142
 143        SHA1_Init(&obj_req->c);
 144
 145        url = xmalloc(strlen(obj_req->repo->base) + 50);
 146        obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
 147        strcpy(url, obj_req->repo->base);
 148        posn = url + strlen(obj_req->repo->base);
 149        strcpy(posn, "objects/");
 150        posn += 8;
 151        memcpy(posn, hex, 2);
 152        posn += 2;
 153        *(posn++) = '/';
 154        strcpy(posn, hex + 2);
 155        strcpy(obj_req->url, url);
 156
 157        /* If a previous temp file is present, process what was already
 158           fetched. */
 159        prevlocal = open(prevfile, O_RDONLY);
 160        if (prevlocal != -1) {
 161                do {
 162                        prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
 163                        if (prev_read>0) {
 164                                if (fwrite_sha1_file(prev_buf,
 165                                                     1,
 166                                                     prev_read,
 167                                                     obj_req) == prev_read) {
 168                                        prev_posn += prev_read;
 169                                } else {
 170                                        prev_read = -1;
 171                                }
 172                        }
 173                } while (prev_read > 0);
 174                close(prevlocal);
 175        }
 176        unlink(prevfile);
 177
 178        /* Reset inflate/SHA1 if there was an error reading the previous temp
 179           file; also rewind to the beginning of the local file. */
 180        if (prev_read == -1) {
 181                memset(&obj_req->stream, 0, sizeof(obj_req->stream));
 182                inflateInit(&obj_req->stream);
 183                SHA1_Init(&obj_req->c);
 184                if (prev_posn>0) {
 185                        prev_posn = 0;
 186                        lseek(obj_req->local, SEEK_SET, 0);
 187                        ftruncate(obj_req->local, 0);
 188                }
 189        }
 190
 191        slot = get_active_slot();
 192        slot->callback_func = process_object_response;
 193        slot->callback_data = obj_req;
 194        obj_req->slot = slot;
 195
 196        curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
 197        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
 198        curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
 199        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 200        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 201
 202        /* If we have successfully processed data from a previous fetch
 203           attempt, only fetch the data we don't already have. */
 204        if (prev_posn>0) {
 205                if (get_verbosely)
 206                        fprintf(stderr,
 207                                "Resuming fetch of object %s at byte %ld\n",
 208                                hex, prev_posn);
 209                sprintf(range, "Range: bytes=%ld-", prev_posn);
 210                range_header = curl_slist_append(range_header, range);
 211                curl_easy_setopt(slot->curl,
 212                                 CURLOPT_HTTPHEADER, range_header);
 213        }
 214
 215        /* Try to get the request started, abort the request on error */
 216        obj_req->state = ACTIVE;
 217        if (!start_active_slot(slot)) {
 218                obj_req->state = ABORTED;
 219                obj_req->slot = NULL;
 220                close(obj_req->local); obj_req->local = -1;
 221                free(obj_req->url);
 222                return;
 223        }
 224}
 225
 226static void finish_object_request(struct object_request *obj_req)
 227{
 228        struct stat st;
 229
 230        fchmod(obj_req->local, 0444);
 231        close(obj_req->local); obj_req->local = -1;
 232
 233        if (obj_req->http_code == 416) {
 234                fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
 235        } else if (obj_req->curl_result != CURLE_OK) {
 236                if (stat(obj_req->tmpfile, &st) == 0)
 237                        if (st.st_size == 0)
 238                                unlink(obj_req->tmpfile);
 239                return;
 240        }
 241
 242        inflateEnd(&obj_req->stream);
 243        SHA1_Final(obj_req->real_sha1, &obj_req->c);
 244        if (obj_req->zret != Z_STREAM_END) {
 245                unlink(obj_req->tmpfile);
 246                return;
 247        }
 248        if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
 249                unlink(obj_req->tmpfile);
 250                return;
 251        }
 252        obj_req->rename =
 253                move_temp_to_file(obj_req->tmpfile, obj_req->filename);
 254
 255        if (obj_req->rename == 0)
 256                pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
 257}
 258
 259static void process_object_response(void *callback_data)
 260{
 261        struct object_request *obj_req =
 262                (struct object_request *)callback_data;
 263
 264        obj_req->curl_result = obj_req->slot->curl_result;
 265        obj_req->http_code = obj_req->slot->http_code;
 266        obj_req->slot = NULL;
 267        obj_req->state = COMPLETE;
 268
 269        /* Use alternates if necessary */
 270        if (obj_req->http_code == 404 ||
 271            obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 272                fetch_alternates(alt->base);
 273                if (obj_req->repo->next != NULL) {
 274                        obj_req->repo =
 275                                obj_req->repo->next;
 276                        close(obj_req->local);
 277                        obj_req->local = -1;
 278                        start_object_request(obj_req);
 279                        return;
 280                }
 281        }
 282
 283        finish_object_request(obj_req);
 284}
 285
 286static void release_object_request(struct object_request *obj_req)
 287{
 288        struct object_request *entry = object_queue_head;
 289
 290        if (obj_req->local != -1)
 291                error("fd leakage in release: %d", obj_req->local);
 292        if (obj_req == object_queue_head) {
 293                object_queue_head = obj_req->next;
 294        } else {
 295                while (entry->next != NULL && entry->next != obj_req)
 296                        entry = entry->next;
 297                if (entry->next == obj_req)
 298                        entry->next = entry->next->next;
 299        }
 300
 301        free(obj_req->url);
 302        free(obj_req);
 303}
 304
 305#ifdef USE_CURL_MULTI
 306void fill_active_slots(void)
 307{
 308        struct object_request *obj_req = object_queue_head;
 309        struct active_request_slot *slot = active_queue_head;
 310        int num_transfers;
 311
 312        while (active_requests < max_requests && obj_req != NULL) {
 313                if (obj_req->state == WAITING) {
 314                        if (has_sha1_file(obj_req->sha1))
 315                                obj_req->state = COMPLETE;
 316                        else
 317                                start_object_request(obj_req);
 318                        curl_multi_perform(curlm, &num_transfers);
 319                }
 320                obj_req = obj_req->next;
 321        }
 322
 323        while (slot != NULL) {
 324                if (!slot->in_use && slot->curl != NULL) {
 325                        curl_easy_cleanup(slot->curl);
 326                        slot->curl = NULL;
 327                }
 328                slot = slot->next;
 329        }
 330}
 331#endif
 332
 333void prefetch(unsigned char *sha1)
 334{
 335        struct object_request *newreq;
 336        struct object_request *tail;
 337        char *filename = sha1_file_name(sha1);
 338
 339        newreq = xmalloc(sizeof(*newreq));
 340        memcpy(newreq->sha1, sha1, 20);
 341        newreq->repo = alt;
 342        newreq->url = NULL;
 343        newreq->local = -1;
 344        newreq->state = WAITING;
 345        snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
 346        snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
 347                 "%s.temp", filename);
 348        newreq->next = NULL;
 349
 350        if (object_queue_head == NULL) {
 351                object_queue_head = newreq;
 352        } else {
 353                tail = object_queue_head;
 354                while (tail->next != NULL) {
 355                        tail = tail->next;
 356                }
 357                tail->next = newreq;
 358        }
 359
 360#ifdef USE_CURL_MULTI
 361        fill_active_slots();
 362        step_active_slots();
 363#endif
 364}
 365
 366static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 367{
 368        char *hex = sha1_to_hex(sha1);
 369        char *filename;
 370        char *url;
 371        char tmpfile[PATH_MAX];
 372        long prev_posn = 0;
 373        char range[RANGE_HEADER_SIZE];
 374        struct curl_slist *range_header = NULL;
 375
 376        FILE *indexfile;
 377        struct active_request_slot *slot;
 378        struct slot_results results;
 379
 380        if (has_pack_index(sha1))
 381                return 0;
 382
 383        if (get_verbosely)
 384                fprintf(stderr, "Getting index for pack %s\n", hex);
 385
 386        url = xmalloc(strlen(repo->base) + 64);
 387        sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
 388
 389        filename = sha1_pack_index_name(sha1);
 390        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 391        indexfile = fopen(tmpfile, "a");
 392        if (!indexfile)
 393                return error("Unable to open local file %s for pack index",
 394                             filename);
 395
 396        slot = get_active_slot();
 397        slot->results = &results;
 398        curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
 399        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
 400        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 401        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 402        slot->local = indexfile;
 403
 404        /* If there is data present from a previous transfer attempt,
 405           resume where it left off */
 406        prev_posn = ftell(indexfile);
 407        if (prev_posn>0) {
 408                if (get_verbosely)
 409                        fprintf(stderr,
 410                                "Resuming fetch of index for pack %s at byte %ld\n",
 411                                hex, prev_posn);
 412                sprintf(range, "Range: bytes=%ld-", prev_posn);
 413                range_header = curl_slist_append(range_header, range);
 414                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
 415        }
 416
 417        if (start_active_slot(slot)) {
 418                run_active_slot(slot);
 419                if (results.curl_result != CURLE_OK) {
 420                        fclose(indexfile);
 421                        return error("Unable to get pack index %s\n%s", url,
 422                                     curl_errorstr);
 423                }
 424        } else {
 425                fclose(indexfile);
 426                return error("Unable to start request");
 427        }
 428
 429        fclose(indexfile);
 430
 431        return move_temp_to_file(tmpfile, filename);
 432}
 433
 434static int setup_index(struct alt_base *repo, unsigned char *sha1)
 435{
 436        struct packed_git *new_pack;
 437        if (has_pack_file(sha1))
 438                return 0; // don't list this as something we can get
 439
 440        if (fetch_index(repo, sha1))
 441                return -1;
 442
 443        new_pack = parse_pack_index(sha1);
 444        new_pack->next = repo->packs;
 445        repo->packs = new_pack;
 446        return 0;
 447}
 448
 449static void process_alternates_response(void *callback_data)
 450{
 451        struct alternates_request *alt_req =
 452                (struct alternates_request *)callback_data;
 453        struct active_request_slot *slot = alt_req->slot;
 454        struct alt_base *tail = alt;
 455        char *base = alt_req->base;
 456        static const char null_byte = '\0';
 457        char *data;
 458        int i = 0;
 459
 460        if (alt_req->http_specific) {
 461                if (slot->curl_result != CURLE_OK ||
 462                    !alt_req->buffer->posn) {
 463
 464                        /* Try reusing the slot to get non-http alternates */
 465                        alt_req->http_specific = 0;
 466                        sprintf(alt_req->url, "%s/objects/info/alternates",
 467                                base);
 468                        curl_easy_setopt(slot->curl, CURLOPT_URL,
 469                                         alt_req->url);
 470                        active_requests++;
 471                        slot->in_use = 1;
 472                        if (slot->finished != NULL)
 473                                (*slot->finished) = 0;
 474                        if (!start_active_slot(slot)) {
 475                                got_alternates = -1;
 476                                slot->in_use = 0;
 477                                if (slot->finished != NULL)
 478                                        (*slot->finished) = 1;
 479                        }
 480                        return;
 481                }
 482        } else if (slot->curl_result != CURLE_OK) {
 483                if (slot->http_code != 404 &&
 484                    slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
 485                        got_alternates = -1;
 486                        return;
 487                }
 488        }
 489
 490        fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
 491        alt_req->buffer->posn--;
 492        data = alt_req->buffer->buffer;
 493
 494        while (i < alt_req->buffer->posn) {
 495                int posn = i;
 496                while (posn < alt_req->buffer->posn && data[posn] != '\n')
 497                        posn++;
 498                if (data[posn] == '\n') {
 499                        int okay = 0;
 500                        int serverlen = 0;
 501                        struct alt_base *newalt;
 502                        char *target = NULL;
 503                        if (data[i] == '/') {
 504                                serverlen = strchr(base + 8, '/') - base;
 505                                okay = 1;
 506                        } else if (!memcmp(data + i, "../", 3)) {
 507                                i += 3;
 508                                serverlen = strlen(base);
 509                                while (i + 2 < posn &&
 510                                       !memcmp(data + i, "../", 3)) {
 511                                        do {
 512                                                serverlen--;
 513                                        } while (serverlen &&
 514                                                 base[serverlen - 1] != '/');
 515                                        i += 3;
 516                                }
 517                                // If the server got removed, give up.
 518                                okay = strchr(base, ':') - base + 3 <
 519                                        serverlen;
 520                        } else if (alt_req->http_specific) {
 521                                char *colon = strchr(data + i, ':');
 522                                char *slash = strchr(data + i, '/');
 523                                if (colon && slash && colon < data + posn &&
 524                                    slash < data + posn && colon < slash) {
 525                                        okay = 1;
 526                                }
 527                        }
 528                        // skip 'objects' at end
 529                        if (okay) {
 530                                target = xmalloc(serverlen + posn - i - 6);
 531                                strncpy(target, base, serverlen);
 532                                strncpy(target + serverlen, data + i,
 533                                        posn - i - 7);
 534                                target[serverlen + posn - i - 7] = '\0';
 535                                if (get_verbosely)
 536                                        fprintf(stderr,
 537                                                "Also look at %s\n", target);
 538                                newalt = xmalloc(sizeof(*newalt));
 539                                newalt->next = NULL;
 540                                newalt->base = target;
 541                                newalt->got_indices = 0;
 542                                newalt->packs = NULL;
 543                                while (tail->next != NULL)
 544                                        tail = tail->next;
 545                                tail->next = newalt;
 546                        }
 547                }
 548                i = posn + 1;
 549        }
 550
 551        got_alternates = 1;
 552}
 553
 554static void fetch_alternates(char *base)
 555{
 556        struct buffer buffer;
 557        char *url;
 558        char *data;
 559        struct active_request_slot *slot;
 560        struct alternates_request alt_req;
 561
 562        /* If another request has already started fetching alternates,
 563           wait for them to arrive and return to processing this request's
 564           curl message */
 565#ifdef USE_CURL_MULTI
 566        while (got_alternates == 0) {
 567                step_active_slots();
 568        }
 569#endif
 570
 571        /* Nothing to do if they've already been fetched */
 572        if (got_alternates == 1)
 573                return;
 574
 575        /* Start the fetch */
 576        got_alternates = 0;
 577
 578        data = xmalloc(4096);
 579        buffer.size = 4096;
 580        buffer.posn = 0;
 581        buffer.buffer = data;
 582
 583        if (get_verbosely)
 584                fprintf(stderr, "Getting alternates list for %s\n", base);
 585
 586        url = xmalloc(strlen(base) + 31);
 587        sprintf(url, "%s/objects/info/http-alternates", base);
 588
 589        /* Use a callback to process the result, since another request
 590           may fail and need to have alternates loaded before continuing */
 591        slot = get_active_slot();
 592        slot->callback_func = process_alternates_response;
 593        slot->callback_data = &alt_req;
 594
 595        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 596        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 597        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 598
 599        alt_req.base = base;
 600        alt_req.url = url;
 601        alt_req.buffer = &buffer;
 602        alt_req.http_specific = 1;
 603        alt_req.slot = slot;
 604
 605        if (start_active_slot(slot))
 606                run_active_slot(slot);
 607        else
 608                got_alternates = -1;
 609
 610        free(data);
 611        free(url);
 612}
 613
 614static int fetch_indices(struct alt_base *repo)
 615{
 616        unsigned char sha1[20];
 617        char *url;
 618        struct buffer buffer;
 619        char *data;
 620        int i = 0;
 621
 622        struct active_request_slot *slot;
 623        struct slot_results results;
 624
 625        if (repo->got_indices)
 626                return 0;
 627
 628        data = xmalloc(4096);
 629        buffer.size = 4096;
 630        buffer.posn = 0;
 631        buffer.buffer = data;
 632
 633        if (get_verbosely)
 634                fprintf(stderr, "Getting pack list for %s\n", repo->base);
 635
 636        url = xmalloc(strlen(repo->base) + 21);
 637        sprintf(url, "%s/objects/info/packs", repo->base);
 638
 639        slot = get_active_slot();
 640        slot->results = &results;
 641        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 642        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 643        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 644        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
 645        if (start_active_slot(slot)) {
 646                run_active_slot(slot);
 647                if (results.curl_result != CURLE_OK) {
 648                        if (results.http_code == 404 ||
 649                            results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
 650                                repo->got_indices = 1;
 651                                free(buffer.buffer);
 652                                return 0;
 653                        } else {
 654                                repo->got_indices = 0;
 655                                free(buffer.buffer);
 656                                return error("%s", curl_errorstr);
 657                        }
 658                }
 659        } else {
 660                repo->got_indices = 0;
 661                free(buffer.buffer);
 662                return error("Unable to start request");
 663        }
 664
 665        data = buffer.buffer;
 666        while (i < buffer.posn) {
 667                switch (data[i]) {
 668                case 'P':
 669                        i++;
 670                        if (i + 52 <= buffer.posn &&
 671                            !strncmp(data + i, " pack-", 6) &&
 672                            !strncmp(data + i + 46, ".pack\n", 6)) {
 673                                get_sha1_hex(data + i + 6, sha1);
 674                                setup_index(repo, sha1);
 675                                i += 51;
 676                                break;
 677                        }
 678                default:
 679                        while (i < buffer.posn && data[i] != '\n')
 680                                i++;
 681                }
 682                i++;
 683        }
 684
 685        free(buffer.buffer);
 686        repo->got_indices = 1;
 687        return 0;
 688}
 689
 690static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
 691{
 692        char *url;
 693        struct packed_git *target;
 694        struct packed_git **lst;
 695        FILE *packfile;
 696        char *filename;
 697        char tmpfile[PATH_MAX];
 698        int ret;
 699        long prev_posn = 0;
 700        char range[RANGE_HEADER_SIZE];
 701        struct curl_slist *range_header = NULL;
 702
 703        struct active_request_slot *slot;
 704        struct slot_results results;
 705
 706        if (fetch_indices(repo))
 707                return -1;
 708        target = find_sha1_pack(sha1, repo->packs);
 709        if (!target)
 710                return -1;
 711
 712        if (get_verbosely) {
 713                fprintf(stderr, "Getting pack %s\n",
 714                        sha1_to_hex(target->sha1));
 715                fprintf(stderr, " which contains %s\n",
 716                        sha1_to_hex(sha1));
 717        }
 718
 719        url = xmalloc(strlen(repo->base) + 65);
 720        sprintf(url, "%s/objects/pack/pack-%s.pack",
 721                repo->base, sha1_to_hex(target->sha1));
 722
 723        filename = sha1_pack_name(target->sha1);
 724        snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
 725        packfile = fopen(tmpfile, "a");
 726        if (!packfile)
 727                return error("Unable to open local file %s for pack",
 728                             filename);
 729
 730        slot = get_active_slot();
 731        slot->results = &results;
 732        curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
 733        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
 734        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 735        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
 736        slot->local = packfile;
 737
 738        /* If there is data present from a previous transfer attempt,
 739           resume where it left off */
 740        prev_posn = ftell(packfile);
 741        if (prev_posn>0) {
 742                if (get_verbosely)
 743                        fprintf(stderr,
 744                                "Resuming fetch of pack %s at byte %ld\n",
 745                                sha1_to_hex(target->sha1), prev_posn);
 746                sprintf(range, "Range: bytes=%ld-", prev_posn);
 747                range_header = curl_slist_append(range_header, range);
 748                curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
 749        }
 750
 751        if (start_active_slot(slot)) {
 752                run_active_slot(slot);
 753                if (results.curl_result != CURLE_OK) {
 754                        fclose(packfile);
 755                        return error("Unable to get pack file %s\n%s", url,
 756                                     curl_errorstr);
 757                }
 758        } else {
 759                fclose(packfile);
 760                return error("Unable to start request");
 761        }
 762
 763        fclose(packfile);
 764
 765        ret = move_temp_to_file(tmpfile, filename);
 766        if (ret)
 767                return ret;
 768
 769        lst = &repo->packs;
 770        while (*lst != target)
 771                lst = &((*lst)->next);
 772        *lst = (*lst)->next;
 773
 774        if (verify_pack(target, 0))
 775                return -1;
 776        install_packed_git(target);
 777
 778        return 0;
 779}
 780
 781static void abort_object_request(struct object_request *obj_req)
 782{
 783        if (obj_req->local >= 0) {
 784                close(obj_req->local);
 785                obj_req->local = -1;
 786        }
 787        unlink(obj_req->tmpfile);
 788        if (obj_req->slot) {
 789                release_active_slot(obj_req->slot);
 790                obj_req->slot = NULL;
 791        }
 792        release_object_request(obj_req);
 793}
 794
 795static int fetch_object(struct alt_base *repo, unsigned char *sha1)
 796{
 797        char *hex = sha1_to_hex(sha1);
 798        int ret = 0;
 799        struct object_request *obj_req = object_queue_head;
 800
 801        while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
 802                obj_req = obj_req->next;
 803        if (obj_req == NULL)
 804                return error("Couldn't find request for %s in the queue", hex);
 805
 806        if (has_sha1_file(obj_req->sha1)) {
 807                abort_object_request(obj_req);
 808                return 0;
 809        }
 810
 811#ifdef USE_CURL_MULTI
 812        while (obj_req->state == WAITING) {
 813                step_active_slots();
 814        }
 815#else
 816        start_object_request(obj_req);
 817#endif
 818
 819        while (obj_req->state == ACTIVE) {
 820                run_active_slot(obj_req->slot);
 821        }
 822        if (obj_req->local != -1) {
 823                close(obj_req->local); obj_req->local = -1;
 824        }
 825
 826        if (obj_req->state == ABORTED) {
 827                ret = error("Request for %s aborted", hex);
 828        } else if (obj_req->curl_result != CURLE_OK &&
 829                   obj_req->http_code != 416) {
 830                if (obj_req->http_code == 404 ||
 831                    obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
 832                        ret = -1; /* Be silent, it is probably in a pack. */
 833                else
 834                        ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
 835                                    obj_req->errorstr, obj_req->curl_result,
 836                                    obj_req->http_code, hex);
 837        } else if (obj_req->zret != Z_STREAM_END) {
 838                corrupt_object_found++;
 839                ret = error("File %s (%s) corrupt", hex, obj_req->url);
 840        } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
 841                ret = error("File %s has bad hash", hex);
 842        } else if (obj_req->rename < 0) {
 843                ret = error("unable to write sha1 filename %s",
 844                            obj_req->filename);
 845        }
 846
 847        release_object_request(obj_req);
 848        return ret;
 849}
 850
 851int fetch(unsigned char *sha1)
 852{
 853        struct alt_base *altbase = alt;
 854
 855        if (!fetch_object(altbase, sha1))
 856                return 0;
 857        while (altbase) {
 858                if (!fetch_pack(altbase, sha1))
 859                        return 0;
 860                fetch_alternates(alt->base);
 861                altbase = altbase->next;
 862        }
 863        return error("Unable to find %s under %s", sha1_to_hex(sha1),
 864                     alt->base);
 865}
 866
 867static inline int needs_quote(int ch)
 868{
 869        switch (ch) {
 870        case '/': case '-': case '.':
 871        case 'A'...'Z': case 'a'...'z': case '0'...'9':
 872                return 0;
 873        default:
 874                return 1;
 875        }
 876}
 877
 878static inline int hex(int v)
 879{
 880        if (v < 10) return '0' + v;
 881        else return 'A' + v - 10;
 882}
 883
 884static char *quote_ref_url(const char *base, const char *ref)
 885{
 886        const char *cp;
 887        char *dp, *qref;
 888        int len, baselen, ch;
 889
 890        baselen = strlen(base);
 891        len = baselen + 6; /* "refs/" + NUL */
 892        for (cp = ref; (ch = *cp) != 0; cp++, len++)
 893                if (needs_quote(ch))
 894                        len += 2; /* extra two hex plus replacement % */
 895        qref = xmalloc(len);
 896        memcpy(qref, base, baselen);
 897        memcpy(qref + baselen, "refs/", 5);
 898        for (cp = ref, dp = qref + baselen + 5; (ch = *cp) != 0; cp++) {
 899                if (needs_quote(ch)) {
 900                        *dp++ = '%';
 901                        *dp++ = hex((ch >> 4) & 0xF);
 902                        *dp++ = hex(ch & 0xF);
 903                }
 904                else
 905                        *dp++ = ch;
 906        }
 907        *dp = 0;
 908
 909        return qref;
 910}
 911
 912int fetch_ref(char *ref, unsigned char *sha1)
 913{
 914        char *url;
 915        char hex[42];
 916        struct buffer buffer;
 917        char *base = alt->base;
 918        struct active_request_slot *slot;
 919        struct slot_results results;
 920        buffer.size = 41;
 921        buffer.posn = 0;
 922        buffer.buffer = hex;
 923        hex[41] = '\0';
 924
 925        url = quote_ref_url(base, ref);
 926        slot = get_active_slot();
 927        slot->results = &results;
 928        curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
 929        curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 930        curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
 931        curl_easy_setopt(slot->curl, CURLOPT_URL, url);
 932        if (start_active_slot(slot)) {
 933                run_active_slot(slot);
 934                if (results.curl_result != CURLE_OK)
 935                        return error("Couldn't get %s for %s\n%s",
 936                                     url, ref, curl_errorstr);
 937        } else {
 938                return error("Unable to start request");
 939        }
 940
 941        hex[40] = '\0';
 942        get_sha1_hex(hex, sha1);
 943        return 0;
 944}
 945
 946int main(int argc, char **argv)
 947{
 948        char *commit_id;
 949        char *url;
 950        int arg = 1;
 951        int rc = 0;
 952
 953        setup_git_directory();
 954
 955        while (arg < argc && argv[arg][0] == '-') {
 956                if (argv[arg][1] == 't') {
 957                        get_tree = 1;
 958                } else if (argv[arg][1] == 'c') {
 959                        get_history = 1;
 960                } else if (argv[arg][1] == 'a') {
 961                        get_all = 1;
 962                        get_tree = 1;
 963                        get_history = 1;
 964                } else if (argv[arg][1] == 'v') {
 965                        get_verbosely = 1;
 966                } else if (argv[arg][1] == 'w') {
 967                        write_ref = argv[arg + 1];
 968                        arg++;
 969                } else if (!strcmp(argv[arg], "--recover")) {
 970                        get_recover = 1;
 971                }
 972                arg++;
 973        }
 974        if (argc < arg + 2) {
 975                usage("git-http-fetch [-c] [-t] [-a] [-d] [-v] [--recover] [-w ref] commit-id url");
 976                return 1;
 977        }
 978        commit_id = argv[arg];
 979        url = argv[arg + 1];
 980
 981        http_init();
 982
 983        no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
 984
 985        alt = xmalloc(sizeof(*alt));
 986        alt->base = url;
 987        alt->got_indices = 0;
 988        alt->packs = NULL;
 989        alt->next = NULL;
 990
 991        if (pull(commit_id))
 992                rc = 1;
 993
 994        curl_slist_free_all(no_pragma_header);
 995
 996        http_cleanup();
 997
 998        if (corrupt_object_found) {
 999                fprintf(stderr,
1000"Some loose object were found to be corrupt, but they might be just\n"
1001"a false '404 Not Found' error message sent with incorrect HTTP\n"
1002"status code.  Suggest running git fsck-objects.\n");
1003        }
1004        return rc;
1005}